import pandas as pd
import os
import param
import panel as pn
import holoviews as hv
from holoviews import opts
import pickle
# Activate the Bokeh plotting backend for HoloViews and load Panel's
# notebook integration so widgets and plots render inline.
hv.extension('bokeh')
pn.extension()
This notebook is a visualization tool for exploring how different imputation algorithms behave under different amounts of artificial missing data.
# Load the dataset; the first CSV column becomes the index and pandas is
# asked to parse it as dates.
df = pd.read_csv('dataset.csv', parse_dates=True, index_col=0)
df.head(10)

# Create a dropdown selector with one entry per column of the dataset
site_selector = pn.widgets.Select(name='Site', options=list(df.columns))
def highlight_nan_regions(label, data=df):
    """Plot one series as a curve with its runs of missing values shaded red.

    Args:
        label: Column of ``data`` to plot. Also used as the curve label and
            the plot title.
        data: DataFrame holding the series. Defaults to the ``df`` that
            exists when this function is defined. The default is bound at
            definition time on purpose: this function is used as a widget
            callback, and a later cell rebinds the global ``df`` to the
            results table, which would otherwise make every later call
            fail with a ``KeyError``.

    Returns:
        The curve overlaid with one ``hv.VSpan`` per run of NaN values.
    """
    series = data[label]

    # Identify NaN regions: each one starts at the first missing timestamp
    # and ends at the next timestamp that has a value.
    nan_ranges = []
    current_start = None
    for date, missing in series.isna().items():
        if missing:
            if current_start is None:
                current_start = date
        elif current_start is not None:
            nan_ranges.append((current_start, date))
            current_start = None
    # The series ends inside a gap: close it at the last index value.
    if current_start is not None:
        nan_ranges.append((current_start, series.index[-1]))

    # Create shaded regions
    spans = [
        hv.VSpan(start, end).opts(color='red', alpha=0.2)
        for start, end in nan_ranges
    ]
    curve = hv.Curve(series, label=label).opts(
        width=900, height=250, tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True, title=label
    )
    return curve * hv.Overlay(spans)
# Bind the plotting function to the dropdown so the plot is recomputed
# whenever a different site is selected.
interactive_plot = hv.DynamicMap(pn.bind(highlight_nan_regions, site_selector))
pn.Column(site_selector, interactive_plot, 'Highlighted regions show gaps in each series')
Visualize experiment results
# Load the experiment results. The columns used below are 'imputer_name',
# 'missing_fraction', 'MAE' and 'RMSE'.
# NOTE: this rebinds `df`, which previously held the dataset.csv frame;
# the cells that follow rely on `df` being the results table.
df = pd.read_csv('results.csv')

# The set of imputers is the same for every metric, so compute it once.
imputer_names = df['imputer_name'].unique()

plots = []
for metric in ['MAE', 'RMSE']:
    # One scatter per imputer, overlaid so each gets its own legend entry.
    scatter = hv.NdOverlay({
        imputer: hv.Scatter(
            df[df['imputer_name'] == imputer],
            'missing_fraction',
            metric,
            label=imputer,
        ).opts(size=8)
        for imputer in imputer_names
    })
    scatter.opts(
        title=f'{metric} vs Missing Fraction by Imputation Strategy',
        xlabel='Missing Fraction (%)',
        ylabel=metric,
        width=800,
        height=400,
        legend_position='right'
    )
    plots.append(scatter)

# Stack the plots vertically; building the layout from the list keeps this
# correct if more metrics are added to the loop above.
hv.Layout(plots).cols(1)
Results (interactive dashboard)
This is the same information as the above pair of plots but presented using the param library.
class ResultsExplorer(param.Parameterized):
    """Single-imputer view of the results table driven by two parameters.

    ``view`` is declared to depend on both parameters, so anything that
    renders it is refreshed when either selection changes.
    """

    # Imputer whose rows are plotted; the choices are the distinct values
    # of df['imputer_name'], with the first one selected initially.
    imputer = param.ObjectSelector(
        objects=list(df['imputer_name'].unique()),
        default=df['imputer_name'].unique()[0],
    )
    # Error metric shown on the y-axis.
    metric = param.ObjectSelector(objects=['MAE', 'RMSE'], default='MAE')

    @param.depends('imputer', 'metric')
    def view(self):
        """Return a scatter of the selected metric against missing fraction."""
        rows = df[df['imputer_name'] == self.imputer]
        plot_options = dict(
            title=f'{self.metric} vs Missing Fraction ({self.imputer})',
            xlabel='Missing Fraction (%)',
            ylabel=self.metric,
            size=8,
            alpha=0.7,
            width=800,
            height=400,
        )
        scatter = hv.Scatter(rows, 'missing_fraction', self.metric)
        return scatter.opts(**plot_options)
explorer = ResultsExplorer()

# Create a Panel layout with separate controls and plot.
# The widget mapping overrides the default widget type for each parameter.
controls = pn.Param(
    explorer.param,
    widgets={
        'imputer': pn.widgets.Select,
        'metric': pn.widgets.RadioButtonGroup
    },
    show_name=False
)

# Compose everything together in a clean layout: controls on the left,
# the parameter-dependent plot on the right.
dashboard = pn.Row(
    pn.Column(pn.pane.Markdown("### Controls"), controls, width=250),
    pn.Column(explorer.view)
)
dashboard
Specific experiments
This visualization shows the results from a particular combination of imputation algorithm and artificial gaps. It is useful for understanding how different algorithms compare to each other.
# Load your data dictionary: one entry per .pkl file in results_dir,
# keyed by file name.
results_dir = 'results'
data = {}
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
# file. Only point results_dir at files produced by a trusted source.
# os.listdir returns names in arbitrary order; sort them so the experiment
# dropdown is stable between runs.
for fname in sorted(os.listdir(results_dir)):
    if fname.endswith('.pkl'):
        with open(os.path.join(results_dir, fname), 'rb') as f:
            data[fname] = pickle.load(f)
# Widgets
# NOTE(review): imputer_selector is a bare param Parameter, not a Panel
# widget, and nothing in the visible code references it. Confirm whether a
# pn.widgets.Select was intended here.
_imputer_names = list(df['imputer_name'].unique())
imputer_selector = param.ObjectSelector(
    default=_imputer_names[0],
    objects=_imputer_names,
)
# One option per loaded results file.
experiment_selector = pn.widgets.Select(name='Experiment', options=list(data))
# Starts empty; its options are filled in from the selected experiment.
column_selector = pn.widgets.Select(name='Column', options=[])
# Update column options
def update_columns(event=None):
    """Refresh the column dropdown to match the selected experiment.

    Registered as a watcher on the experiment selector's value and also
    called once directly to populate the dropdown initially.

    Args:
        event: Watcher event supplied by Panel; unused. Defaults to None so
            the function can be called directly.
    """
    entry = data.get(experiment_selector.value)
    # With no experiment selected (e.g. no result files were found) or an
    # entry without an observed frame, there is nothing to choose from;
    # clear the dropdown instead of raising inside the callback.
    if entry is None or 'df' not in entry:
        column_selector.options = []
        return
    column_selector.options = list(entry['df'].columns)


experiment_selector.param.watch(update_columns, 'value')
update_columns()
# Plot function
@pn.depends(experiment_selector, column_selector)
def overlay_plot(experiment, column):
    """Overlay the true, imputed and observed curves for one column.

    Args:
        experiment: Key into ``data`` selecting the experiment entry.
        column: Column to plot from each frame in that entry.

    Returns:
        An overlay of the available curves, plus a light-gray rectangle
        marking the gap when the entry has a ``'gaps'`` record for the
        column.
    """
    entry = data[experiment]

    # (entry key, legend label, line color), listed in drawing order.
    layers = (
        ('df_true', 'True', 'green'),
        ('df_imputed', 'Imputed', 'orange'),
        ('df', 'Observed', 'gray'),
    )
    curves = [
        hv.Curve(
            (entry[key].index, entry[key][column]), 'Time', 'Value', label=legend
        ).opts(color=line_color)
        for key, legend, line_color in layers
        if key in entry and column in entry[key].columns
    ]
    overlay = hv.Overlay(curves)

    # Add gap highlight from internal 'gaps' key
    if 'gaps' in entry and column in entry['gaps']:
        observed = entry['df']
        # The gap is stored as a pair of positions into the observed index.
        first_pos, last_pos = entry['gaps'][column]
        x_start = observed.index[first_pos]
        x_end = observed.index[last_pos]
        # The box spans the value range of the observed column.
        y_low = observed[column].min()
        y_high = observed[column].max()
        box_style = dict(
            fill_color='lightgray',
            fill_alpha=0.5,
            line_alpha=0,
            tools=[],
        )
        overlay = overlay * hv.Rectangles(
            [(x_start, y_low, x_end, y_high)]
        ).opts(**box_style)

    return overlay.opts(
        title=f"{column} across Observed, True, and Imputed",
        width=800,
        height=400,
        legend_position='right',
        tools=['hover'],
    )
# Layout: selectors on top, the dependent plot below, then a caption.
dashboard = pn.Column(
    pn.Row(experiment_selector, column_selector),
    overlay_plot,
    "Gray box indicates an artificial gap."
)
dashboard