Appendix: Visualization methods - Time Series Imputation for Oceanographic Data

import pandas as pd
import os
import param
import panel as pn
import holoviews as hv
from holoviews import opts
import pickle
# Select Bokeh as the HoloViews plotting backend.
hv.extension('bokeh')
# Load the Panel extension so Panel objects can render in the notebook.
pn.extension()

This notebook is a visualization tool used to explore how different imputation algorithms behave under different amounts of artificial missing data.

# Read the dataset, using the first CSV column as the index and asking pandas
# to parse that index as dates.
df = pd.read_csv('dataset.csv', parse_dates=True, index_col=0)

# Preview the first ten rows.
df.head(10)

# Create a dropdown selector with one option per column of the dataset
site_selector = pn.widgets.Select(name='Site', options=list(df.columns))

def highlight_nan_regions(label, data=df):
    """Plot one column as a curve with its runs of missing values shaded red.

    Parameters
    ----------
    label : str
        Column of ``data`` to plot; also used as the curve label and title.
    data : pandas.DataFrame, optional
        Table to read the column from. The default is bound once, when this
        function is defined, to the ``df`` that exists at that moment. The
        notebook later rebinds the global ``df`` to the experiment-results
        table, so reading the global at call time would make every later
        widget callback look for ``label`` in the wrong table.

    Returns
    -------
    holoviews.Overlay
        The curve overlaid with one ``hv.VSpan`` per run of NaN values.
    """
    series = data[label]

    # Scan the missing-value mask once, collecting a (start, end) pair of
    # index values for every consecutive run of NaNs.
    nan_ranges = []
    current_start = None
    for date, missing in series.isna().items():
        if missing and current_start is None:
            current_start = date
        elif not missing and current_start is not None:
            # The run is closed at the first non-missing index value.
            nan_ranges.append((current_start, date))
            current_start = None
    if current_start is not None:
        # The series ends inside a gap: close the run at the last index value.
        nan_ranges.append((current_start, series.index[-1]))

    # Create shaded regions
    spans = [
        hv.VSpan(start, end).opts(color='red', alpha=0.2)
        for start, end in nan_ranges
    ]

    curve = hv.Curve(series, label=label).opts(
        width=900, height=250, tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True, title=label
    )

    return curve * hv.Overlay(spans)
    
# Re-run highlight_nan_regions with the selector's current value whenever that
# value changes, and let HoloViews redraw the result.
interactive_plot = hv.DynamicMap(pn.bind(highlight_nan_regions, site_selector))

# Stack the selector, the plot and a short caption vertically.
pn.Column(site_selector, interactive_plot, 'Highlighted regions show gaps in each series')

Visualize experiment results

# Load the experiment results table; from here on `df` refers to this table.
df = pd.read_csv('results.csv')

# Build one overlay per error metric, each holding one scatter per imputer.
plots = []
for metric in ('MAE', 'RMSE'):

    scatter = hv.NdOverlay({
        imputer: hv.Scatter(
            df.loc[df['imputer_name'] == imputer],
            kdims='missing_fraction',
            vdims=metric,
            label=imputer,
        ).opts(size=8)
        for imputer in df['imputer_name'].unique()
    })

    # Figure-level options shared by every scatter in this overlay.
    overlay_options = dict(
        title=f'{metric} vs Missing Fraction by Imputation Strategy',
        xlabel='Missing Fraction (%)',
        ylabel=metric,
        width=800,
        height=400,
        legend_position='right',
    )
    scatter.opts(**overlay_options)

    plots.append(scatter)

# Show the two metric plots stacked in a single column.
(plots[0] + plots[1]).cols(1)

Results (interactive dashboard)

This is the same information as the above pair of plots but presented using the param library.

class ResultsExplorer(param.Parameterized):
    """Parameterized view of one imputer's error against the missing fraction.

    The two parameters choose which rows of the module-level results table
    ``df`` are shown and which metric column is plotted on the y-axis.
    """

    # Imputation strategy to display: one of the distinct values found in
    # df['imputer_name'], defaulting to the first of them.
    imputer = param.ObjectSelector(
        objects=list(df['imputer_name'].unique()),
        default=df['imputer_name'].unique()[0],
    )
    # Error metric column to plot.
    metric = param.ObjectSelector(objects=['MAE', 'RMSE'], default='MAE')

    @param.depends('imputer', 'metric')
    def view(self):
        """Return a scatter of the chosen metric for the chosen imputer."""
        rows = df[df['imputer_name'] == self.imputer]
        points = hv.Scatter(rows, 'missing_fraction', self.metric)
        plot_options = dict(
            title=f'{self.metric} vs Missing Fraction ({self.imputer})',
            xlabel='Missing Fraction (%)',
            ylabel=self.metric,
            size=8,
            alpha=0.7,
            width=800,
            height=400,
        )
        return points.opts(**plot_options)

explorer = ResultsExplorer()

# Render the explorer's parameters as widgets: a dropdown for the imputer and
# a button group for the metric, without the class-name header.
controls = pn.Param(
    explorer.param,
    show_name=False,
    widgets=dict(
        imputer=pn.widgets.Select,
        metric=pn.widgets.RadioButtonGroup,
    ),
)

# Fixed-width control column on the left, the dependent plot on the right.
dashboard = pn.Row(
    pn.Column(pn.pane.Markdown("### Controls"), controls, width=250),
    pn.Column(explorer.view),
)

dashboard

Specific experiments

This visualization shows the results from a particular combination of imputation algorithm and artificial gaps. It is useful for understanding how different algorithms compare to each other.

# Load every pickled experiment result in the results directory into a dict
# keyed by file name.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only point results_dir at files you produced yourself.
results_dir = 'results'
data = {}
# os.listdir returns names in arbitrary order; sort them so the experiment
# list built from this dict is in the same order on every run and platform.
for fname in sorted(os.listdir(results_dir)):
    if fname.endswith('.pkl'):
        with open(os.path.join(results_dir, fname), 'rb') as f:
            data[fname] = pickle.load(f)

# Widgets
# NOTE(review): imputer_selector is a bare param Parameter created outside any
# Parameterized class, not a Panel widget, and nothing in the code visible here
# reads it — confirm whether it is still needed.
imputer_selector = param.ObjectSelector(default=df['imputer_name'].unique()[0], objects=list(df['imputer_name'].unique()))
# Dropdown listing the loaded result files (the keys of `data`).
experiment_selector = pn.widgets.Select(name='Experiment', options=list(data.keys()))
# Dropdown for the column to plot; created empty and filled in afterwards.
column_selector = pn.widgets.Select(name='Column', options=[])

# Keep the column dropdown in sync with the selected experiment
def update_columns(event=None):
    """Fill the column selector with the columns of the chosen experiment.

    ``event`` is never read; it is accepted so the function can serve both as
    a watcher callback and as a plain call with no arguments.
    """
    selected_entry = data[experiment_selector.value]
    column_selector.options = list(selected_entry['df'].columns)

# Refresh on every change of experiment, and once now for the initial value.
experiment_selector.param.watch(update_columns, 'value')
update_columns()

# Plot function
@pn.depends(experiment_selector, column_selector)
def overlay_plot(experiment, column):
    """Overlay the true, imputed and observed curves for one column.

    ``experiment`` is a key of the module-level ``data`` dict and ``column``
    a column name. Each of the entry's ``'df_true'``, ``'df_imputed'`` and
    ``'df'`` frames contributes a curve when it is present and contains the
    column. If the entry has a ``'gaps'`` mapping with this column, its value
    is unpacked as a (start, end) pair of positions into ``entry['df'].index``
    and drawn as a light-gray rectangle spanning the column's min and max.
    """
    entry = data[experiment]

    # (key in the entry, legend label, line color), in drawing order.
    layers = (
        ('df_true', 'True', 'green'),
        ('df_imputed', 'Imputed', 'orange'),
        ('df', 'Observed', 'gray'),
    )

    curves = []
    for key, legend, line_color in layers:
        if key not in entry:
            continue
        frame = entry[key]
        if column not in frame.columns:
            continue
        line = hv.Curve((frame.index, frame[column]), 'Time', 'Value', label=legend)
        curves.append(line.opts(color=line_color))

    overlay = hv.Overlay(curves)

    # Add gap highlight from internal 'gaps' key
    if 'gaps' in entry and column in entry['gaps']:
        observed = entry['df']
        start_idx, end_idx = entry['gaps'][column]
        left = observed.index[start_idx]
        right = observed.index[end_idx]
        bottom = observed[column].min()
        top = observed[column].max()
        gap_box = hv.Rectangles([(left, bottom, right, top)]).opts(
            fill_color='lightgray',
            fill_alpha=0.5,
            line_alpha=0,
            tools=[],
        )
        overlay = overlay * gap_box

    figure_options = dict(
        title=f"{column} across Observed, True, and Imputed",
        width=800,
        height=400,
        legend_position='right',
        tools=['hover'],
    )
    return overlay.opts(**figure_options)
# Layout: both selectors in a row above the plot, with a caption underneath
dashboard = pn.Column(
    pn.Row(experiment_selector, column_selector),
    overlay_plot,
    "Gray box indicates an artificial gap."
)

dashboard

Time Series Imputation for Oceanographic Data

9. Case Study: Bring Your Own Data

Time Series Imputation for Oceanographic Data

Appendix: Optimize Hyperparameters for Different Methods