Skip to article frontmatter | Skip to article content

5. Results

from erddapy import ERDDAP
import os
import pandas as pd
import numpy as np

import panel as pn
import holoviews as hv
from holoviews import opts
# Select Bokeh as the HoloViews plotting backend, then load the Panel
# extension so widgets and layouts can be rendered.
hv.extension('bokeh')
pn.extension()
Loading...

Load the dataset

# Read the CSV using its first column as the index and ask pandas to parse
# that index as dates.
df = pd.read_csv(
    'dataset.csv',
    index_col=0,
    parse_dates=True,
)

We have determined in the Experiments notebook that the MissForest algorithm appears to work reasonably well when imputing artificially large gaps.

We use it to gap-fill the missing data in this dataset.

from imputeMF import imputeMF
# Impute on the raw value array, then re-attach the original row index and
# column labels so the result lines up with `df`.
# NOTE(review): the positional 10 is presumably the iteration cap -- confirm
# against the imputeMF signature.
df_imputed = pd.DataFrame(
    data=imputeMF(df.values, 10, print_stats=True),
    index=df.index,
    columns=df.columns,
)
Statistics:
iteration 1, gamma = 0.03425468635593164
Statistics:
iteration 2, gamma = 0.0006042664939214106
Statistics:
iteration 3, gamma = 9.74190663753796e-05
Statistics:
iteration 4, gamma = 2.7013204750538315e-05
Statistics:
iteration 5, gamma = 1.9360935351100095e-05
Statistics:
iteration 6, gamma = 9.403293930896005e-06
Statistics:
iteration 7, gamma = 7.947575792084705e-06
Statistics:
iteration 8, gamma = 8.262845957832012e-06

Save the results

# Write the gap-filled table to disk.
df_imputed.to_csv('dataset_imputed.csv')

# Dropdown widget offering one entry per column of the original data.
site_selector = pn.widgets.Select(
    options=list(df.columns),
    name='Site',
)

def _missing_runs(series):
    """Return ``(start, end)`` index labels for each run of NaNs in *series*.

    ``start`` is the label of the first missing sample in a run and ``end``
    is the label of the first valid sample that follows it.  A run that
    extends to the end of the series is closed at the last index label.
    """
    missing = series.isna()
    # Compare every sample with its predecessor; the first sample is treated
    # as if it were preceded by a valid value.
    prev_missing = missing.shift(1, fill_value=False).astype(bool)

    index = series.index
    starts = list(index[(missing & ~prev_missing).to_numpy()])
    ends = list(index[(~missing & prev_missing).to_numpy()])

    # A trailing gap has no following valid sample to close it.
    # NOTE: if that gap is only the final sample, start == end and the
    # resulting span has zero width.
    if len(starts) > len(ends):
        ends.append(index[-1])

    return list(zip(starts, ends))


def highlight_imputed_regions(label):
    """Plot the imputed series for *label* with the originally missing spans shaded.

    Args:
        label: Column name looked up in both the module-level ``df`` (used to
            locate the NaN runs) and ``df_imputed`` (used for the curve).

    Returns:
        The product of the imputed ``hv.Curve`` and an ``hv.Overlay`` holding
        one translucent red ``hv.VSpan`` per run of NaNs in ``df[label]``.
    """
    series = df[label]
    series_imputed = df_imputed[label]

    # One shaded band per run of missing values in the original data.
    spans = [
        hv.VSpan(start, end).opts(color='red', alpha=0.2)
        for start, end in _missing_runs(series)
    ]

    curve = hv.Curve(series_imputed, label=label).opts(
        width=900, height=250, tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True, title=label
    )

    return curve * hv.Overlay(spans)
    
# Bind the plotting function to the dropdown and wrap it in a DynamicMap, so
# the plot is rebuilt from the currently selected column.
interactive_plot = hv.DynamicMap(pn.bind(highlight_imputed_regions, site_selector))

# Bare expression kept last so the notebook displays the layout: the selector
# stacked above the plot.
pn.Column(site_selector, interactive_plot)
Loading...

Highlighted regions have been imputed using MissForest.