import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
from holoviews import opts
# Activate the Bokeh plotting backend for HoloViews and initialise Panel.
hv.extension('bokeh')
pn.extension()
Consider the following dataset from the CMAR Water Quality Data, collected at different stations off the coast of Shelburne County.
# Load the dataset, parsing the 'date' column as datetimes and using it as
# the index.
df = pd.read_csv("dataset.csv", parse_dates=['date'], index_col='date')
df
## Explore the data
We can plot the temperature at any of these different station-depth locations.
# Dropdown widget offering one entry per column of the data frame
station_depth_selector = pn.widgets.Select(
    name='Station @ Depth',
    options=df.columns.tolist(),
)
def plot_curve(station_depth):
    """Build the curve for a single column of ``df``.

    ``station_depth`` is the column label to plot; it is also used as the
    label of the returned ``hv.Curve``.
    """
    plot_options = dict(
        width=900,
        height=400,
        tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True,
        title="Daily Avg Temperature",
    )
    return hv.Curve(df[station_depth], label=station_depth).opts(**plot_options)
# Bind plot_curve to the selector widget and wrap the bound function in a
# DynamicMap so the plot is driven by the dropdown.
interactive_plot = hv.DynamicMap(pn.bind(plot_curve, station_depth_selector))

# Browse through this dataset by selecting different stations and depths.
pn.Column(station_depth_selector, interactive_plot)
Over the four-year period from May 2018 to May 2022, this dataset contains gaps that vary by station and depth.
def _find_nan_ranges(series):
    """Return ``(start, end)`` index-label pairs, one per run of NaNs.

    ``start`` is the label of the first missing value in the run. ``end`` is
    the label of the first non-missing value that follows it, or the last
    index label of ``series`` when the run extends to the end.
    """
    nan_ranges = []
    gap_start = None
    for date, missing in series.isna().items():
        if missing:
            # Only the first missing entry of a run opens a new gap.
            if gap_start is None:
                gap_start = date
        elif gap_start is not None:
            # First valid value after a gap closes it.
            nan_ranges.append((gap_start, date))
            gap_start = None
    if gap_start is not None:
        # The series ends inside a gap: close it at the last index label.
        nan_ranges.append((gap_start, series.index[-1]))
    return nan_ranges


def highlight_nan_regions(series: pd.Series, label: str):
    """Plot ``series`` as a curve with its missing-data runs shaded in red.

    Args:
        series: Values to plot; NaN entries mark missing observations.
        label: Used both as the curve label and as the plot title.

    Returns:
        The curve overlaid with one translucent red ``hv.VSpan`` per run of
        consecutive NaNs.
    """
    # Create shaded regions
    spans = [
        hv.VSpan(start, end).opts(color='red', alpha=0.2)
        for start, end in _find_nan_ranges(series)
    ]
    curve = hv.Curve(series, label=label).opts(
        width=900, height=250, tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True, title=label
    )
    return curve * hv.Overlay(spans)


plot1 = highlight_nan_regions(df['BlueIsland_5m'], 'Blue Island @ 5.0m')
plot2 = highlight_nan_regions(df['Ingomar_10m'], 'Ingomar @ 10.0m')
plot3 = highlight_nan_regions(df['McNuttsIsland_10m'], 'McNutts Island @ 10.0m')
# Arrange the three plots in a single-column layout.
layout = (plot1 + plot2 + plot3).cols(1)
layout
This dataset covers four years of daily observations (2018-05-15 to 2022-05-14) for 17 different “sites” (different stations and vertical depths).
# Transpose so that each row of the image is one column of df and each
# image column is one date.
image_data = df.astype('float32').T.values
x_labels = df.index.strftime('%Y-%m-%d')  # dates → x-axis
y_labels = list(df.columns)  # station-depths → y-axis
# Integer positions are used as image coordinates; the text labels are
# attached to them through the tick lists below.
x_coords = np.arange(len(x_labels))
y_coords = np.arange(len(y_labels))
heatmap = hv.Image((x_coords, y_coords, image_data)).opts(
    xaxis='bottom',
    xlabel='Date',
    ylabel='Station @ Depth',
    xticks=list(zip(x_coords[::30], x_labels[::30])),  # every 30th date
    yticks=list(zip(y_coords, y_labels)),
    xrotation=45,
    cmap='Viridis',
    colorbar=True,
    width=1000,
    height=800,
    tools=['hover']
)
heatmap
Our objective is to fill in the gaps in this dataset.