Skip to content

Utils Module🔗

calculate_temporal_stats(df) 🔗

Calculate temporal statistics for a given DataFrame.

Source code in src/water_timeseries/utils/data.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def calculate_temporal_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate temporal statistics for a given DataFrame.

    Args:
        df: DataFrame with "pre_break_median", "post_break_median" and
            "date_break" columns. The input frame is not modified.

    Returns:
        A copy of ``df`` where zero medians are masked to NaN, with added
        columns "date_break_year", "date_break_month", "water_change_ha"
        (post minus pre) and "water_change_perc" (change relative to the
        pre-break median, in percent).
    """
    df = df.copy()
    # Zero medians are treated as missing data and masked to NaN.
    df["pre_break_median"] = df["pre_break_median"].where(df["pre_break_median"] != 0, np.nan)
    df["post_break_median"] = df["post_break_median"].where(df["post_break_median"] != 0, np.nan)
    breaks = pd.to_datetime(df["date_break"])
    df["date_break_year"] = breaks.dt.year
    df["date_break_month"] = breaks.dt.month
    # Change in water area (ha).
    df["water_change_ha"] = df["post_break_median"] - df["pre_break_median"]
    # Relative change (%). Pre-break zeros are already NaN (masked above),
    # so the division cannot hit a literal zero.
    df["water_change_perc"] = df["water_change_ha"].div(df["pre_break_median"]) * 100
    return df

create_tile_layers() 🔗

Create tile layers for folium maps.

Returns:

Type Description

List of tile layer names that can be added to folium.Map

Source code in src/water_timeseries/utils/map_styling.py
149
150
151
152
153
154
155
def create_tile_layers():
    """Create tile layers for folium maps.

    Returns:
        List of tile layer names that can be added to folium.Map
    """
    layer_names = [
        "CartoDB.DarkMatter",
        "Esri.WorldImagery",
    ]
    return layer_names

format_tooltip_columns(valid_gdf, id_column, tooltip_columns=None) 🔗

Format columns for tooltip display to avoid JSON serialization issues.

Parameters:

Name Type Description Default
valid_gdf

GeoDataFrame to format

required
id_column str

Name of the ID column (always shown first)

required
tooltip_columns

List of tuples (original_col, display_alias, format_string, unit) If None, uses default NetChange columns

None

Returns:

Name Type Description
formatted_gdf

GeoDataFrame with display columns added

fields_to_show

List of field names for tooltip

aliases_to_show

List of field aliases for tooltip

Source code in src/water_timeseries/utils/map_styling.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def format_tooltip_columns(
    valid_gdf,
    id_column: str,
    tooltip_columns=None,
):
    """Format columns for tooltip display to avoid JSON serialization issues.

    Args:
        valid_gdf: GeoDataFrame to format.
        id_column: Name of the ID column (always shown first).
        tooltip_columns: List of tuples (original_col, display_alias,
            format_string, unit). If None, uses default NetChange columns.

    Returns:
        formatted_gdf: GeoDataFrame with display columns added.
        fields_to_show: List of field names for the tooltip.
        aliases_to_show: List of field aliases for the tooltip.
    """
    import pandas as pd

    if tooltip_columns is None:
        # Default tooltip columns if NetChange data exists.
        tooltip_columns = [
            ("NetChange_perc", "Net Change (%):", "{:.2f}", "%"),
            ("NetChange_ha", "Net Change (ha):", "{:.2f}", " ha"),
        ]

    # Keep only the specs whose source column actually exists in the frame.
    present = [spec for spec in tooltip_columns if spec[0] in valid_gdf.columns]

    if not present:
        # None of the requested columns exist: fall back to the ID alone.
        return valid_gdf, [id_column], ["ID:"]

    valid_gdf = valid_gdf.copy()
    fields_to_show = [id_column]
    aliases_to_show = ["ID:"]

    for source_col, alias, fmt, unit in present:
        target_col = f"{source_col}_display"

        def render(value, _fmt=fmt, _unit=unit):
            # Pre-format to a plain string so nothing non-serializable
            # (e.g. NaN) reaches the tooltip JSON.
            return f"{_fmt.format(value)}{_unit}" if pd.notna(value) else "N/A"

        valid_gdf[target_col] = valid_gdf[source_col].apply(render)
        fields_to_show.append(target_col)
        aliases_to_show.append(alias)

    return valid_gdf, fields_to_show, aliases_to_show

get_colored_style_function(color_column='NetChange_perc', vmin=-40, vmax=40, colormap=None, default_color='#cccccc', fill_opacity=0.6, edge_color='#dddddd', edge_weight=1) 🔗

Create a style function for folium polygons based on a numeric column.

Parameters:

Name Type Description Default
color_column str

Column name to use for coloring

'NetChange_perc'
vmin float

Minimum value for normalization

-40
vmax float

Maximum value for normalization

40
colormap

Matplotlib colormap (defaults to RdBu_r)

None
default_color str

Color for missing/null values

'#cccccc'
fill_opacity float

Opacity of polygon fill (0-1)

0.6
edge_color str

Color of polygon edges

'#dddddd'
edge_weight float

Width of polygon edges

1

Returns:

Name Type Description
style_function

Function that can be passed to folium.GeoJson style_function parameter

Source code in src/water_timeseries/utils/map_styling.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def get_colored_style_function(
    color_column: str = "NetChange_perc",
    vmin: float = -40,
    vmax: float = 40,
    colormap=None,
    default_color: str = "#cccccc",
    fill_opacity: float = 0.6,
    edge_color: str = "#dddddd",
    edge_weight: float = 1,
):
    """Create a style function for folium polygons based on a numeric column.

    Args:
        color_column: Column name to use for coloring.
        vmin: Minimum value for normalization.
        vmax: Maximum value for normalization.
        colormap: Matplotlib colormap (defaults to RdBu_r).
        default_color: Color for missing/null values.
        fill_opacity: Opacity of polygon fill (0-1).
        edge_color: Color of polygon edges.
        edge_weight: Width of polygon edges.

    Returns:
        style_function: Function that can be passed to folium.GeoJson
            style_function parameter.
    """
    import matplotlib.pyplot as plt
    import pandas as pd

    cmap = plt.cm.RdBu_r if colormap is None else colormap
    norm = plt.Normalize(vmin=vmin, vmax=vmax)

    def style_function(feature):
        value = feature.get("properties", {}).get(color_column)

        # Missing or NaN values get the neutral default styling.
        if value is None or pd.isna(value):
            return {
                "fillColor": default_color,
                "color": edge_color,
                "weight": edge_weight,
                "fillOpacity": 0.5,
            }

        # Map the normalized value through the colormap, then convert RGBA to
        # a hex string by hand to avoid JSON serialization issues.
        red, green, blue, _alpha = cmap(norm(value))
        fill = "#{:02x}{:02x}{:02x}".format(
            int(red * 255), int(green * 255), int(blue * 255)
        )

        return {
            "fillColor": fill,
            "color": edge_color,
            "weight": edge_weight,
            "fillOpacity": fill_opacity,
        }

    return style_function

get_default_style_function(fill_color='blue', edge_color='#dddddd', edge_weight=1, fill_opacity=0.5) 🔗

Create a default style function for folium polygons.

Parameters:

Name Type Description Default
fill_color str

Fill color for all polygons

'blue'
edge_color str

Color of polygon edges

'#dddddd'
edge_weight float

Width of polygon edges

1
fill_opacity float

Opacity of polygon fill (0-1)

0.5

Returns:

Name Type Description
style_function

Function that can be passed to folium.GeoJson style_function parameter

Source code in src/water_timeseries/utils/map_styling.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def get_default_style_function(
    fill_color: str = "blue",
    edge_color: str = "#dddddd",
    edge_weight: float = 1,
    fill_opacity: float = 0.5,
):
    """Create a default style function for folium polygons.

    Args:
        fill_color: Fill color for all polygons.
        edge_color: Color of polygon edges.
        edge_weight: Width of polygon edges.
        fill_opacity: Opacity of polygon fill (0-1).

    Returns:
        style_function: Function that can be passed to folium.GeoJson
            style_function parameter.
    """

    def style_function(_feature):
        # Constant styling for every feature; a fresh dict is built per call.
        return dict(
            fillColor=fill_color,
            color=edge_color,
            weight=edge_weight,
            fillOpacity=fill_opacity,
        )

    return style_function

get_water_dataset_type(input_ds) 🔗

Determine the water dataset type based on the presence of specific variables in the dataset.

Source code in src/water_timeseries/utils/data.py
21
22
23
24
25
26
27
28
29
30
def get_water_dataset_type(input_ds) -> str:
    """Determine the water dataset type based on the presence of specific variables in the dataset."""
    data_vars = input_ds.data_vars
    # JRC datasets expose "area_water_permanent"; Dynamic World exposes "water".
    if "area_water_permanent" in data_vars:
        return "jrc"
    if "water" in data_vars:
        return "dynamic_world"
    raise ValueError("Unknown water dataset type")

load_vector_dataset(file_path, logger=None) 🔗

Load a vector dataset from file based on file extension.

Supports GeoPackage, Shapefile, GeoJSON, and Parquet formats.

Parameters:

Name Type Description Default
file_path Union[str, Path]

Path to the vector dataset file.

required
logger Optional[logger]

Optional logger instance for logging messages.

None

Returns:

Type Description
Optional[GeoDataFrame]

GeoDataFrame if successful, None otherwise.

Raises:

Type Description
FileNotFoundError

If the file does not exist.

Source code in src/water_timeseries/utils/io.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def load_vector_dataset(
    file_path: Union[str, Path],
    logger: Optional[logger] = None,
) -> Optional[gpd.GeoDataFrame]:
    """Load a vector dataset from file based on file extension.

    Supports GeoPackage, Shapefile, GeoJSON, and Parquet formats.

    Args:
        file_path: Path to the vector dataset file.
        logger: Optional logger instance for logging messages.

    Returns:
        GeoDataFrame if successful, None otherwise.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    file_path = Path(file_path)

    # Fail fast on a missing file (warn first if a logger was supplied).
    if not file_path.exists():
        if logger:
            logger.warning(f"Vector dataset file not found: {file_path}")
        raise FileNotFoundError(f"Vector dataset file not found: {file_path}")

    suffix = file_path.suffix.lower()

    if logger:
        logger.info(f"Loading vector dataset from {file_path}")

    # Dispatch on the extension: Parquet has its own reader, the OGR-backed
    # formats all go through read_file.
    if suffix == ".parquet":
        return gpd.read_parquet(file_path)
    if suffix in (".gpkg", ".shp", ".geojson", ".gjson"):
        return gpd.read_file(file_path)

    if logger:
        logger.warning(f"Unsupported vector file format: {suffix}")
    return None

load_xarray_dataset(path, format=None) 🔗

Load xarray dataset from file.

Parameters:

Name Type Description Default
path Union[str, Path]

Path to the dataset file.

required
format Optional[str]

Format of the file ('zarr' or 'netcdf'). If None, auto-detected from extension.

None

Returns:

Type Description
Dataset

xr.Dataset: The loaded dataset.

Raises:

Type Description
ValueError

If the file format is not supported.

Source code in src/water_timeseries/utils/io.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def load_xarray_dataset(
    path: Union[str, Path],
    format: Optional[str] = None,
) -> xr.Dataset:
    """Load xarray dataset from file.

    Args:
        path: Path to the dataset file.
        format: Format of the file ('zarr' or 'netcdf'). If None, auto-detected
            from extension.

    Returns:
        xr.Dataset: The loaded dataset.

    Raises:
        ValueError: If the file format is not supported.
    """
    path = Path(path)

    if format is None:
        # Auto-detect the format from the file extension.
        ext = path.suffix.lower()
        format = {".zarr": "zarr", ".nc": "netcdf"}.get(ext)
        if format is None:
            raise ValueError(f"Cannot auto-detect format for extension: {ext}")

    if format == "zarr":
        return xr.open_zarr(path)
    if format == "netcdf":
        return xr.open_dataset(path)

    raise ValueError(f"Unsupported format: {format}. Use 'zarr' or 'netcdf'.")

save_xarray_dataset(ds, save_path, output_dir=None, logger=None) 🔗

Save xarray dataset to file.

Parameters:

Name Type Description Default
ds Dataset

The xarray dataset to save.

required
save_path Union[str, Path]

Path to save the file. Format is determined by extension: - '.zarr' for Zarr format - '.nc' for NetCDF format If a relative path is provided and output_dir is specified, the file will be saved in that directory.

required
output_dir Optional[Union[str, Path]]

Directory for relative paths. If None and save_path is relative, the current working directory is used.

None
logger

Logger for logging progress. If None, print statements are used.

None

Returns:

Name Type Description
Path Path

The resolved path where the dataset was saved.

Raises:

Type Description
ValueError

If the file extension is not supported.

Source code in src/water_timeseries/utils/io.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def save_xarray_dataset(
    ds: xr.Dataset,
    save_path: Union[str, Path],
    output_dir: Optional[Union[str, Path]] = None,
    logger=None,
) -> Path:
    """Save xarray dataset to file.

    Args:
        ds: The xarray dataset to save.
        save_path: Path to save the file. Format is determined by extension:
            - '.zarr' for Zarr format
            - '.nc' for NetCDF format
            If a relative path is provided and output_dir is specified,
            the file will be saved in that directory.
        output_dir: Directory for relative paths. If None and save_path is relative,
            the current working directory is used.
        logger: Logger for logging progress. If None, print statements are used.

    Returns:
        Path: The resolved path where the dataset was saved.

    Raises:
        ValueError: If the file extension is not supported.
    """

    def _log(msg: str):
        # Route progress messages to the logger when available, stdout otherwise.
        if logger is None:
            print(msg)
        else:
            logger.info(msg)

    path = Path(save_path)

    # Resolve relative paths against output_dir when one is given.
    if output_dir is not None and not path.is_absolute():
        path = Path(output_dir) / path

    # Make sure the destination directory exists before writing.
    path.parent.mkdir(parents=True, exist_ok=True)

    ext = path.suffix.lower()
    _log(f"Saving to {ext[1:].upper()} format: {path}")

    # Deferred writers keyed by extension; nothing is called until selected.
    writers = {
        ".zarr": lambda: ds.to_zarr(path, mode="w"),
        ".nc": lambda: ds.to_netcdf(path),
    }
    write = writers.get(ext)
    if write is None:
        raise ValueError(f"Unsupported file extension: {ext}. Use '.zarr' or '.nc'.")
    write()

    _log(f"Dataset saved successfully to {path}")
    return path