###############################################################################
## ##
## LIBRARIES ##
## ##
###############################################################################
# Standard library imports
import json
from pathlib import Path
from typing import Dict, Union
# Data handling libraries
import numpy as np
import pandas as pd
import xarray as xr
import scipy.io
###############################################################################
## ##
## FUNCTIONS ##
## ##
###############################################################################
[docs]
def save_satellite_data(output_path, Sat_lon, Sat_lat, SatData_complete):
    """
    Write satellite coordinates and data to disk in the format(s) picked at an interactive prompt.

    The three arrays are validated, converted to plain NumPy arrays when they
    arrive as xarray DataArrays, and then written into ``output_path``
    according to the menu entry typed by the user:

    * ``.mat``  -> one file, ``SatData_clean.mat``, holding all three arrays
    * ``.nc``   -> three files, ``Sat_lon.nc``, ``Sat_lat.nc`` and ``SatData_complete.nc``
    * ``.json`` -> one file, ``SatData_clean.json``, holding the arrays as nested lists

    Parameters
    ----------
    output_path : str or pathlib.Path
        Existing directory that receives the output files.
    Sat_lon : np.ndarray or xarray.DataArray
        2D longitude array.
    Sat_lat : np.ndarray or xarray.DataArray
        2D latitude array.
    SatData_complete : np.ndarray or xarray.DataArray
        3D data array, documented as (time, lat, lon).

    Raises
    ------
    TypeError
        If ``output_path`` is not a str/Path or an array is neither a NumPy
        array nor an xarray DataArray.
    ValueError
        If ``output_path`` is not an existing directory or an array has the
        wrong number of dimensions.

    Notes
    -----
    The format is requested through ``input()``, so the call blocks until the
    user answers. An answer outside ``1``-``5`` writes nothing: an error line
    is printed and the function returns.

    Example
    -------
    >>> save_satellite_data('./output', Sat_lon, Sat_lat, SatData_complete)
    Choose a file format to save the data:
    1. MAT-File (.mat)
    2. NetCDF (.nc)
    3. Both MAT and NetCDF
    4. JSON (.json)
    5. All formats
    Enter the number corresponding to your choice: 1
    """
    # --- Output directory checks -------------------------------------------
    if not isinstance(output_path, (str, Path)):
        raise TypeError("❌ output_path must be a string or Path object ❌")
    output_path = Path(output_path)
    if not output_path.is_dir():
        raise ValueError(f"❌ '{output_path}' is not a valid directory ❌")

    # --- Array checks: every type first, then every rank, in argument order --
    inputs = (
        ("Sat_lon", Sat_lon),
        ("Sat_lat", Sat_lat),
        ("SatData_complete", SatData_complete),
    )
    for label, array in inputs:
        if not isinstance(array, (np.ndarray, xr.DataArray)):
            raise TypeError(f"❌ {label} must be a NumPy array or xarray DataArray ❌")
    for label, array in inputs[:2]:
        if array.ndim != 2:
            raise ValueError(f"❌ {label} should be 2D, got shape {array.shape} ❌")
    if SatData_complete.ndim != 3:
        raise ValueError(f"❌ SatData_complete should be 3D (time, lat, lon), got shape {SatData_complete.shape} ❌")

    # --- Unwrap xarray containers; all writers below work on NumPy arrays ---
    arrays = {}
    for label, array in inputs:
        arrays[label] = array.values if isinstance(array, xr.DataArray) else array

    # --- Interactive format selection ----------------------------------------
    divider = "-" * 45
    for menu_line in (
        "Choose a file format to save the data:",
        "1. MAT-File (.mat)",
        "2. NetCDF (.nc)",
        "3. Both MAT and NetCDF",
        "4. JSON (.json)",
        "5. All formats",
    ):
        print(menu_line)
    selection = input("Enter the number corresponding to your choice: ").strip()
    print(divider)

    # Anything outside the menu: report it and stop before touching the disk
    if selection not in ("1", "2", "3", "4", "5"):
        print("❌ Invalid choice. Please run the script again and select a valid option.")
        return

    want_mat = selection in ("1", "3", "5")
    want_netcdf = selection in ("2", "3", "5")
    want_json = selection in ("4", "5")

    # --- MATLAB: a single file containing the three variables ---------------
    if want_mat:
        print("Saving data as a single .mat file...")
        mat_file = output_path / "SatData_clean.mat"
        scipy.io.savemat(str(mat_file), arrays)
        print("Data saved as SatData_clean.mat")
        print(divider)

    # --- NetCDF: one file per variable ---------------------------------------
    if want_netcdf:
        print("Saving the data as separate .nc files...")
        for label, array in arrays.items():
            nc_file = output_path / f"{label}.nc"
            xr.DataArray(array).to_netcdf(str(nc_file))
            print(f"{label} saved as {label}.nc")
        print(divider)

    # --- JSON: arrays serialised as nested lists -----------------------------
    if want_json:
        print("Saving data as a JSON file (flattened)...")
        payload = {label: array.tolist() for label, array in arrays.items()}
        with open(output_path / "SatData_clean.json", 'w') as handle:
            json.dump(payload, handle)
        print("Data saved as SatData_clean.json")
        print(divider)

    # --- Final confirmation --------------------------------------------------
    print("\033[92m✅ The requested data has been saved!\033[0m")
    print("*" * 45)
###############################################################################
###############################################################################
[docs]
def save_model_data(output_path, ModData_complete):
    """
    Write a 3D model data array to disk in the format(s) picked at an interactive prompt.

    After validating the inputs, the function prints a warning that the
    satellite NaN mask ("satnan") has not been applied, shows a menu and saves
    the array into ``output_path`` according to the answer:

    * ``.mat``  -> ``ModData_complete.mat`` (variable name ``ModData_complete``)
    * ``.nc``   -> ``ModData_complete.nc``
    * ``.json`` -> ``ModData_complete.json`` (array stored as nested lists)

    Menu entries 3 and 5 combine formats (MAT + NetCDF, and all three).

    Parameters
    ----------
    output_path : str or pathlib.Path
        Existing directory that receives the output files.
    ModData_complete : np.ndarray or xarray.DataArray
        3D model data array (e.g. [time, lat, lon]).

    Raises
    ------
    TypeError
        If ``output_path`` is not a str/Path or ``ModData_complete`` is neither
        a NumPy array nor an xarray DataArray.
    ValueError
        If ``output_path`` is not an existing directory or the array is not 3D.

    Notes
    -----
    The printed warning recommends running the data through the
    ``Interpolator.m`` script so that it lines up with the satellite data,
    in particular when working with level 3 data.
    The format is requested through ``input()``; an answer outside ``1``-``5``
    writes nothing, prints an error line and returns.
    """
    # Path must be a str/Path pointing at an existing directory
    if not isinstance(output_path, (str, Path)):
        raise TypeError("❌ output_path must be a string or Path object ❌")
    output_path = Path(output_path)
    if not output_path.is_dir():
        raise ValueError(f"❌ '{output_path}' is not a valid directory ❌")

    # Data must be a 3D NumPy array or DataArray
    if not isinstance(ModData_complete, (np.ndarray, xr.DataArray)):
        raise TypeError("❌ ModData_complete must be a NumPy array or xarray DataArray ❌")
    if ModData_complete.ndim != 3:
        raise ValueError(f"❌ ModData_complete should be 3D (e.g., [time, lat, lon]), got shape {ModData_complete.shape} ❌")

    # Warn that the satellite mask is missing, then show the format menu
    for notice_line in (
        "\033[91m⚠️ Careful ⚠️\033[0m",
        "\033[91m⚠️ These dataset do not have the satnan mask applied to them ⚠️\033[0m",
        " For further analysis it is suggested to pass these data",
        " through the Interpolator.m script provided alongside",
        " these Python scripts to ensure that shapes etc. match",
        " with the Satellite data, especially regarding the presence",
        " of the missing satellite values",
        "\033[91m⚠️ This is necessary when using the level3 data ⚠️\033[0m",
        "Choose a file format to save the data:",
        "1. MAT-File (.mat)",
        "2. NetCDF (.nc)",
        "3. Both MAT and NetCDF",
        "4. JSON (.json)",
        "5. All formats",
    ):
        print(notice_line)
    selection = input("Enter the number corresponding to your choice: ").strip()
    divider = "-" * 45
    print(divider)

    # All writers below operate on a plain NumPy array
    if isinstance(ModData_complete, xr.DataArray):
        ModData_complete = ModData_complete.values

    # Unknown menu entry: nothing is written
    if selection not in ("1", "2", "3", "4", "5"):
        print("❌ Invalid choice. Please run the script again and select a valid option. ❌")
        return

    # MATLAB output (entries 1, 3, 5)
    if selection in ("1", "3", "5"):
        print("Saving data as a .mat file...")
        mat_file = output_path / "ModData_complete.mat"
        scipy.io.savemat(str(mat_file), {"ModData_complete": ModData_complete})
        print("Data saved as ModData_complete.mat")
        print(divider)

    # NetCDF output (entries 2, 3, 5); the array is wrapped in a DataArray to write it
    if selection in ("2", "3", "5"):
        print("Saving ModData_complete as a .nc file...")
        nc_file = output_path / "ModData_complete.nc"
        xr.DataArray(ModData_complete).to_netcdf(str(nc_file))
        print("ModData_complete saved as ModData_complete.nc")
        print(divider)

    # JSON output (entries 4, 5); the array is stored as nested lists
    if selection in ("4", "5"):
        print("Saving data as a JSON file (flattened)...")
        payload = {"ModData_complete": ModData_complete.tolist()}
        with open(output_path / "ModData_complete.json", 'w') as handle:
            json.dump(payload, handle)
        print("Data saved as ModData_complete.json")
        print(divider)

    print("\033[92m✅ The requested data has been saved!\033[0m")
    print("*" * 45)
###############################################################################
###############################################################################
[docs]
def save_to_netcdf(data_dict: Dict[str, Union[np.ndarray, xr.DataArray]], output_path: Union[str, Path]) -> None:
    """
    Save each variable from a dictionary of arrays or DataArrays as a separate NetCDF file.

    Every entry ``name -> data`` is written to ``<output_path>/<name>.nc``.
    NumPy arrays are wrapped in an ``xarray.DataArray`` named after the key.
    A DataArray that has no name is written under the key's name; a DataArray
    that already has a name keeps it inside the file (the file itself is still
    named after the key).

    Parameters
    ----------
    data_dict : dict
        Dictionary where keys are variable names and values are NumPy arrays
        or xarray.DataArrays.
    output_path : str or Path
        Existing directory where the NetCDF files will be saved.

    Raises
    ------
    ValueError
        If output_path is not a valid directory.
    TypeError
        If any data item is not a NumPy array or xarray DataArray. All items
        are checked before the first file is written, so a bad entry never
        leaves a partially written set of files behind.

    Notes
    -----
    The caller's objects are never modified: an unnamed DataArray is renamed
    on a new object returned by ``DataArray.rename`` instead of having its
    ``name`` attribute assigned in place.

    Example
    -------
    >>> import numpy as np
    >>> import xarray as xr
    >>> data_dict = {
    ...     'temperature': np.random.rand(10, 5, 5),
    ...     'precipitation': xr.DataArray(np.random.rand(10, 5, 5))
    ... }
    >>> save_to_netcdf(data_dict, "./output_data")
    """
    # Normalise to a Path object; Path() itself rejects non path-like input
    output_path = Path(output_path)
    if not output_path.is_dir():
        raise ValueError(f"❌ '{output_path}' is not a valid directory ❌")

    # First pass: validate everything up front so that either all files are
    # attempted or none are
    for var_name, data in data_dict.items():
        if not isinstance(data, (np.ndarray, xr.DataArray)):
            raise TypeError(f"❌ Data for variable '{var_name}' must be a NumPy array or xarray DataArray ❌")

    # Second pass: write one NetCDF file per entry
    for var_name, data in data_dict.items():
        if isinstance(data, np.ndarray):
            array = xr.DataArray(data, name=var_name)
        elif data.name is None:
            # rename() returns a new DataArray, leaving the caller's object unnamed
            array = data.rename(var_name)
        else:
            array = data

        # A Dataset wrapper stores the variable under its own name in the file
        dataset = xr.Dataset({array.name: array})
        dataset.to_netcdf(output_path / f"{var_name}.nc")
###############################################################################
###############################################################################
[docs]
def convert_to_serializable(obj):
    """
    Recursively convert an object to a form compatible with JSON serialization.

    Parameters
    ----------
    obj : any
        The object to convert.

    Returns
    -------
    obj_serializable : JSON-compatible representation
        Built only from ``str``, ``int``, ``float``, ``bool``, ``None``,
        ``list`` and ``dict`` with string keys.

    Notes
    -----
    Conversion rules, in the order they are tried:

    * NumPy scalars (``np.int64``, ``np.float32``, ``np.bool_`` ...) become the
      matching Python scalar via ``.item()``.
    * ``str``/``int``/``float``/``bool``/``None`` are returned unchanged.
    * ``list``/``tuple``/``set`` become lists, converted element by element.
    * ``dict`` keys are passed through ``str()``; values are converted.
    * NumPy arrays become nested lists. Arrays whose dtype is not plain
      bool/integer/float/unicode are additionally converted element by element.
    * pandas DataFrames become a list of row dictionaries, Series a dictionary.
    * xarray DataArrays become ``{"dims", "coords", "data"}``; Datasets use
      their own ``to_dict(data=True)``.
    * Any other object exposing ``to_dict()`` is converted through it.
    * Everything else falls back to ``str(obj)``.

    The output of every ``to_dict``/``tolist`` step is fed back through this
    function, so nested values and non-string keys that JSON cannot represent
    are normalised as well. As a consequence, dictionary keys coming from
    pandas objects are strings and xarray ``dims`` is a list.
    """
    # NumPy scalars are handled first: most of them are NOT subclasses of the
    # Python builtins, so without this they would hit the str() fallback and
    # numbers would silently turn into strings.
    if isinstance(obj, np.generic):
        native = obj.item()
        # If .item() hands back another NumPy scalar there is nothing further
        # to unwrap; stringify instead of recursing forever.
        if isinstance(native, np.generic):
            return str(obj)
        return convert_to_serializable(native)

    # JSON-native scalars pass through untouched
    if isinstance(obj, (str, int, float, bool, type(None))):
        return obj

    # Sequences and sets -> list
    if isinstance(obj, (list, tuple, set)):
        return [convert_to_serializable(item) for item in obj]

    # Mappings -> dict with string keys
    if isinstance(obj, dict):
        return {str(key): convert_to_serializable(value) for key, value in obj.items()}

    # NumPy arrays -> nested lists
    if isinstance(obj, np.ndarray):
        nested = obj.tolist()
        # bool / int / uint / float / unicode arrays are returned as-is;
        # skipping the element-wise pass keeps large arrays cheap.
        if obj.dtype.kind in "biufU":
            return nested
        # Other dtypes (object, datetime, bytes, complex, ...) are walked
        # element by element.
        return convert_to_serializable(nested)

    # pandas containers: convert, then normalise keys and cell values
    if isinstance(obj, pd.DataFrame):
        return convert_to_serializable(obj.to_dict(orient="records"))
    if isinstance(obj, pd.Series):
        return convert_to_serializable(obj.to_dict())

    # xarray DataArray: separate out dims, coords, and values
    if isinstance(obj, xr.DataArray):
        return {
            "dims": convert_to_serializable(obj.dims),
            "coords": {
                str(name): convert_to_serializable(coord.values)
                for name, coord in obj.coords.items()
            },
            "data": convert_to_serializable(obj.values),
        }

    # xarray Dataset: use its own dictionary export, then normalise it
    if isinstance(obj, xr.Dataset):
        return convert_to_serializable(obj.to_dict(data=True))

    # Duck-typed objects exposing to_dict()
    if hasattr(obj, "to_dict"):
        return convert_to_serializable(obj.to_dict())

    # Fallback: string representation
    return str(obj)
###############################################################################
###############################################################################
[docs]
def save_variable_to_json(variable, output_path):
    """
    Serialise an arbitrary Python variable and write it to a JSON file.

    The variable is first passed through ``convert_to_serializable`` and the
    result is dumped with a two-space indent. A green confirmation line
    containing the target path is printed once the file has been written.

    Parameters
    ----------
    variable : any
        Object to store (e.g. dict, NumPy array, DataFrame, DataArray, or a
        nested combination).
    output_path : str or Path
        Destination file; its extension must be ``.json`` (checked
        case-insensitively).

    Raises
    ------
    ValueError
        If ``output_path`` does not end in ``.json``.

    Example
    -------
    >>> import numpy as np
    >>> save_variable_to_json(np.array([[1, 2], [3, 4]]), "array_data.json")
    """
    target = Path(output_path)

    # Refuse anything that is not a .json file before doing any work
    extension = target.suffix.lower()
    if extension != '.json':
        raise ValueError("❌ Output file must have a .json extension ❌")

    # Reduce the variable to JSON-compatible types, then write it out
    payload = convert_to_serializable(variable)
    with open(target, 'w') as handle:
        json.dump(payload, handle, indent=2)

    print(f"\033[92m✅ Variable saved to {target}\033[0m")