Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 66 additions & 4 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
SAMPLES_URL,
_check_profiles,
_default_headers,
_drop_hash_columns,
_get_args,
get_ogc_data,
get_stats_data,
Expand Down Expand Up @@ -57,6 +58,7 @@ def get_daily(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Daily data provide one data value to represent water conditions for the
day.
Expand Down Expand Up @@ -189,6 +191,9 @@ def get_daily(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -272,6 +277,7 @@ def get_continuous(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""
Continuous data provide instantaneous water conditions.
Expand Down Expand Up @@ -399,6 +405,9 @@ def get_continuous(
convert_type : boolean, optional
If True, the function will convert the data to dates and qualifier to
string vector
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -492,6 +501,7 @@ def get_monitoring_locations(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Location information is basic information about the monitoring location
including the name, identifier, agency responsible for data collection, and
Expand Down Expand Up @@ -707,6 +717,9 @@ def get_monitoring_locations(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -770,6 +783,7 @@ def get_time_series_metadata(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Daily data and continuous measurements are grouped into time series,
which represent a collection of observations of a single parameter,
Expand Down Expand Up @@ -930,6 +944,9 @@ def get_time_series_metadata(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1027,6 +1044,7 @@ def get_combined_metadata(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Get combined monitoring-location and time-series metadata.

Expand Down Expand Up @@ -1127,6 +1145,9 @@ def get_combined_metadata(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1215,6 +1236,7 @@ def get_latest_continuous(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""This endpoint provides the most recent observation for each time series
of continuous data. Continuous data are collected via automated sensors
Expand Down Expand Up @@ -1344,6 +1366,9 @@ def get_latest_continuous(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1410,6 +1435,7 @@ def get_latest_daily(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Daily data provide one data value to represent water conditions for the
day.
Expand Down Expand Up @@ -1541,6 +1567,9 @@ def get_latest_daily(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1608,6 +1637,7 @@ def get_field_measurements(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Field measurements are physically measured values collected during a
visit to the monitoring location. Field measurements consist of measurements
Expand Down Expand Up @@ -1729,6 +1759,9 @@ def get_field_measurements(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1792,6 +1825,7 @@ def get_field_measurements_metadata(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Get field-measurement metadata: one row per (location, parameter) series.

Expand Down Expand Up @@ -1847,6 +1881,9 @@ def get_field_measurements_metadata(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -1913,6 +1950,7 @@ def get_peaks(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Get the annual peak streamflow / stage record for a monitoring location.

Expand Down Expand Up @@ -1971,6 +2009,9 @@ def get_peaks(
and the lexicographic-comparison pitfall.
convert_type : boolean, optional
If True, converts columns to appropriate types.
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down Expand Up @@ -2148,6 +2189,7 @@ def get_samples(
pointLocationWithinMiles: float | None = None,
projectIdentifier: str | Iterable[str] | None = None,
recordIdentifierUserSupplied: str | Iterable[str] | None = None,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Search Samples database for USGS water quality data.
This is a wrapper function for the Samples database API. All potential
Expand Down Expand Up @@ -2278,6 +2320,9 @@ def get_samples(
recordIdentifierUserSupplied : string or iterable of strings, optional
Internal AQS record identifier that returns 1 entry. Only available
for the "results" service.
include_hash : boolean, optional
If False (default), drop the opaque per-activity / per-result UUID columns
(``Activity_ActivityIdentifier``, ``Result_MeasureIdentifier``).

Returns
-------
Expand Down Expand Up @@ -2327,7 +2372,7 @@ def get_samples(
_check_profiles(service, profile)

# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"ssl_check", "profile"})
params = _get_args(locals(), exclude={"ssl_check", "profile", "include_hash"})

params.update({"mimeType": "text/csv"})

Expand All @@ -2348,6 +2393,7 @@ def get_samples(

df = pd.read_csv(StringIO(response.text), delimiter=",")
df = _attach_datetime_columns(df)
df = _drop_hash_columns(df, include_hash)

return df, BaseMetadata(response)

Expand Down Expand Up @@ -2438,6 +2484,7 @@ def get_stats_por(
site_type_name: str | Iterable[str] | None = None,
parameter_code: str | Iterable[str] | None = None,
expand_percentiles: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Get day-of-year and month-of-year water data statistics from the
USGS Water Data API.
Expand Down Expand Up @@ -2516,6 +2563,9 @@ def get_stats_por(
argument will return both the "values" column, containing the list
of percentile threshold values, and a "value" column, containing
the singular summary value for the other statistics.
include_hash : boolean, optional
If False (default), drop the hash columns (``computation_id``,
``parent_time_series_id``); set True to keep them for joining to metadata.

Examples
--------
Expand All @@ -2540,10 +2590,13 @@ def get_stats_por(
... )
"""
# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"expand_percentiles"})
params = _get_args(locals(), exclude={"expand_percentiles", "include_hash"})

return get_stats_data(
args=params, service="observationNormals", expand_percentiles=expand_percentiles
args=params,
service="observationNormals",
expand_percentiles=expand_percentiles,
include_hash=include_hash,
)


Expand All @@ -2562,6 +2615,7 @@ def get_stats_date_range(
site_type_name: str | Iterable[str] | None = None,
parameter_code: str | Iterable[str] | None = None,
expand_percentiles: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""Get monthly and annual water data statistics from the USGS Water Data API.
This service (called the "observationIntervals" endpoint on api.waterdata.usgs.gov)
Expand Down Expand Up @@ -2644,6 +2698,9 @@ def get_stats_date_range(
argument will return both the "values" column, containing the list
of percentile threshold values, and a "value" column, containing
the singular summary value for the other statistics.
include_hash : boolean, optional
If False (default), drop the hash columns (``computation_id``,
``parent_time_series_id``); set True to keep them for joining to metadata.

Examples
--------
Expand All @@ -2669,12 +2726,13 @@ def get_stats_date_range(
... )
"""
# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"expand_percentiles"})
params = _get_args(locals(), exclude={"expand_percentiles", "include_hash"})

return get_stats_data(
args=params,
service="observationIntervals",
expand_percentiles=expand_percentiles,
include_hash=include_hash,
)


Expand Down Expand Up @@ -2710,6 +2768,7 @@ def get_channel(
filter: str | None = None,
filter_lang: FILTER_LANG | None = None,
convert_type: bool = True,
include_hash: bool = False,
) -> tuple[pd.DataFrame, BaseMetadata]:
"""
Channel measurements taken as part of streamflow field measurements.
Expand Down Expand Up @@ -2823,6 +2882,9 @@ def get_channel(
convert_type : boolean, optional
If True, the function will convert the data to dates and qualifier to
string vector
include_hash : boolean, optional
If False (default), drop the opaque hash-valued ID columns. Set True to
keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.

Returns
-------
Expand Down
44 changes: 44 additions & 0 deletions dataretrieval/waterdata/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,47 @@
"count",
],
}


# --- CF / xarray vocabulary mappings ---------------------------------------
# Lookup tables used by :mod:`dataretrieval.waterdata.xarray` to translate
# USGS terms into CF-conventions metadata. Each is intentionally partial:
# anything not listed falls back to a sensible default (raw unit string kept
# verbatim; no standard_name emitted) rather than guessing a wrong CF term.
# They are plain data, so they live here rather than in the (xarray-optional)
# converter module and can be extended without importing xarray.

# Translation table: unit string as USGS writes it -> UDUNITS / CF-canonical
# spelling. Alternate spellings of one unit (e.g. "ft^3/s" and "ft3/s") share
# a single target, and units that need no rewriting map to themselves.
CF_UNIT_MAP = {
    # Cubic feet per second, with and without the caret.
    "ft^3/s": "ft3 s-1",
    "ft3/s": "ft3 s-1",
    # Identity entries.
    "ft": "ft",
    "in": "in",
    # Degrees Celsius, with and without the embedded space.
    "degC": "degC",
    "deg C": "degC",
    "uS/cm": "uS/cm",
    # Milligrams per litre; both letter cases of the litre symbol.
    "mg/l": "mg L-1",
    "mg/L": "mg L-1",
    # "tons" is rendered as the short ton.
    "tons/day": "short_ton day-1",
    "%": "percent",
}

# Translation table: USGS statistic_id -> the operator word used in a CF
# ``cell_methods`` string. Keys are the zero-padded five-character ids as
# strings, not integers.
CF_CELL_METHODS = {
    "00001": "maximum",
    "00002": "minimum",
    "00003": "mean",
    "00006": "sum",
    "00008": "median",
    # Instantaneous values are expressed as the "point" operator.
    "00011": "point",
}

# Translation table: USGS 5-digit parameter code -> CF standard_name.
# Kept deliberately small: a code is listed only when the match is confident,
# and any code absent from this table gets no standard_name at all.
CF_STANDARD_NAMES = {
    "00060": "water_volume_transport_in_river_channel",
    "00010": "water_temperature",
    # Two parameter codes share the same standard_name.
    "00065": "water_surface_height_above_reference_datum",
    "63160": "water_surface_height_above_reference_datum",
    "00045": "lwe_thickness_of_precipitation_amount",
}
Loading
Loading