DOI-USGS · thodson-usgs · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
@@ -29,6 +29,7 @@
     SAMPLES_URL,
     _check_profiles,
     _default_headers,
+    _drop_hash_columns,
     _get_args,
     get_ogc_data,
     get_stats_data,
@@ -57,6 +58,7 @@ def get_daily(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Daily data provide one data value to represent water conditions for the
     day.
@@ -189,6 +191,9 @@ def get_daily(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -272,6 +277,7 @@ def get_continuous(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """
     Continuous data provide instantaneous water conditions.
@@ -399,6 +405,9 @@ def get_continuous(
     convert_type : boolean, optional
         If True, the function will convert the data to dates and qualifier to
         string vector
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -492,6 +501,7 @@ def get_monitoring_locations(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Location information is basic information about the monitoring location
     including the name, identifier, agency responsible for data collection, and
@@ -707,6 +717,9 @@ def get_monitoring_locations(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -770,6 +783,7 @@ def get_time_series_metadata(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Daily data and continuous measurements are grouped into time series,
     which represent a collection of observations of a single parameter,
@@ -930,6 +944,9 @@ def get_time_series_metadata(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1027,6 +1044,7 @@ def get_combined_metadata(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get combined monitoring-location and time-series metadata.
 
@@ -1127,6 +1145,9 @@ def get_combined_metadata(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1215,6 +1236,7 @@ def get_latest_continuous(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """This endpoint provides the most recent observation for each time series
     of continuous data. Continuous data are collected via automated sensors
@@ -1344,6 +1366,9 @@ def get_latest_continuous(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1410,6 +1435,7 @@ def get_latest_daily(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Daily data provide one data value to represent water conditions for the
     day.
@@ -1541,6 +1567,9 @@ def get_latest_daily(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1608,6 +1637,7 @@ def get_field_measurements(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Field measurements are physically measured values collected during a
     visit to the monitoring location. Field measurements consist of measurements
@@ -1729,6 +1759,9 @@ def get_field_measurements(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1792,6 +1825,7 @@ def get_field_measurements_metadata(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get field-measurement metadata: one row per (location, parameter) series.
 
@@ -1847,6 +1881,9 @@ def get_field_measurements_metadata(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -1913,6 +1950,7 @@ def get_peaks(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get the annual peak streamflow / stage record for a monitoring location.
 
@@ -1971,6 +2009,9 @@ def get_peaks(
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------
@@ -2148,6 +2189,7 @@ def get_samples(
     pointLocationWithinMiles: float | None = None,
     projectIdentifier: str | Iterable[str] | None = None,
     recordIdentifierUserSupplied: str | Iterable[str] | None = None,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Search Samples database for USGS water quality data.
     This is a wrapper function for the Samples database API. All potential
@@ -2278,6 +2320,9 @@ def get_samples(
     recordIdentifierUserSupplied : string or iterable of strings, optional
         Internal AQS record identifier that returns 1 entry. Only available
         for the "results" service.
+    include_hash : boolean, optional
+        If False (default), drop the per-activity / per-result UUID columns; if True,
+        keep them (``Activity_ActivityIdentifier``, ``Result_MeasureIdentifier``).
 
     Returns
     -------
@@ -2327,7 +2372,7 @@ def get_samples(
     _check_profiles(service, profile)
 
     # Build argument dictionary, omitting None values
-    params = _get_args(locals(), exclude={"ssl_check", "profile"})
+    params = _get_args(locals(), exclude={"ssl_check", "profile", "include_hash"})
 
     params.update({"mimeType": "text/csv"})
 
@@ -2348,6 +2393,7 @@ def get_samples(
 
     df = pd.read_csv(StringIO(response.text), delimiter=",")
     df = _attach_datetime_columns(df)
+    df = _drop_hash_columns(df, include_hash)
 
     return df, BaseMetadata(response)
 
@@ -2438,6 +2484,7 @@ def get_stats_por(
     site_type_name: str | Iterable[str] | None = None,
     parameter_code: str | Iterable[str] | None = None,
     expand_percentiles: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get day-of-year and month-of-year water data statistics from the
     USGS Water Data API.
@@ -2516,6 +2563,9 @@ def get_stats_por(
         argument will return both the "values" column, containing the list
         of percentile threshold values, and a "value" column, containing
         the singular summary value for the other statistics.
+    include_hash : boolean, optional
+        If False (default), drop the hash columns (``computation_id``,
+        ``parent_time_series_id``); set True to keep them for joining to metadata.
 
     Examples
     --------
@@ -2540,10 +2590,13 @@ def get_stats_por(
         ... )
     """
     # Build argument dictionary, omitting None values
-    params = _get_args(locals(), exclude={"expand_percentiles"})
+    params = _get_args(locals(), exclude={"expand_percentiles", "include_hash"})
 
     return get_stats_data(
-        args=params, service="observationNormals", expand_percentiles=expand_percentiles
+        args=params,
+        service="observationNormals",
+        expand_percentiles=expand_percentiles,
+        include_hash=include_hash,
     )
 
 
@@ -2562,6 +2615,7 @@ def get_stats_date_range(
     site_type_name: str | Iterable[str] | None = None,
     parameter_code: str | Iterable[str] | None = None,
     expand_percentiles: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get monthly and annual water data statistics from the USGS Water Data API.
     This service (called the "observationIntervals" endpoint on api.waterdata.usgs.gov)
@@ -2644,6 +2698,9 @@ def get_stats_date_range(
         argument will return both the "values" column, containing the list
         of percentile threshold values, and a "value" column, containing
         the singular summary value for the other statistics.
+    include_hash : boolean, optional
+        If False (default), drop the hash columns (``computation_id``,
+        ``parent_time_series_id``); set True to keep them for joining to metadata.
 
     Examples
     --------
@@ -2669,12 +2726,13 @@ def get_stats_date_range(
         ... )
     """
     # Build argument dictionary, omitting None values
-    params = _get_args(locals(), exclude={"expand_percentiles"})
+    params = _get_args(locals(), exclude={"expand_percentiles", "include_hash"})
 
     return get_stats_data(
         args=params,
         service="observationIntervals",
         expand_percentiles=expand_percentiles,
+        include_hash=include_hash,
     )
 
 
@@ -2710,6 +2768,7 @@ def get_channel(
     filter: str | None = None,
     filter_lang: FILTER_LANG | None = None,
     convert_type: bool = True,
+    include_hash: bool = False,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """
     Channel measurements taken as part of streamflow field measurements.
@@ -2823,6 +2882,9 @@ def get_channel(
     convert_type : boolean, optional
         If True, the function will convert the data to dates and qualifier to
         string vector
+    include_hash : boolean, optional
+        If False (default), drop the opaque hash-valued ID columns. Set True to
+        keep the secondary hashes (e.g. ``time_series_id``) that join to metadata.
 
     Returns
     -------

diff --git a/dataretrieval/waterdata/types.py b/dataretrieval/waterdata/types.py
@@ -74,3 +74,47 @@
         "count",
     ],
 }
+
+
+# --- CF / xarray vocabulary mappings ---------------------------------------
+# Lookup tables used by :mod:`dataretrieval.waterdata.xarray` to translate
+# USGS terms into CF-conventions metadata. Each is intentionally partial:
+# anything not listed falls back to a sensible default (raw unit string kept
+# verbatim; no standard_name emitted) rather than guessing a wrong CF term.
+# They are plain data, so they live here rather than in the (xarray-optional)
+# converter module and can be extended without importing xarray.
+
+# USGS unit strings -> UDUNITS / CF-canonical form.
+CF_UNIT_MAP = {
+    "ft^3/s": "ft3 s-1",
+    "ft3/s": "ft3 s-1",
+    "ft": "ft",
+    "in": "in",
+    "degC": "degC",
+    "deg C": "degC",
+    "uS/cm": "uS/cm",
+    "mg/l": "mg L-1",
+    "mg/L": "mg L-1",
+    "tons/day": "short_ton day-1",
+    "%": "percent",
+}
+
+# USGS statistic_id -> the operator in a CF ``cell_methods`` string.
+CF_CELL_METHODS = {
+    "00001": "maximum",
+    "00002": "minimum",
+    "00003": "mean",
+    "00006": "sum",
+    "00008": "median",
+    "00011": "point",  # instantaneous
+}
+
+# USGS 5-digit parameter code -> CF standard_name. Deliberately conservative: codes
+# without a confident match are omitted. NOTE(review): verify names against CF table.
+CF_STANDARD_NAMES = {
+    "00060": "water_volume_transport_in_river_channel",
+    "00010": "water_temperature",
+    "00065": "water_surface_height_above_reference_datum",
+    "63160": "water_surface_height_above_reference_datum",
+    "00045": "lwe_thickness_of_precipitation_amount",
+}