# Source code for aospy.model

"""Functionality for representing data on disk of individual models."""
import logging

import numpy as np
import xarray as xr

from ._constants import RADIUS_EARTH
from . import internal_names
from . import utils


def _get_grid_attr(grid_objs, attr_name):
    """Get attribute from the grid_objs file(s)."""
    for xds in grid_objs:
        try:
            return getattr(xds, attr_name)
        except AttributeError:
            pass


def _rename_coords(ds, attrs):
    """Rename coordinates to aospy's internal names."""
    for name_int, names_ext in attrs.items():
        # Check if coord is in dataset already.
        ds_coord_name = set(names_ext).intersection(set(ds.coords))
        if ds_coord_name:
            # Rename to the aospy internal name.
            try:
                ds = ds.rename({list(ds_coord_name)[0]: name_int})
                logging.debug("Rename coord from `{0}` to `{1}` for "
                              "Dataset `{2}`".format(ds_coord_name,
                                                     name_int, ds))
            # xarray throws a ValueError if the name already exists
            except ValueError:
                ds = ds
    return ds


def _bounds_from_array(arr, dim_name, bounds_name):
    """Estimate cell bounds from an array of cell-center values.

    Useful e.g. when the center lat/lon values of a lat-lon grid are
    known but the edges of each grid box are not.  Each bound is placed
    half a grid spacing away from its center; the last cell reuses the
    final spacing.

    ``bounds_name`` is accepted but not used here: the lower and upper
    arrays are joined along a new dimension literally named 'bounds', and
    the transposed result is returned.
    """
    # TODO: don't assume needed dimension is in axis=0
    centers = arr.values
    half_step = 0.5*arr.diff(dim_name).values

    def _edge(sign):
        # sign=-1 gives the lower edges, sign=+1 the upper edges.
        edge = xr.DataArray(np.empty_like(arr), dims=arr.dims,
                            coords=arr.coords)
        edge.values[:-1] = centers[:-1] + sign*half_step
        edge.values[-1] = centers[-1] + sign*half_step[-1]
        return edge

    lower_edges = _edge(-1)
    upper_edges = _edge(1)
    return xr.concat([lower_edges, upper_edges], dim='bounds').T


def _diff_bounds(bounds, coord):
    """Get grid spacing by subtracting upper and lower bounds."""
    try:
        return bounds[:, 1] - bounds[:, 0]
    except IndexError:
        diff = np.diff(bounds, axis=0)
        return xr.DataArray(diff, dims=coord.dims, coords=coord.coords)


def _grid_sfc_area(lon, lat, lon_bounds=None, lat_bounds=None):
    """Compute the surface area of every cell of a lon-lat grid.

    Any bounds that are not supplied are derived from the cell centers
    via ``_bounds_from_array``.  The area is the product of the longitude
    width (in radians), the absolute difference of the sine of the
    latitude bounds, and ``RADIUS_EARTH`` squared.
    """
    lon_name = internal_names.LON_STR
    lat_name = internal_names.LAT_STR
    lon_bnds_name = internal_names.LON_BOUNDS_STR
    lat_bnds_name = internal_names.LAT_BOUNDS_STR

    # Fall back to bounds inferred from the centers when none are given.
    if lon_bounds is None:
        lon_bounds = _bounds_from_array(lon, lon_name, lon_bnds_name)
    if lat_bounds is None:
        lat_bounds = _bounds_from_array(lat, lat_name, lat_bnds_name)

    # Width of each cell in longitude and in sin(latitude).
    to_radians = utils.vertcoord.to_radians
    lon_widths = _diff_bounds(to_radians(lon_bounds, is_delta=True), lon)
    sin_lat_edges = np.sin(to_radians(lat_bounds, is_delta=True))
    sin_lat_widths = np.abs(_diff_bounds(sin_lat_edges, lat))
    area = lon_widths*sin_lat_widths*(RADIUS_EARTH**2)

    # Make the coordinate names match the actual lat / lon; a ValueError
    # from the rename is deliberately ignored.
    try:
        area = area.rename({lat_bnds_name: lat_name,
                            lon_bnds_name: lon_name})
    except ValueError:
        pass

    # Final clean-up: array name, coordinate values and dimension order.
    area = area.rename(internal_names.SFC_AREA_STR)
    area[lat_name] = lat
    area[lon_name] = lon
    return area.transpose()


class Model(object):
    """An object that describes a single climate or weather model.

    Each `Model` object is associated with a parent `Proj` object and also
    with one or more child `Run` objects.

    If aospy is being used to work with non climate- or weather-model data,
    the `Model` object can be used e.g. to represent a gridded observational
    product, with its child `Run` objects representing different released
    versions of that dataset.

    Attributes
    ----------
    name : str
        The model's name
    description : str
        A description of the model
    proj : {None, aospy.Proj}
        The model's parent aospy.Proj object
    runs : list
        A list of this model's child Run objects
    default_runs : list
        The default subset of child run objects on which to perform
        calculations via `aospy.Calc` with this model if not otherwise
        specified
    grid_file_paths : list
        The paths to netCDF files stored on disk from which the model's
        coordinate data can be taken.
    default_start_date, default_end_date : datetime.datetime
        The default start and end dates of any calculations using this Model

    """

    def __init__(self, name=None, description=None, proj=None,
                 grid_file_paths=None, default_start_date=None,
                 default_end_date=None, runs=None, default_runs=None,
                 load_grid_data=False, grid_attrs=None):
        """
        Parameters
        ----------
        name : str
            The model's name.  This must be unique from that of any other
            `Model` objects being used by the parent `Proj`.
        description : str, optional
            A description of the model.  This is not used internally by
            aospy; it is solely for the user's information.
        proj : {None, aospy.Proj}, optional
            The parent Proj object.  When the parent `Proj` object is
            instantiated with this Model included in its `models`
            attribute, this will be over-written with that `Proj` object.
        grid_file_paths : {None, sequence of strings}, optional
            The paths to netCDF files stored on disk from which the model's
            coordinate data can be taken.
        default_start_date : {None, `datetime.datetime`}, optional
            Default start date of calculations to be performed using this
            Model.
        default_end_date : {None, `datetime.datetime`}, optional
            Default end date of calculations to be performed using this
            Model.
        runs : {None, sequence of aospy.Run objects}, optional
            The child run objects of this Model
        default_runs : {None, sequence of aospy.Run objects}, optional
            The subset of this Model's runs over which to perform
            calculations by default.
        load_grid_data : bool, optional (default False)
            Whether or not to load the grid data specified by
            'grid_file_paths' upon initialization
        grid_attrs : dict, optional (default None)
            Dictionary mapping aospy internal names of grid attributes to
            their corresponding names used in a particular model.  E.g.
            ``{TIME_STR: 'T'}``.  While aospy checks for a number of
            alternative names for grid attributes used by various models,
            it is not possible to anticipate all possible names.  This
            option allows the user to explicitly tell aospy which variables
            correspond to which internal names (internal names not provided
            in this dictionary will be attempted to be found in the usual
            way).  For a list of built-in alternative names see
            :ref:`the table here <built-in-alternative-names>`.

        Raises
        ------
        ValueError
            If `name` is not a non-empty string.

        See Also
        --------
        aospy.DataLoader, aospy.Proj, aospy.Run

        Notes
        -----
        A side-effect of instantiating a Model object is that the `parent`
        attribute of all of the model's `Run` objects is set to that model.

        """
        if isinstance(name, str) and name:
            self.name = name
        else:
            raise ValueError("Non-empty string value of `name` is required")
        self.description = '' if description is None else description
        self.proj = proj

        self.grid_file_paths = ([] if grid_file_paths is None
                                else grid_file_paths)
        self.default_start_date = default_start_date
        self.default_end_date = default_end_date

        # `runs` defaults to None; fall back to an empty list so that the
        # loop below does not fail when no runs are given.
        self.runs = [] if runs is None else runs
        for run in self.runs:
            run.parent = self
        self.default_runs = [] if default_runs is None else default_runs

        self.grid_attrs = grid_attrs
        self._grid_data_is_set = False
        if load_grid_data:
            self.set_grid_data()
            self._grid_data_is_set = True

    def __str__(self):
        return 'Model instance "' + self.name + '"'

    __repr__ = __str__

    def _get_grid_files(self):
        """Get the files holding grid data for an aospy object.

        Returns
        -------
        tuple
            One opened dataset per entry of ``self.grid_file_paths`` (a
            single string is treated as one path).

        Raises
        ------
        RuntimeError
            If opening a path raises RuntimeError or OSError; the offending
            path is appended to the message.
        """
        grid_file_paths = self.grid_file_paths
        if isinstance(grid_file_paths, str):
            grid_file_paths = [grid_file_paths]
        datasets = []
        for path in grid_file_paths:
            try:
                ds = xr.open_dataset(path, decode_times=False)
            except TypeError:
                # Fall back to the multi-file opener when open_dataset
                # rejects the path with a TypeError.
                ds = xr.open_mfdataset(path, decode_times=False).load()
            except (RuntimeError, OSError) as e:
                msg = str(e) + ': {}'.format(path)
                raise RuntimeError(msg)
            datasets.append(ds)
        return tuple(datasets)

    def _set_mult_grid_attr(self):
        """
        Set multiple attrs from grid file given their names in the grid file.

        For every internal name, the candidate external names are tried in
        order and the first one found in the grid files is stored on this
        object under the internal name, after its coordinates have been
        renamed to internal names.

        Raises
        ------
        ValueError
            If ``self.grid_attrs`` contains a key that is not one of the
            keys of ``internal_names.GRID_ATTRS``.
        """
        grid_objs = self._get_grid_files()

        if self.grid_attrs is None:
            self.grid_attrs = {}

        # Override GRID_ATTRS with entries in grid_attrs
        attrs = internal_names.GRID_ATTRS.copy()
        for k, v in self.grid_attrs.items():
            if k not in attrs:
                raise ValueError(
                    'Unrecognized internal name, {!r}, specified for a '
                    'custom grid attribute name.  See the full list of '
                    'valid internal names below:\n\n{}'.format(
                        k, list(internal_names.GRID_ATTRS.keys())))
            attrs[k] = (v, )

        TIME_STR = internal_names.TIME_STR
        for name_int, names_ext in attrs.items():
            for name in names_ext:
                grid_attr = _get_grid_attr(grid_objs, name)
                if grid_attr is None:
                    continue
                renamed_attr = _rename_coords(grid_attr, attrs)
                # Drop a time coordinate that is not also a dimension of
                # this attribute.
                if ((TIME_STR not in renamed_attr.dims) and
                        (TIME_STR in renamed_attr.coords)):
                    renamed_attr = renamed_attr.drop(TIME_STR)
                setattr(self, name_int, renamed_attr)
                break

    def set_grid_data(self):
        """Populate the attrs that hold grid data.

        Does nothing if the grid data has already been set.  Otherwise the
        grid attributes are read from the grid files, the surface area is
        computed if no non-zero ``sfc_area`` was found, and the level
        thickness is derived from ``self.level`` when that exists.
        """
        if self._grid_data_is_set:
            return
        self._set_mult_grid_attr()
        if not np.any(getattr(self, 'sfc_area', None)):
            try:
                sfc_area = _grid_sfc_area(self.lon, self.lat,
                                          self.lon_bounds, self.lat_bounds)
            except AttributeError:
                # Bounds are unavailable: let them be inferred from the
                # cell centers.
                sfc_area = _grid_sfc_area(self.lon, self.lat)
            self.sfc_area = sfc_area
        try:
            self.levs_thick = utils.vertcoord.level_thickness(self.level)
        except AttributeError:
            self.level = None
            self.levs_thick = None
        self._grid_data_is_set = True