Source code for aospy.model

"""Functionality for representing data on disk of individual models."""
import logging

import numpy as np
import xarray as xr

from .constants import r_e
from . import internal_names
from . import utils


def _get_grid_attr(grid_objs, attr_name):
    """Get attribute from the grid_objs file(s)."""
    for xds in grid_objs:
        try:
            return getattr(xds, attr_name)
        except AttributeError:
            pass


def _rename_coords(ds):
    """Rename the coordinates of `ds` to aospy's internal names.

    For every internal name in ``internal_names.GRID_ATTRS``, look for one
    of its external aliases among the coordinates of `ds` and, if present,
    rename it to the internal name.  The (possibly renamed) object is
    returned.
    """
    for internal_name, external_names in internal_names.GRID_ATTRS.items():
        matches = set(external_names) & set(ds.coords)
        if not matches:
            # None of the aliases is a coordinate of this object.
            continue
        try:
            ds = ds.rename({next(iter(matches)): internal_name})
            logging.debug("Rename coord from `{0}` to `{1}` for "
                          "Dataset `{2}`".format(matches,
                                                 internal_name, ds))
        except ValueError:
            # xarray throws a ValueError if the name already exists; in
            # that case the object is left untouched.
            continue
    return ds


def _bounds_from_array(arr, dim_name, bounds_name):
    """Get the bounds of an array given its center values.

    E.g. if lat-lon grid center lat/lon values are known, but not the
    bounds of each grid box.  The algorithm assumes that the bounds
    are simply halfway between each pair of center values.  The two
    outermost bounds are obtained by extrapolating half of the nearest
    spacing beyond the first and last centers.

    Parameters
    ----------
    arr : xarray.DataArray
        Center values.  The dimension `dim_name` is assumed to be axis 0.
    dim_name : str
        Name of the dimension of `arr` along which to compute the spacing.
    bounds_name : str
        Currently unused; kept for interface compatibility.

    Returns
    -------
    xarray.DataArray
        Lower and upper bounds concatenated along a new dimension named
        'bounds' and then transposed, so that for 1-D input the result is
        indexed as ``[cell, (lower, upper)]``.

    Raises
    ------
    ValueError
        If `arr` has fewer than two values along `dim_name`, in which case
        no spacing can be computed.
    """
    # TODO: don't assume needed dimension is in axis=0
    centers = arr.values
    half_spacing = 0.5 * arr.diff(dim_name).values
    if half_spacing.shape[0] == 0:
        raise ValueError("At least two center values along `{}` are required "
                         "to infer bounds".format(dim_name))
    # Interior edges are the midpoints between neighbouring centers, so the
    # upper bound of cell i coincides with the lower bound of cell i+1 even
    # on non-uniform grids.
    lower_vals = np.concatenate([centers[:1] - half_spacing[:1],
                                 centers[1:] - half_spacing])
    upper_vals = np.concatenate([centers[:-1] + half_spacing,
                                 centers[-1:] + half_spacing[-1:]])
    lower = xr.DataArray(lower_vals, dims=arr.dims, coords=arr.coords)
    upper = xr.DataArray(upper_vals, dims=arr.dims, coords=arr.coords)
    bounds = xr.concat([lower, upper], dim='bounds')
    return bounds.T


def _diff_bounds(bounds, coord):
    """Get grid spacing by subtracting upper and lower bounds."""
    try:
        return bounds[:, 1] - bounds[:, 0]
    except IndexError:
        diff = np.diff(bounds, axis=0)
        return xr.DataArray(diff, dims=coord.dims, coords=coord.coords)


def _grid_sfc_area(lon, lat, lon_bounds=None, lat_bounds=None):
    """Calculate surface area of each grid cell in a lon-lat grid.

    The area is computed as ``dlon * |d(sin(lat))| * r_e**2``, where the
    differences are taken across each cell's bounds.  Bounds that are not
    supplied are inferred from the cell centers via `_bounds_from_array`.
    """
    # Fill in whichever bounds were not provided.
    if lon_bounds is None:
        lon_bounds = _bounds_from_array(lon,
                                        internal_names.LON_STR,
                                        internal_names.LON_BOUNDS_STR)
    if lat_bounds is None:
        lat_bounds = _bounds_from_array(lat,
                                        internal_names.LAT_STR,
                                        internal_names.LAT_BOUNDS_STR)

    # Width of each cell in longitude and in sin(latitude).
    lon_bounds_rad = utils.vertcoord.to_radians(lon_bounds, is_delta=True)
    dlon = _diff_bounds(lon_bounds_rad, lon)
    lat_bounds_rad = utils.vertcoord.to_radians(lat_bounds, is_delta=True)
    dsinlat = np.abs(_diff_bounds(np.sin(lat_bounds_rad), lat))
    sfc_area = dlon * dsinlat * (r_e**2)

    # Rename the coordinates such that they match the actual lat / lon.
    bounds_to_centers = {
        internal_names.LAT_BOUNDS_STR: internal_names.LAT_STR,
        internal_names.LON_BOUNDS_STR: internal_names.LON_STR,
    }
    try:
        sfc_area = sfc_area.rename(bounds_to_centers)
    except ValueError:
        # Nothing to rename (or the target names already exist).
        pass

    # Clean up: correct names and dimension order.
    sfc_area = sfc_area.rename(internal_names.SFC_AREA_STR)
    sfc_area[internal_names.LAT_STR] = lat
    sfc_area[internal_names.LON_STR] = lon
    return sfc_area.transpose()


class Model(object):
    """An object that describes a single climate or weather model.

    Each `Model` object is associated with a parent `Proj` object and also
    with one or more child `Run` objects.

    If aospy is being used to work with non climate- or weather-model data,
    the `Model` object can be used e.g. to represent a gridded observational
    product, with its child `Run` objects representing different released
    versions of that dataset.

    Attributes
    ----------
    name : str
        The model's name
    description : str
        A description of the model
    proj : {None, aospy.Proj}
        The model's parent aospy.Proj object
    runs : list
        A list of this model's child Run objects
    default_runs : list
        The default subset of child run objects on which to perform
        calculations via `aospy.Calc` with this model if not otherwise
        specified
    grid_file_paths : list
        The paths to netCDF files stored on disk from which the model's
        coordinate data can be taken.
    default_start_date, default_end_date : datetime.datetime
        The default start and end dates of any calculations using this Model

    """

    def __init__(self, name=None, description=None, proj=None,
                 grid_file_paths=None, default_start_date=None,
                 default_end_date=None, runs=None, default_runs=None,
                 load_grid_data=False):
        """
        Parameters
        ----------
        name : str
            The model's name.  This must be unique from that of any other
            `Model` objects being used by the parent `Proj`.
        description : str, optional
            A description of the model.  This is not used internally by
            aospy; it is solely for the user's information.
        proj : {None, aospy.Proj}, optional
            The parent Proj object.  When the parent `Proj` object is
            instantiated with this Model included in its `models` attribute,
            this will be over-written with that `Proj` object.
        grid_file_paths : {None, sequence of strings}, optional
            The paths to netCDF files stored on disk from which the model's
            coordinate data can be taken.
        default_start_date, default_end_date : {None, `datetime.datetime`}, optional
            Default start and end dates of calculations to be performed
            using this Model.
        runs : {None, sequence of aospy.Run objects}, optional
            The child run objects of this Model.  None is treated as an
            empty list.
        default_runs : {None, sequence of aospy.Run objects}, optional
            The subset of this Model's runs over which to perform
            calculations by default.
        load_grid_data : bool, optional (default False)
            Whether or not to load the grid data specified by
            'grid_file_paths' upon initialization

        Raises
        ------
        ValueError
            If `name` is not a non-empty string.

        See Also
        --------
        aospy.DataLoader, aospy.Proj, aospy.Run

        Note
        ----
        A side-effect of instantiating a Model object is that the `parent`
        attribute of all of the model's `Run` objects is set to that model.

        """
        if isinstance(name, str) and name:
            self.name = name
        else:
            raise ValueError("Non-empty string value of `name` is required")
        self.description = '' if description is None else description
        self.proj = proj

        grid_file_paths = [] if grid_file_paths is None else grid_file_paths
        self.grid_file_paths = grid_file_paths

        self.default_start_date = default_start_date
        self.default_end_date = default_end_date

        # `runs` defaults to None; fall back to an empty list so that the
        # loop below (and later iteration over `self.runs`) does not fail.
        self.runs = [] if runs is None else runs
        for run in self.runs:
            setattr(run, 'parent', self)
        self.default_runs = [] if default_runs is None else default_runs

        self._grid_data_is_set = False
        if load_grid_data:
            self.set_grid_data()
            self._grid_data_is_set = True

    def __str__(self):
        return 'Model instance "' + self.name + '"'

    __repr__ = __str__

    def _get_grid_files(self):
        """Get the files holding grid data for an aospy object.

        Returns a tuple with one opened dataset per entry of
        `self.grid_file_paths` (a single string is treated as one path).
        Raises RuntimeError, with the offending path appended to the
        message, if opening fails with a RuntimeError or OSError.
        """
        grid_file_paths = self.grid_file_paths
        datasets = []
        if isinstance(grid_file_paths, str):
            grid_file_paths = [grid_file_paths]
        for path in grid_file_paths:
            try:
                ds = xr.open_dataset(path, decode_times=False)
            except TypeError:
                # Presumably `path` is itself a collection of paths that
                # open_dataset cannot handle -- TODO confirm.
                ds = xr.open_mfdataset(path, decode_times=False).load()
            except (RuntimeError, OSError) as e:
                msg = str(e) + ': {}'.format(path)
                raise RuntimeError(msg)
            datasets.append(ds)
        return tuple(datasets)

    def _set_mult_grid_attr(self):
        """
        Set multiple attrs from grid file given their names in the grid file.

        For each internal name in ``internal_names.GRID_ATTRS``, the first
        external alias found in any of the grid files is renamed to aospy's
        internal coordinate names and stored on `self` under the internal
        name.
        """
        grid_objs = self._get_grid_files()
        time_str = internal_names.TIME_STR
        for name_int, names_ext in internal_names.GRID_ATTRS.items():
            for name in names_ext:
                grid_attr = _get_grid_attr(grid_objs, name)
                if grid_attr is None:
                    continue
                renamed_attr = _rename_coords(grid_attr)
                # Drop a time coordinate that is attached to the attribute
                # without being one of its dimensions.
                if ((time_str not in renamed_attr.dims) and
                        (time_str in renamed_attr)):
                    renamed_attr = renamed_attr.drop(time_str)
                setattr(self, name_int, renamed_attr)
                # Only the first alias that is found gets used.
                break

    def set_grid_data(self):
        """Populate the attrs that hold grid data.

        Does nothing if the grid data has already been set.
        """
        if self._grid_data_is_set:
            return
        self._set_mult_grid_attr()
        # Compute the surface area only if it was not loaded from the grid
        # files (attribute missing or containing no non-zero values).
        if not np.any(getattr(self, 'sfc_area', None)):
            try:
                sfc_area = _grid_sfc_area(self.lon, self.lat,
                                          self.lon_bounds, self.lat_bounds)
            except AttributeError:
                # E.g. the bounds attributes were not loaded; infer the
                # bounds from the cell centers instead.
                sfc_area = _grid_sfc_area(self.lon, self.lat)
            self.sfc_area = sfc_area
        try:
            self.levs_thick = utils.vertcoord.level_thickness(self.level)
        except AttributeError:
            # E.g. no `level` attribute was loaded from the grid files.
            self.level = None
            self.levs_thick = None
        self._grid_data_is_set = True