Source code for pvlib.iotools.midc

"""Functions to read NREL MIDC data.
"""
import io


import requests
import pandas as pd


# MIDC_VARIABLE_MAP maps some variables of interest at each MIDC site to their
# pvlib counterparts. The mapping dictionary for a site can be found by looking
# up the Site's id in the dictionary. It is not a comprehensive list, and may
# not be the best fit for your application, but should serve as a base for
# creating your own mappings.
#
# In particular, these mappings coincide with the raw ddata files.
# All site's field list can be found at:
#     https://midcdmz.nrel.gov/apps/daily.pl?site=<SITE ID>&live=1
# Where id is the key found in this dictionary
MIDC_VARIABLE_MAP = {
    'BMS': {
        'Global CMP22 (vent/cor) [W/m^2]': 'ghi',
        'Direct NIP [W/m^2]': 'dni',
        'Diffuse CM22-1 (vent/cor) [W/m^2]': 'dhi',
        'Avg Wind Speed @ 6ft [m/s]': 'wind_speed',
        'Tower Dry Bulb Temp [deg C]': 'temp_air',
        'Tower RH [%]': 'relative_humidity'},
    'UOSMRL': {
        'Global CMP22 [W/m^2]': 'ghi',
        'Direct NIP [W/m^2]': 'dni',
        'Diffuse Schenk [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Relative Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed'},
    'HSU': {
        'Global Horiz [W/m^2]': 'ghi',
        'Direct Normal (calc) [W/m^2]': 'dni',
        'Diffuse Horiz (band_corr) [W/m^2]': 'dhi'},
    'UTPASRL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'CHP1 Temp [deg C]': 'temp_air'},
    'UAT': {
        'Global Horiz (platform) [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horiz [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 3m [m/s]': 'wind_speed'},
    'STAC': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity'},
    'UNLV': {
        'Global Horiz [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horiz (calc) [W/m^2]': 'dhi',
        'Dry Bulb Temp [deg C]': 'temp_air',
        'Avg Wind Speed @ 30ft [m/s]': 'wind_speed'},
    'ORNL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 42ft [m/s]': 'wind_speed'},
    'NELHA': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Air Temperature [W/m^2]': 'temp_air',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
        'Rel Humidity [%]': 'relative_humidity'},
    'ULL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 3m [m/s]': 'wind_speed'},
    'VTIF': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Avg Wind Speed @ 3m [m/s]': 'wind_speed',
        'Rel Humidity [%]': 'relative_humidity'},
    'NWTC': {
        'Global PSP [W/m^2]': 'ghi',
        'Temperature @ 2m [deg C]': 'temp_air',
        'Avg Wind Speed @ 2m [m/s]': 'wind_speed',
        'Relative Humidity [%]': 'relative_humidity'}}


# Maps problematic timezones to 'Etc/GMT' for parsing.

TZ_MAP = {
    'PST': 'Etc/GMT+8',
    'CST': 'Etc/GMT+6',
}


def _format_index(data):
    """Create DatetimeIndex for the Dataframe localized to the timezone provided
    as the label of the second (time) column.

    Parameters
    ----------
    data: Dataframe
        Must contain 'DATE (MM/DD/YYYY)' column, second column must be labeled
        with the timezone and contain times in 'HH:MM' format.

    Returns
    -------
    data: Dataframe
        Dataframe with DatetimeIndex localized to the provided timezone.
    """
    tz_raw = data.columns[1]
    timezone = TZ_MAP.get(tz_raw, tz_raw)
    datetime = data['DATE (MM/DD/YYYY)'] + data[tz_raw]
    datetime = pd.to_datetime(datetime, format='%m/%d/%Y%H:%M')
    data = data.set_index(datetime)
    data = data.tz_localize(timezone)
    return data


def _format_index_raw(data):
    """Create DatetimeIndex for the Dataframe localized to the timezone provided
    as the label of the third column.

    Parameters
    ----------
    data: Dataframe
        Must contain columns 'Year' and 'DOY'. Timezone must be found as the
        label of the third (time) column.

    Returns
    -------
    data: Dataframe
        The data with a Datetime index localized to the provided timezone.
    """
    tz_raw = data.columns[3]
    timezone = TZ_MAP.get(tz_raw, tz_raw)
    year = data.Year.apply(str)
    jday = data.DOY.apply(lambda x: '{:03d}'.format(x))
    time = data[tz_raw].apply(lambda x: '{:04d}'.format(x))
    index = pd.to_datetime(year + jday + time, format="%Y%j%H%M")
    data = data.set_index(index)
    data = data.tz_localize(timezone)
    return data


[docs]def read_midc(filename, variable_map={}, raw_data=False, **kwargs):
    """Read in National Renewable Energy Laboratory Measurement and
    Instrumentation Data Center weather data.  The MIDC is described in [1]_.

    Parameters
    ----------
    filename: string or file-like object
        Filename, url, or file-like object of data to read.
    variable_map: dictionary
        Dictionary for mapping MIDC field names to pvlib names. Used to rename
        the columns of the resulting DataFrame. Does not map names by default.
        See Notes for an example.
    raw_data: boolean
        Set to true to use format_index_raw to correctly format the date/time
        columns of MIDC raw data files.
    kwargs : dict
       Additional keyword arguments to pass to `pandas.read_csv`

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex localized to the provided timezone.

    Notes
    -----
    The `variable_map` argument should map fields from MIDC data to pvlib
    names.

    E.g. if a MIDC file contains the variable 'Global Horizontal [W/m^2]',
    passing the dictionary below will rename the column to 'ghi' in
    the returned Dataframe.

             {'Global Horizontal [W/m^2]': 'ghi'}

    See the MIDC_VARIABLE_MAP for collection of mappings by site.
    For a full list of pvlib variable names see the
    :ref:`variables_style_rules`.

    Be sure to check the units for the variables you will use on the
    `MIDC site <https://midcdmz.nrel.gov/>`_.

    References
    ----------
    .. [1] NREL: Measurement and Instrumentation Data Center
        `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
    """
    data = pd.read_csv(filename, **kwargs)
    if raw_data:
        data = _format_index_raw(data)
    else:
        data = _format_index(data)
    data = data.rename(columns=variable_map)
    return data


[docs]def read_midc_raw_data_from_nrel(site, start, end, variable_map={},
                                 timeout=30):
    """Request and read MIDC data directly from the raw data api.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime-like
        Start date for requested data.
    end: datetime-like
        End date for requested data.
    variable_map: dict
        A dictionary mapping MIDC field names to pvlib names. Used to
        rename columns of the resulting DataFrame. See Notes of
        :py:func:`pvlib.iotools.read_midc` for example.
    timeout : float, default 30
        Number of seconds to wait to connect/read from the API before
        failing.

    Returns
    -------
    data:
        Dataframe with DatetimeIndex localized to the station location.

    Raises
    ------
    requests.HTTPError
       For any error in retrieving the CSV file from the MIDC API
    requests.Timeout
       If data is not received in within ``timeout`` seconds

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    MIDC raw data api page
    `here <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
    for more details and considerations.
    """
    args = {'site': site,
            'begin': pd.to_datetime(start).strftime('%Y%m%d'),
            'end': pd.to_datetime(end).strftime('%Y%m%d')}
    url = 'https://midcdmz.nrel.gov/apps/data_api.pl'
    # NOTE: just use requests.get(url, params=args) to build querystring
    # number of header columns and data columns do not always match,
    # so first parse the header to determine the number of data columns
    # to parse
    csv_request = requests.get(url, timeout=timeout, params=args)
    csv_request.raise_for_status()
    raw_csv = io.StringIO(csv_request.text)
    first_row = pd.read_csv(raw_csv, nrows=0)
    col_length = len(first_row.columns)
    raw_csv.seek(0)
    return read_midc(raw_csv, variable_map=variable_map, raw_data=True,
                     usecols=range(col_length))