# Source code for pvlib.iotools.sodapro

"""Functions to access data from Copernicus Atmosphere Monitoring Service
    (CAMS) radiation service.
.. codeauthor:: Adam R. Jensen<adam-r-j@hotmail.com>
"""

import pandas as pd
import requests
import io
import warnings
from pvlib import tools

from pvlib._deprecation import deprecated, renamed_kwarg_warning

# Default host name of the SoDa API (used by get_cams to build the request
# URL).
URL = 'api.soda-solardata.com'

# Columns that CAMS delivers as integrated values; read_cams divides these by
# the time-step duration when average irradiance is requested.
CAMS_INTEGRATED_COLUMNS = [
    'TOA',
    'Clear sky GHI', 'Clear sky BHI', 'Clear sky DHI', 'Clear sky BNI',
    'GHI', 'BHI', 'DHI', 'BNI',
    'GHI no corr', 'BHI no corr', 'DHI no corr', 'BNI no corr',
]

# CAMS Radiation / McClear column name -> pvlib variable name
VARIABLE_MAP = {
    'TOA': 'ghi_extra',
    'Clear sky GHI': 'ghi_clear',
    'Clear sky BHI': 'bhi_clear',
    'Clear sky DHI': 'dhi_clear',
    'Clear sky BNI': 'dni_clear',
    'GHI': 'ghi',
    'BHI': 'bhi',
    'DHI': 'dhi',
    'BNI': 'dni',
    'sza': 'solar_zenith',
}

# pvlib time-step string -> duration string sent to CAMS as 'summarization'
TIME_STEPS_MAP = {
    '1min': 'PT01M',
    '15min': 'PT15M',
    '1h': 'PT01H',
    '1d': 'P01D',
    '1M': 'P01M',
}

# Duration of each fixed-length time step in hours. Monthly ('1M') is absent
# because its length varies; read_cams derives it per row instead.
TIME_STEPS_IN_HOURS = {
    '1min': 1/60,
    '15min': 15/60,
    '1h': 1,
    '1d': 24,
}

# 'Summarization (integration) period' value found in the file header ->
# pvlib time-step string
SUMMATION_PERIOD_TO_TIME_STEP = {
    '0 year 0 month 0 day 0 h 1 min 0 s': '1min',
    '0 year 0 month 0 day 0 h 15 min 0 s': '15min',
    '0 year 0 month 0 day 1 h 0 min 0 s': '1h',
    '0 year 0 month 1 day 0 h 0 min 0 s': '1d',
    '0 year 1 month 0 day 0 h 0 min 0 s': '1M',
}



# [docs]
@renamed_kwarg_warning(
    since='0.13.0',
    old_param_name='server',
    new_param_name='url',
    removal="0.14.0")
def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
             altitude=None, time_step='1h', time_ref='UT', verbose=False,
             integrated=False, label=None, map_variables=True,
             url=URL, timeout=30):
    """Retrieve irradiance and clear-sky time series from CAMS.

    Time-series of radiation and/or clear-sky global, beam, and
    diffuse radiation from CAMS (see [1]_). Data is retrieved from SoDa [2]_.

    Time coverage: 2004-01-01 to two days ago

    Access: free, but requires registration, see [2]_

    Requests: max. 100 per day

    Geographical coverage: worldwide for CAMS McClear and approximately -66° to
    66° in latitude and -66° to 180° in longitude for CAMS Radiation. See [3]_
    for a map of the geographical coverage.

    Parameters
    ----------
    latitude: float
        in decimal degrees, between -90 and 90, north is positive (ISO 19115)
    longitude : float
        in decimal degrees, between -180 and 180, east is positive (ISO 19115)
    start: datetime-like
        First day of the requested period
    end: datetime-like
        Last day of the requested period
    email: str
        Email address linked to a SoDa account
    identifier: {'mcclear', 'cams_radiation'}
        Specify whether to retrieve CAMS Radiation or McClear parameters
    altitude: float, optional
        Altitude in meters. If not specified, then the altitude is determined
        from the NASA SRTM database
    time_step: str, {'1min', '15min', '1h', '1d', '1M'}, default: '1h'
        Time step of the time series, either 1 minute, 15 minute, hourly,
        daily, or monthly.
    time_ref: str, {'UT', 'TST'}, default: 'UT'
        'UT' (universal time) or 'TST' (True Solar Time)
    verbose: boolean, default: False
        Verbose mode outputs additional parameters (aerosols). Only available
        for 1 minute and universal time. See [1]_ for parameter description.
    integrated: boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label : {'right', 'left'}, optional
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables: bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    url: str, default: :const:`pvlib.iotools.sodapro.URL`
        Base url of the SoDa Pro CAMS Radiation API.
    timeout : int, default: 30
        Time in seconds to wait for server response before timeout

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data, see Notes for columns
    metadata: dict
        Metadata of the requested time-series

    Notes
    -----
    In order to use the CAMS services, users must register for a free SoDa
    account using an email address [2]_.

    The returned data DataFrame includes the following fields:

    ========================  ======  =========================================
    Key, mapped key           Format  Description
    ========================  ======  =========================================
    **Mapped field names are returned when the map_variables argument is True**
    ---------------------------------------------------------------------------
    Observation period        str     Beginning/end of time period
    TOA, ghi_extra            float   Horizontal radiation at top of atmosphere
    Clear sky GHI, ghi_clear  float   Clear sky global radiation on horizontal
    Clear sky BHI, bhi_clear  float   Clear sky beam radiation on horizontal
    Clear sky DHI, dhi_clear  float   Clear sky diffuse radiation on horizontal
    Clear sky BNI, dni_clear  float   Clear sky beam radiation normal to sun
    GHI, ghi†                 float   Global horizontal radiation
    BHI, bhi†                 float   Beam (direct) radiation on horizontal
    DHI, dhi†                 float   Diffuse horizontal radiation
    BNI, dni†                 float   Beam (direct) radiation normal to the sun
    Reliability†              float   Reliable data fraction in summarization
    ========================  ======  =========================================

    †Parameters only returned if identifier='cams_radiation'. For description
    of additional output parameters in verbose mode, see [1]_.

    Note that it is recommended to specify the latitude and longitude to at
    least the fourth decimal place.

    Variables corresponding to standard pvlib variables are renamed,
    e.g. `sza` becomes `solar_zenith`. See variable :const:`VARIABLE_MAP` for
    the complete mapping.

    For large geospatial areas, CAMS offers a pre-calculated
    gridded dataset [4]_ over land and coastal areas. This dataset
    may not include the most recent data coverage and may not be
    based on the most recent CAMS version. This dataset is not available
    through pvlib.

    See Also
    --------
    pvlib.iotools.read_cams

    Raises
    ------
    ValueError
        If ``time_step`` is not a key of :const:`TIME_STEPS_MAP` or if
        ``identifier`` is neither 'mcclear' nor 'cams_radiation'.
    requests.HTTPError
        If the request is invalid, then an XML file is returned by the CAMS
        service and the error message will be raised as an exception.

    Warns
    -----
    UserWarning
        If ``verbose`` is requested together with a time step other than
        '1min' or a time reference other than 'UT'. Verbose mode is then
        switched off for the request.

    References
    ----------
    .. [1] `CAMS solar radiation time-series documentation. Climate Data Store.
       <https://ads.atmosphere.copernicus.eu/datasets/cams-solar-radiation-timeseries>`_
    .. [2] `CAMS Radiation Automatic Access (SoDa)
       <https://www.soda-pro.com/help/cams-services/cams-radiation-service/automatic-access>`_
    .. [3] A. R. Jensen et al., pvlib iotools — Open-source Python functions
       for seamless access to solar irradiance data. Solar Energy. 2023. Vol
       266, pp. 112092. :doi:`10.1016/j.solener.2023.112092`
    .. [4] `CAMS gridded solar radiation documentation.
       <https://ads.atmosphere.copernicus.eu/datasets/cams-gridded-solar-radiation>`_
    """
    try:
        time_step_str = TIME_STEPS_MAP[time_step]
    except KeyError:
        raise ValueError(f'Time step not recognized. Must be one of '
                         f'{list(TIME_STEPS_MAP.keys())}')

    if verbose and (time_step != '1min' or time_ref != 'UT'):
        verbose = False
        warnings.warn("Verbose mode only supports 1 min. UT time series!")

    if identifier not in ['mcclear', 'cams_radiation']:
        raise ValueError('Identifier must be either mcclear or cams_radiation')

    # Format verbose variable to the required format: {'true', 'false'}
    verbose = str(verbose).lower()

    if altitude is None:  # Let SoDa get elevation from the NASA SRTM database
        altitude = -999

    # Start and end date should be in the format: yyyy-mm-dd
    start = pd.to_datetime(start).strftime('%Y-%m-%d')
    end = pd.to_datetime(end).strftime('%Y-%m-%d')

    email = email.replace('@', '%2540')  # Format email address
    identifier = f'get_{identifier.lower()}'  # Format identifier str

    base_url = f"https://{url}/service/wps"

    data_inputs_dict = {
        'latitude': latitude,
        'longitude': longitude,
        'altitude': altitude,
        'date_begin': start,
        'date_end': end,
        'time_ref': time_ref,
        'summarization': time_step_str,
        'username': email,
        'verbose': verbose}

    # Manual formatting of the input parameters separating each by a semicolon
    data_inputs = ";".join(f"{key}={value}" for key, value in
                           data_inputs_dict.items())

    params = {'Service': 'WPS',
              'Request': 'Execute',
              'Identifier': identifier,
              'version': '1.0.0',
              'RawDataOutput': 'irradiation',
              }

    # The DataInputs parameter of the URL has to be manually formatted and
    # added to the base URL as it contains sub-parameters separated by
    # semi-colons, which gets incorrectly formatted by the requests function
    # if passed using the params argument.
    res = requests.get(base_url + '?DataInputs=' + data_inputs, params=params,
                       timeout=timeout)

    # Response from CAMS follows the status and reason format of PyWPS4
    # If an error occurs on server side, it will return error 400 - bad request
    # Additional information is available in the response text, so it is added
    # to the error displayed to facilitate users effort to fix their request
    if not res.ok:
        # Splitting on the element name leaves '>message</' as the second
        # piece; the [1:-2] slice strips those delimiters. If the element is
        # absent there is no second piece, and indexing it would raise an
        # IndexError that hides the HTTP error, so the reason is only
        # extended when the marker is actually present.
        pieces = res.text.split('ows:ExceptionText')
        if len(pieces) > 1:
            errors = pieces[1][1:-2]
            res.reason = "%s: <%s>" % (res.reason, errors)
        res.raise_for_status()

    # Successful requests returns a csv data file
    fbuf = io.StringIO(res.content.decode('utf-8'))
    data, metadata = read_cams(fbuf, integrated=integrated, label=label,
                               map_variables=map_variables)
    return data, metadata




# [docs]
def read_cams(filename, integrated=False, label=None, map_variables=True):
    """
    Read a file or file-like buffer with data in the format of a CAMS
    Radiation or McClear file.

    The CAMS solar radiation services are described in [1]_.

    Parameters
    ----------
    filename: str, path-like, or buffer
        Filename or in-memory buffer of a file containing data to read.
    integrated: boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label : {'right', 'left'}, optional
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables: bool, default: True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data from CAMS Radiation or McClear
    metadata: dict
        Metadata available in the file.

    Raises
    ------
    ValueError
        If ``label`` is not 'left', 'right', or None, or if the end of the
        file is reached before the ``# Observation period`` line holding the
        column names has been found.

    See Also
    --------
    pvlib.iotools.get_cams

    References
    ----------
    .. [1] `CAMS solar radiation time-series documentation. Climate Data Store.
       <https://ads.atmosphere.copernicus.eu/datasets/cams-solar-radiation-timeseries>`_
    """
    # Reject unsupported labels up front; otherwise no index would be set
    # below and the data would silently keep its default integer index.
    if label not in (None, 'left', 'right'):
        raise ValueError("label must be 'left', 'right', or None.")

    metadata = {}

    with tools._file_context_manager(filename) as fbuf:

        # Initial lines starting with # contain metadata
        while True:
            raw_line = fbuf.readline()
            if not raw_line:
                # readline() returns an empty value once the end of the file
                # is reached (blank lines still contain the newline), so
                # without this check the loop would never terminate.
                raise ValueError(
                    "End of file reached before the '# Observation period' "
                    "header line was found; the file does not appear to be "
                    "in the CAMS format.")
            line = raw_line.rstrip('\n')
            if line.startswith('# Observation period'):
                # The last line of the metadata section has the column names
                names = line.lstrip('# ').split(';')
                break  # End of metadata section has been reached
            elif ': ' in line:
                key = line.split(': ')[0].lstrip('# ')
                value = line.split(': ')[1]
                metadata[key] = value

        data = pd.read_csv(fbuf, sep=';', comment='#', header=None,
                           names=names)

    # Convert latitude, longitude, and altitude values from strings to floats
    # (iterate over a copy of the keys since entries are replaced in the loop)
    for k_old in list(metadata.keys()):
        k_new = k_old.lstrip().split(' ')[0].lower()
        if k_new in ['latitude', 'longitude', 'altitude']:
            metadata[k_new] = float(metadata.pop(k_old))

    metadata['radiation_unit'] = 'Wh/m^2' if integrated else 'W/m^2'

    # Determine the time_step from the metadata dictionary
    time_step = SUMMATION_PERIOD_TO_TIME_STEP[
        metadata['Summarization (integration) period']]
    metadata['time_step'] = time_step

    # Each entry is '<start>/<stop>' of the observation period
    obs_period = data['Observation period'].str.split('/')

    # default label for monthly data is 'right' following Pandas' convention
    use_right_edge = (label == 'right') or (label is None
                                            and time_step == '1M')
    if use_right_edge:
        # Set index as the stop observation time (right) and localize to UTC
        data.index = pd.to_datetime(obs_period.str[1], utc=True)
    else:
        # Set index as the start observation time (left) and localize to UTC
        data.index = pd.to_datetime(obs_period.str[0], utc=True)

    # For time_steps '1d' and '1M', drop timezone and round to nearest midnight
    if time_step in ('1d', '1M'):
        data.index = pd.DatetimeIndex(data.index.date)
    # For monthly data with 'right' label, the index should be the last
    # date of the month and not the first date of the following month
    if time_step == '1M' and label != 'left':
        data.index = data.index - pd.Timedelta(days=1)

    if not integrated:  # Convert radiation values from Wh/m2 to W/m2
        integrated_cols = [c for c in CAMS_INTEGRATED_COLUMNS
                           if c in data.columns]

        if time_step == '1M':
            # Months differ in length, so the number of hours is computed
            # per row from the observation period itself
            time_delta = (pd.to_datetime(obs_period.str[1])
                          - pd.to_datetime(obs_period.str[0]))
            hours = time_delta.dt.total_seconds()/60/60
            data[integrated_cols] = data[integrated_cols].\
                divide(hours.tolist(), axis='rows')
        else:
            data[integrated_cols] = (data[integrated_cols] /
                                     TIME_STEPS_IN_HOURS[time_step])
    data.index.name = None  # Set index name to None
    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)

    return data, metadata



# Legacy name for read_cams, wrapped with the ``deprecated`` helper: marked
# as deprecated since 0.13.0 with read_cams named as the alternative.
parse_cams = deprecated(
    alternative="read_cams",
    name="parse_cams",
    since="0.13.0",
)(read_cams)