Source code for pvlib.iotools.sodapro

"""Functions to access data from Copernicus Atmosphere Monitoring Service
    (CAMS) radiation service.
.. codeauthor:: Adam R. Jensen<adam-r-j@hotmail.com>
"""

import pandas as pd
import requests
import io
import warnings


URL = 'api.soda-solardata.com'

CAMS_INTEGRATED_COLUMNS = [
    'TOA', 'Clear sky GHI', 'Clear sky BHI', 'Clear sky DHI', 'Clear sky BNI',
    'GHI', 'BHI', 'DHI', 'BNI',
    'GHI no corr', 'BHI no corr', 'DHI no corr', 'BNI no corr']

# Dictionary mapping CAMS Radiation and McClear variables to pvlib names
VARIABLE_MAP = {
    'TOA': 'ghi_extra',
    'Clear sky GHI': 'ghi_clear',
    'Clear sky BHI': 'bhi_clear',
    'Clear sky DHI': 'dhi_clear',
    'Clear sky BNI': 'dni_clear',
    'GHI': 'ghi',
    'BHI': 'bhi',
    'DHI': 'dhi',
    'BNI': 'dni',
    'sza': 'solar_zenith',
}

# Dictionary mapping time steps to CAMS time step format
TIME_STEPS_MAP = {'1min': 'PT01M', '15min': 'PT15M', '1h': 'PT01H',
                  '1d': 'P01D', '1M': 'P01M'}

TIME_STEPS_IN_HOURS = {'1min': 1/60, '15min': 15/60, '1h': 1, '1d': 24}

SUMMATION_PERIOD_TO_TIME_STEP = {'0 year 0 month 0 day 0 h 1 min 0 s': '1min',
                                 '0 year 0 month 0 day 0 h 15 min 0 s': '15min',  # noqa
                                 '0 year 0 month 0 day 1 h 0 min 0 s': '1h',
                                 '0 year 0 month 1 day 0 h 0 min 0 s': '1d',
                                 '0 year 1 month 0 day 0 h 0 min 0 s': '1M'}


[docs]def get_cams(latitude, longitude, start, end, email, identifier='mcclear', altitude=None, time_step='1h', time_ref='UT', verbose=False, integrated=False, label=None, map_variables=True, server=URL, timeout=30): """Retrieve irradiance and clear-sky time series from CAMS. Time-series of radiation and/or clear-sky global, beam, and diffuse radiation from CAMS (see [1]_). Data is retrieved from SoDa [2]_. Time coverage: 2004-01-01 to two days ago Access: free, but requires registration, see [2]_ Requests: max. 100 per day Geographical coverage: worldwide for CAMS McClear and approximately -66° to 66° in latitude and -66° to 180° in longitude for CAMS Radiation. See [3]_ for a map of the geographical coverage. Parameters ---------- latitude: float in decimal degrees, between -90 and 90, north is positive (ISO 19115) longitude : float in decimal degrees, between -180 and 180, east is positive (ISO 19115) start: datetime-like First day of the requested period end: datetime-like Last day of the requested period email: str Email address linked to a SoDa account identifier: {'mcclear', 'cams_radiation'} Specify whether to retrieve CAMS Radiation or McClear parameters altitude: float, optional Altitude in meters. If not specified, then the altitude is determined from the NASA SRTM database time_step: str, {'1min', '15min', '1h', '1d', '1M'}, default: '1h' Time step of the time series, either 1 minute, 15 minute, hourly, daily, or monthly. time_ref: str, {'UT', 'TST'}, default: 'UT' 'UT' (universal time) or 'TST' (True Solar Time) verbose: boolean, default: False Verbose mode outputs additional parameters (aerosols). Only available for 1 minute and universal time. See [1]_ for parameter description. integrated: boolean, default False Whether to return radiation parameters as integrated values (Wh/m^2) or as average irradiance values (W/m^2) (pvlib preferred units) label : {'right', 'left'}, optional Which bin edge label to label time-step with. The default is 'left' for all time steps except for '1M' which has a default of 'right'. map_variables: bool, default: True When true, renames columns of the DataFrame to pvlib variable names where applicable. See variable :const:`VARIABLE_MAP`. server: str, default: :const:`pvlib.iotools.sodapro.URL` Base url of the SoDa Pro CAMS Radiation API. timeout : int, default: 30 Time in seconds to wait for server response before timeout Returns ------- data: pandas.DataFrame Timeseries data, see Notes for columns metadata: dict Metadata of the requested time-series Notes ----- In order to use the CAMS services, users must register for a free SoDa account using an email address [2]_. The returned data DataFrame includes the following fields: ======================== ====== ========================================= Key, mapped key Format Description ======================== ====== ========================================= **Mapped field names are returned when the map_variables argument is True** --------------------------------------------------------------------------- Observation period str Beginning/end of time period TOA, ghi_extra float Horizontal radiation at top of atmosphere Clear sky GHI, ghi_clear float Clear sky global radiation on horizontal Clear sky BHI, bhi_clear float Clear sky beam radiation on horizontal Clear sky DHI, dhi_clear float Clear sky diffuse radiation on horizontal Clear sky BNI, dni_clear float Clear sky beam radiation normal to sun GHI, ghi† float Global horizontal radiation BHI, bhi† float Beam (direct) radiation on horizontal DHI, dhi† float Diffuse horizontal radiation BNI, dni† float Beam (direct) radiation normal to the sun Reliability† float Reliable data fraction in summarization ======================== ====== ========================================= †Parameters only returned if identifier='cams_radiation'. For description of additional output parameters in verbose mode, see [1]_. Note that it is recommended to specify the latitude and longitude to at least the fourth decimal place. Variables corresponding to standard pvlib variables are renamed, e.g. `sza` becomes `solar_zenith`. See variable :const:`VARIABLE_MAP` for the complete mapping. See Also -------- pvlib.iotools.read_cams, pvlib.iotools.parse_cams Raises ------ requests.HTTPError If the request is invalid, then an XML file is returned by the CAMS service and the error message will be raised as an exception. References ---------- .. [1] `CAMS solar radiation documentation <https://atmosphere.copernicus.eu/solar-radiation>`_ .. [2] `CAMS Radiation Automatic Access (SoDa) <https://www.soda-pro.com/help/cams-services/cams-radiation-service/automatic-access>`_ .. [3] A. R. Jensen et al., pvlib iotools — Open-source Python functions for seamless access to solar irradiance data. Solar Energy. 2023. Vol 266, pp. 112092. :doi:`10.1016/j.solener.2023.112092` """ try: time_step_str = TIME_STEPS_MAP[time_step] except KeyError: raise ValueError(f'Time step not recognized. Must be one of ' f'{list(TIME_STEPS_MAP.keys())}') if (verbose) and ((time_step != '1min') or (time_ref != 'UT')): verbose = False warnings.warn("Verbose mode only supports 1 min. UT time series!") if identifier not in ['mcclear', 'cams_radiation']: raise ValueError('Identifier must be either mcclear or cams_radiation') # Format verbose variable to the required format: {'true', 'false'} verbose = str(verbose).lower() if altitude is None: # Let SoDa get elevation from the NASA SRTM database altitude = -999 # Start and end date should be in the format: yyyy-mm-dd start = pd.to_datetime(start).strftime('%Y-%m-%d') end = pd.to_datetime(end).strftime('%Y-%m-%d') email = email.replace('@', '%2540') # Format email address identifier = 'get_{}'.format(identifier.lower()) # Format identifier str base_url = f"https://{server}/service/wps" data_inputs_dict = { 'latitude': latitude, 'longitude': longitude, 'altitude': altitude, 'date_begin': start, 'date_end': end, 'time_ref': time_ref, 'summarization': time_step_str, 'username': email, 'verbose': verbose} # Manual formatting of the input parameters seperating each by a semicolon data_inputs = ";".join([f"{key}={value}" for key, value in data_inputs_dict.items()]) params = {'Service': 'WPS', 'Request': 'Execute', 'Identifier': identifier, 'version': '1.0.0', 'RawDataOutput': 'irradiation', } # The DataInputs parameter of the URL has to be manually formatted and # added to the base URL as it contains sub-parameters seperated by # semi-colons, which gets incorrectly formatted by the requests function # if passed using the params argument. res = requests.get(base_url + '?DataInputs=' + data_inputs, params=params, timeout=timeout) # Response from CAMS follows the status and reason format of PyWPS4 # If an error occurs on server side, it will return error 400 - bad request # Additional information is available in the response text, so it is added # to the error displayed to facilitate users effort to fix their request if not res.ok: errors = res.text.split('ows:ExceptionText')[1][1:-2] res.reason = "%s: <%s>" % (res.reason, errors) res.raise_for_status() # Successful requests returns a csv data file else: fbuf = io.StringIO(res.content.decode('utf-8')) data, metadata = parse_cams(fbuf, integrated=integrated, label=label, map_variables=map_variables) return data, metadata
[docs]def parse_cams(fbuf, integrated=False, label=None, map_variables=True): """ Parse a file-like buffer with data in the format of a CAMS Radiation or McClear file. The CAMS solar radiation services are described in [1]_. Parameters ---------- fbuf: file-like object File-like object containing data to read. integrated: boolean, default False Whether to return radiation parameters as integrated values (Wh/m^2) or as average irradiance values (W/m^2) (pvlib preferred units) label : {'right', 'left'}, optional Which bin edge label to label time-step with. The default is 'left' for all time steps except for '1M' which has a default of 'right'. map_variables: bool, default: True When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable :const:`VARIABLE_MAP`. Returns ------- data: pandas.DataFrame Timeseries data from CAMS Radiation or McClear metadata: dict Metadata available in the file. See Also -------- pvlib.iotools.read_cams, pvlib.iotools.get_cams References ---------- .. [1] `CAMS solar radiation documentation <https://atmosphere.copernicus.eu/solar-radiation>`_ """ metadata = {} # Initial lines starting with # contain metadata while True: line = fbuf.readline().rstrip('\n') if line.startswith('# Observation period'): # The last line of the metadata section contains the column names names = line.lstrip('# ').split(';') break # End of metadata section has been reached elif ': ' in line: metadata[line.split(': ')[0].lstrip('# ')] = line.split(': ')[1] # Convert latitude, longitude, and altitude values from strings to floats for k_old in list(metadata.keys()): k_new = k_old.lstrip().split(' ')[0].lower() if k_new in ['latitude', 'longitude', 'altitude']: metadata[k_new] = float(metadata.pop(k_old)) metadata['radiation_unit'] = \ {True: 'Wh/m^2', False: 'W/m^2'}[integrated] # Determine the time_step from the metadata dictionary time_step = SUMMATION_PERIOD_TO_TIME_STEP[ metadata['Summarization (integration) period']] metadata['time_step'] = time_step data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names) obs_period = data['Observation period'].str.split('/') # Set index as the start observation time (left) and localize to UTC if (label == 'left') | ((label is None) & (time_step != '1M')): data.index = pd.to_datetime(obs_period.str[0], utc=True) # Set index as the stop observation time (right) and localize to UTC # default label for monthly data is 'right' following Pandas' convention elif (label == 'right') | ((label is None) & (time_step == '1M')): data.index = pd.to_datetime(obs_period.str[1], utc=True) # For time_steps '1d' and '1M', drop timezone and round to nearest midnight if (time_step == '1d') | (time_step == '1M'): data.index = pd.DatetimeIndex(data.index.date) # For monthly data with 'right' label, the index should be the last # date of the month and not the first date of the following month if (time_step == '1M') & (label != 'left'): data.index = data.index - pd.Timedelta(days=1) if not integrated: # Convert radiation values from Wh/m2 to W/m2 integrated_cols = [c for c in CAMS_INTEGRATED_COLUMNS if c in data.columns] if time_step == '1M': time_delta = (pd.to_datetime(obs_period.str[1]) - pd.to_datetime(obs_period.str[0])) hours = time_delta.dt.total_seconds()/60/60 data[integrated_cols] = data[integrated_cols].\ divide(hours.tolist(), axis='rows') else: data[integrated_cols] = (data[integrated_cols] / TIME_STEPS_IN_HOURS[time_step]) data.index.name = None # Set index name to None if map_variables: data = data.rename(columns=VARIABLE_MAP) return data, metadata
[docs]def read_cams(filename, integrated=False, label=None, map_variables=True): """ Read a CAMS Radiation or McClear file into a pandas DataFrame. CAMS Radiation and McClear are described in [1]_. Parameters ---------- filename: str Filename of a file containing data to read. integrated: boolean, default False Whether to return radiation parameters as integrated values (Wh/m^2) or as average irradiance values (W/m^2) (pvlib preferred units) label : {'right', 'left}, optional Which bin edge label to label time-step with. The default is 'left' for all time steps except for '1M' which has a default of 'right'. map_variables: bool, default: True When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable :const:`VARIABLE_MAP`. Returns ------- data: pandas.DataFrame Timeseries data from CAMS Radiation or McClear. See :func:`pvlib.iotools.get_cams` for fields. metadata: dict Metadata available in the file. See Also -------- pvlib.iotools.parse_cams, pvlib.iotools.get_cams References ---------- .. [1] `CAMS solar radiation documentation <https://atmosphere.copernicus.eu/solar-radiation>`_ """ with open(str(filename), 'r') as fbuf: content = parse_cams(fbuf, integrated, label, map_variables) return content