Source code for pvlib.iotools.sodapro

"""Functions to access data from Copernicus Atmosphere Monitoring Service
    (CAMS) radiation service.
.. codeauthor:: Adam R. Jensen<adam-r-j@hotmail.com>
"""

import pandas as pd
import requests
import io
import warnings


# Columns that CAMS delivers as integrated values over the time step; these
# are the columns that get divided by the period length when irradiance
# values are requested instead.
CAMS_INTEGRATED_COLUMNS = [
    # Top of atmosphere
    'TOA',
    # Clear-sky components
    'Clear sky GHI',
    'Clear sky BHI',
    'Clear sky DHI',
    'Clear sky BNI',
    # All-sky components
    'GHI',
    'BHI',
    'DHI',
    'BNI',
    # All-sky components flagged "no corr" by the service
    'GHI no corr',
    'BHI no corr',
    'DHI no corr',
    'BNI no corr',
]

# CAMS Radiation / McClear column name -> pvlib variable name
CAMS_VARIABLE_MAP = {
    'TOA': 'ghi_extra',
    'Clear sky GHI': 'ghi_clear',
    'Clear sky BHI': 'bhi_clear',
    'Clear sky DHI': 'dhi_clear',
    'Clear sky BNI': 'dni_clear',
    'GHI': 'ghi',
    'BHI': 'bhi',
    'DHI': 'dhi',
    'BNI': 'dni',
    'sza': 'solar_zenith',
}

# User-facing time step -> summarization code sent to the CAMS service
TIME_STEPS_MAP = {
    '1min': 'PT01M',
    '15min': 'PT15M',
    '1h': 'PT01H',
    '1d': 'P01D',
    '1M': 'P01M',
}

# Length of each fixed-size time step in hours. Monthly ('1M') is absent on
# purpose: months vary in length, so there is no single value for it.
TIME_STEPS_IN_HOURS = {
    '1min': 1 / 60,
    '15min': 15 / 60,
    '1h': 1,
    '1d': 24,
}

# "Summarization (integration) period" text found in the file header ->
# user-facing time step
SUMMATION_PERIOD_TO_TIME_STEP = {
    '0 year 0 month 0 day 0 h 1 min 0 s': '1min',
    '0 year 0 month 0 day 0 h 15 min 0 s': '15min',
    '0 year 0 month 0 day 1 h 0 min 0 s': '1h',
    '0 year 0 month 1 day 0 h 0 min 0 s': '1d',
    '0 year 1 month 0 day 0 h 0 min 0 s': '1M',
}


def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
             altitude=None, time_step='1h', time_ref='UT', verbose=False,
             integrated=False, label=None, map_variables=True,
             server='www.soda-is.com', timeout=30):
    """
    Retrieve time-series of radiation and/or clear-sky global, beam, and
    diffuse radiation from CAMS. Data from CAMS Radiation [1]_ and CAMS McClear
    [2]_ are retrieved from SoDa [3]_.

    Time coverage: 2004-01-01 to two days ago

    Access: free, but requires registration, see [1]_

    Requests: max. 100 per day

    Geographical coverage: Worldwide for CAMS McClear and -66° to 66° in both
    latitude and longitude for CAMS Radiation

    Parameters
    ----------
    latitude: float
        in decimal degrees, between -90 and 90, north is positive (ISO 19115)
    longitude : float
        in decimal degrees, between -180 and 180, east is positive (ISO 19115)
    start: datetime like
        First day of the requested period
    end: datetime like
        Last day of the requested period
    email: str
        Email address linked to a SoDa account
    identifier: {'mcclear', 'cams_radiation'}
        Specify whether to retrieve CAMS Radiation or McClear parameters
    altitude: float, optional
        Altitude in meters. If None, then the altitude is determined from the
        NASA SRTM database
    time_step: str, {'1min', '15min', '1h', '1d', '1M'}, default: '1h'
        Time step of the time series, either 1 minute, 15 minute, hourly,
        daily, or monthly.
    time_ref: str, {'UT', 'TST'}, default: 'UT'
        'UT' (universal time) or 'TST' (True Solar Time)
    verbose: boolean, default: False
        Verbose mode outputs additional parameters (aerosols). Only available
        for 1 minute and universal time. See [1]_ for parameter description.
    integrated: boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label: {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables: bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable CAMS_VARIABLE_MAP.
    server: str, default: 'www.soda-is.com'
        Main server (www.soda-is.com) or backup mirror server (pro.soda-is.com)
    timeout : int, default: 30
        Time in seconds to wait for server response before timeout

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data, see Notes for columns
    metadata: dict
        Metadata of the requested time-series

    Notes
    -----
    In order to use the CAMS services, users must register for a free SoDa
    account using an email address [1]_.

    The returned data DataFrame includes the following fields:

    ========================  ======  =========================================
    Key, mapped key           Format  Description
    ========================  ======  =========================================
    **Mapped field names are returned when the map_variables argument is True**
    ---------------------------------------------------------------------------
    Observation period        str     Beginning/end of time period
    TOA, ghi_extra            float   Horizontal radiation at top of atmosphere
    Clear sky GHI, ghi_clear  float   Clear sky global radiation on horizontal
    Clear sky BHI, bhi_clear  float   Clear sky beam radiation on horizontal
    Clear sky DHI, dhi_clear  float   Clear sky diffuse radiation on horizontal
    Clear sky BNI, dni_clear  float   Clear sky beam radiation normal to sun
    GHI, ghi†                 float   Global horizontal radiation
    BHI, bhi†                 float   Beam (direct) radiation on horizontal
    DHI, dhi†                 float   Diffuse horizontal radiation
    BNI, dni†                 float   Beam (direct) radiation normal to the sun
    Reliability†              float   Reliable data fraction in summarization
    ========================  ======  =========================================

    †Parameters only returned if identifier='cams_radiation'. For description
    of additional output parameters in verbose mode, see [1]_ and [2]_.

    Note that it is recommended to specify the latitude and longitude to at
    least the fourth decimal place.

    Variables corresponding to standard pvlib variables are renamed,
    e.g. `sza` becomes `solar_zenith`. See the
    `pvlib.iotools.sodapro.CAMS_VARIABLE_MAP` dict for the complete mapping.

    See Also
    --------
    pvlib.iotools.read_cams, pvlib.iotools.parse_cams

    Raises
    ------
    ValueError
        If ``time_step`` or ``identifier`` is not one of the supported values.
    requests.HTTPError
        If the request is invalid, then an XML file is returned by the CAMS
        service and the error message will be raised as an exception. Also
        raised when the response is neither XML nor CSV, since no data can be
        parsed from it.

    References
    ----------
    .. [1] `CAMS Radiation Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-radiation-service/info>`_
    .. [2] `CAMS McClear Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-mcclear/info>`_
    .. [3] `CAMS McClear Automatic Access
       <http://www.soda-pro.com/help/cams-services/cams-mcclear-service/automatic-access>`_
    """
    try:
        time_step_str = TIME_STEPS_MAP[time_step]
    except KeyError:
        # The KeyError itself carries no extra information for the user
        raise ValueError('Time step not recognized. Must be one of '
                         f'{list(TIME_STEPS_MAP.keys())}') from None

    if (verbose) and ((time_step != '1min') or (time_ref != 'UT')):
        verbose = False
        warnings.warn("Verbose mode only supports 1 min. UT time series!")

    if identifier not in ['mcclear', 'cams_radiation']:
        raise ValueError('Identifier must be either mcclear or cams_radiation')

    # Format verbose variable to the required format: {'true', 'false'}
    verbose = str(verbose).lower()

    if altitude is None:  # Let SoDa get elevation from the NASA SRTM database
        altitude = -999

    # Start and end date should be in the format: yyyy-mm-dd
    start = start.strftime('%Y-%m-%d')
    end = end.strftime('%Y-%m-%d')

    email = email.replace('@', '%2540')  # Format email address
    identifier = 'get_{}'.format(identifier.lower())  # Format identifier str

    base_url = f"http://{server}/service/wps"

    data_inputs_dict = {
        'latitude': latitude,
        'longitude': longitude,
        'altitude': altitude,
        'date_begin': start,
        'date_end': end,
        'time_ref': time_ref,
        'summarization': time_step_str,
        'username': email,
        'verbose': verbose}

    # Manual formatting of the input parameters separating each by a semicolon
    data_inputs = ";".join([f"{key}={value}" for key, value in
                            data_inputs_dict.items()])

    params = {'Service': 'WPS',
              'Request': 'Execute',
              'Identifier': identifier,
              'version': '1.0.0',
              'RawDataOutput': 'irradiation',
              }

    # The DataInputs parameter of the URL has to be manually formatted and
    # added to the base URL as it contains sub-parameters separated by
    # semi-colons, which gets incorrectly formatted by the requests function
    # if passed using the params argument.
    res = requests.get(base_url + '?DataInputs=' + data_inputs, params=params,
                       timeout=timeout)

    # Invalid requests return an XML error message and the HTTP status code
    # 200 as if the request was successful. Therefore, errors cannot be handled
    # automatically (e.g. res.raise_for_status()) and are handled manually.
    # Only the media type is compared, so an optional parameter such as
    # "; charset=utf-8" does not defeat the check.
    content_type = res.headers.get('Content-Type', '')
    media_type = content_type.split(';')[0].strip().lower()

    if media_type == 'application/xml':
        parts = res.text.split('ows:ExceptionText')
        # Fall back to the full body if the expected element is missing
        errors = parts[1][1:-2] if len(parts) > 1 else res.text
        raise requests.HTTPError(errors, response=res)

    if media_type != 'application/csv':
        # Previously this case fell through and silently returned None
        raise requests.HTTPError(
            'Unexpected content type returned by the CAMS service: '
            f'{content_type!r}', response=res)

    # Successful requests return a csv data file
    fbuf = io.StringIO(res.content.decode('utf-8'))
    data, metadata = parse_cams(fbuf, integrated=integrated, label=label,
                                map_variables=map_variables)
    return data, metadata
def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
    """
    Parse a file-like buffer with data in the format of a CAMS Radiation or
    McClear file. The CAMS services are described in [1]_ and [2]_.

    Parameters
    ----------
    fbuf: file-like object
        File-like object containing data to read.
    integrated: boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label: {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables: bool, default: True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable CAMS_VARIABLE_MAP.

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data from CAMS Radiation or McClear
    metadata: dict
        Metadata available in the file.

    Raises
    ------
    ValueError
        If ``label`` is not 'left', 'right' or None, or if the buffer ends
        before the '# Observation period' column-header line is found (e.g.
        an empty or truncated file).
    KeyError
        If the header has no 'Summarization (integration) period' entry or its
        value is not a key of SUMMATION_PERIOD_TO_TIME_STEP.

    See Also
    --------
    pvlib.iotools.read_cams, pvlib.iotools.get_cams

    References
    ----------
    .. [1] `CAMS Radiation Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-radiation-service/info>`_
    .. [2] `CAMS McClear Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-mcclear/info>`_
    """
    # Reject unsupported labels up front; otherwise the index would silently
    # be left as a plain integer range
    if label not in (None, 'left', 'right'):
        raise ValueError("label must be 'left', 'right' or None, "
                         f"got {label!r}")

    metadata = {}
    # Initial lines starting with # contain metadata
    while True:
        raw_line = fbuf.readline()
        if not raw_line:
            # readline() returns '' only at end of file (blank lines are
            # '\n'), so without this check the loop would never terminate
            raise ValueError("Reached end of file without finding the "
                             "'# Observation period' column header line")
        line = raw_line.rstrip('\n')
        if line.startswith('# Observation period'):
            # The last line of the metadata section contains the column names
            names = line.lstrip('# ').split(';')
            break  # End of metadata section has been reached
        elif ': ' in line:
            fields = line.split(': ')
            metadata[fields[0].lstrip('# ')] = fields[1]

    # Convert latitude, longitude, and altitude values from strings to floats
    for k_old in list(metadata):
        k_new = k_old.lstrip().split(' ')[0].lower()
        if k_new in ['latitude', 'longitude', 'altitude']:
            metadata[k_new] = float(metadata.pop(k_old))

    metadata['radiation_unit'] = 'Wh/m^2' if integrated else 'W/m^2'

    # Determine the time_step from the metadata dictionary
    time_step = SUMMATION_PERIOD_TO_TIME_STEP[
        metadata['Summarization (integration) period']]
    metadata['time_step'] = time_step

    data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names)

    # Each observation period is formatted as "<start>/<end>"
    obs_period = data['Observation period'].str.split('/')

    # Default label for monthly data is 'right' following Pandas' convention,
    # for all other time steps it is 'left'
    use_right = (label == 'right') or (label is None and time_step == '1M')
    if use_right:
        # Set index as the stop observation time (right) and localize to UTC
        data.index = pd.to_datetime(obs_period.str[1], utc=True)
    else:
        # Set index as the start observation time (left) and localize to UTC
        data.index = pd.to_datetime(obs_period.str[0], utc=True)

    # For time_steps '1d' and '1M', drop timezone and round to nearest midnight
    if time_step in ('1d', '1M'):
        data.index = pd.DatetimeIndex(data.index.date)
    # For monthly data with 'right' label, the index should be the last
    # date of the month and not the first date of the following month
    if time_step == '1M' and label != 'left':
        data.index = data.index - pd.Timedelta(days=1)

    if not integrated:  # Convert radiation values from Wh/m2 to W/m2
        integrated_cols = [c for c in CAMS_INTEGRATED_COLUMNS
                           if c in data.columns]

        if time_step == '1M':
            # Months differ in length, so derive the hours of each period
            # from its own start and end timestamps
            time_delta = (pd.to_datetime(obs_period.str[1])
                          - pd.to_datetime(obs_period.str[0]))
            hours = time_delta.dt.total_seconds()/60/60
            data[integrated_cols] = data[integrated_cols].\
                divide(hours.tolist(), axis='rows')
        else:
            data[integrated_cols] = (data[integrated_cols] /
                                     TIME_STEPS_IN_HOURS[time_step])
    data.index.name = None  # Set index name to None
    if map_variables:
        data = data.rename(columns=CAMS_VARIABLE_MAP)

    return data, metadata
def read_cams(filename, integrated=False, label=None, map_variables=True):
    """
    Read a CAMS Radiation or McClear file into a pandas DataFrame. CAMS
    radiation and McClear are described in [1]_ and [2]_, respectively.

    Parameters
    ----------
    filename: str
        Filename of a file containing data to read.
    integrated: boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label: {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables: bool, default: True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable CAMS_VARIABLE_MAP.

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data from CAMS Radiation or McClear.
        See :func:`pvlib.iotools.get_cams` for fields.
    metadata: dict
        Metadata available in the file.

    See Also
    --------
    pvlib.iotools.parse_cams, pvlib.iotools.get_cams

    References
    ----------
    .. [1] `CAMS Radiation Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-radiation-service/info>`_
    .. [2] `CAMS McClear Service Info
       <http://www.soda-pro.com/web-services/radiation/cams-mcclear/info>`_
    """
    # Decode as UTF-8 explicitly, matching how get_cams decodes the same
    # content, rather than relying on the platform's locale encoding
    with open(str(filename), 'r', encoding='utf-8') as fbuf:
        content = parse_cams(fbuf, integrated=integrated, label=label,
                             map_variables=map_variables)
    return content