"""Functions to access data from Copernicus Atmosphere Monitoring Service
(CAMS) radiation service.

.. codeauthor:: Adam R. Jensen <adam-r-j@hotmail.com>
"""
import pandas as pd
import requests
import io
import warnings
# Default host of the SoDa server; get_cams builds the request URL as
# "https://{server}/service/wps"
URL = 'api.soda-solardata.com'

# Columns that parse_cams divides by the duration of the time step (in hours)
# when average irradiance is requested instead of integrated values
CAMS_INTEGRATED_COLUMNS = [
    'TOA',
    'Clear sky GHI', 'Clear sky BHI', 'Clear sky DHI', 'Clear sky BNI',
    'GHI', 'BHI', 'DHI', 'BNI',
    'GHI no corr', 'BHI no corr', 'DHI no corr', 'BNI no corr',
]

# Dictionary mapping CAMS Radiation and McClear variables to pvlib names
VARIABLE_MAP = {
    'TOA': 'ghi_extra',
    'Clear sky GHI': 'ghi_clear',
    'Clear sky BHI': 'bhi_clear',
    'Clear sky DHI': 'dhi_clear',
    'Clear sky BNI': 'dni_clear',
    'GHI': 'ghi',
    'BHI': 'bhi',
    'DHI': 'dhi',
    'BNI': 'dni',
    'sza': 'solar_zenith',
}

# Dictionary mapping time steps to CAMS time step format
TIME_STEPS_MAP = {
    '1min': 'PT01M',
    '15min': 'PT15M',
    '1h': 'PT01H',
    '1d': 'P01D',
    '1M': 'P01M',
}

# Duration of each fixed-length time step in hours. '1M' is absent on purpose:
# parse_cams derives the length of each month from its observation period.
TIME_STEPS_IN_HOURS = {
    '1min': 1/60,
    '15min': 15/60,
    '1h': 1,
    '1d': 24,
}

# Dictionary mapping the 'Summarization (integration) period' metadata string
# of a CAMS file to the corresponding time step
SUMMATION_PERIOD_TO_TIME_STEP = {
    '0 year 0 month 0 day 0 h 1 min 0 s': '1min',
    '0 year 0 month 0 day 0 h 15 min 0 s': '15min',
    '0 year 0 month 0 day 1 h 0 min 0 s': '1h',
    '0 year 0 month 1 day 0 h 0 min 0 s': '1d',
    '0 year 1 month 0 day 0 h 0 min 0 s': '1M',
}
def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
             altitude=None, time_step='1h', time_ref='UT', verbose=False,
             integrated=False, label=None, map_variables=True,
             server=URL, timeout=30):
    """Retrieve irradiance and clear-sky time series from CAMS.

    Time-series of radiation and/or clear-sky global, beam, and
    diffuse radiation from CAMS (see [1]_). Data is retrieved from SoDa [2]_.

    Time coverage: 2004-01-01 to two days ago

    Access: free, but requires registration, see [2]_

    Requests: max. 100 per day

    Geographical coverage: worldwide for CAMS McClear and approximately -66° to
    66° in both latitude and longitude for CAMS Radiation.

    Parameters
    ----------
    latitude : float
        in decimal degrees, between -90 and 90, north is positive (ISO 19115)
    longitude : float
        in decimal degrees, between -180 and 180, east is positive (ISO 19115)
    start : datetime-like
        First day of the requested period
    end : datetime-like
        Last day of the requested period
    email : str
        Email address linked to a SoDa account
    identifier : {'mcclear', 'cams_radiation'}
        Specify whether to retrieve CAMS Radiation or McClear parameters
    altitude : float, optional
        Altitude in meters. If None, then the altitude is determined from the
        NASA SRTM database
    time_step : str, {'1min', '15min', '1h', '1d', '1M'}, default: '1h'
        Time step of the time series, either 1 minute, 15 minute, hourly,
        daily, or monthly.
    time_ref : str, {'UT', 'TST'}, default: 'UT'
        'UT' (universal time) or 'TST' (True Solar Time)
    verbose : boolean, default: False
        Verbose mode outputs additional parameters (aerosols). Only available
        for 1 minute and universal time. See [1]_ for parameter description.
    integrated : boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label : {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables : bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    server : str, default: :const:`pvlib.iotools.sodapro.URL`
        Base url of the SoDa Pro CAMS Radiation API.
    timeout : int, default: 30
        Time in seconds to wait for server response before timeout

    Returns
    -------
    data : pandas.DataFrame
        Timeseries data, see Notes for columns
    metadata : dict
        Metadata of the requested time-series

    Raises
    ------
    ValueError
        If `time_step` or `identifier` is not one of the supported values.
    requests.HTTPError
        If the request is invalid, then an XML file is returned by the CAMS
        service and the error message will be raised as an exception. Also
        raised if the server answers with an HTTP error status or with a
        content type that is neither XML nor CSV.

    Warns
    -----
    UserWarning
        If `verbose` is requested for anything other than 1 minute data in
        universal time; verbose mode is then switched off.

    Notes
    -----
    In order to use the CAMS services, users must register for a free SoDa
    account using an email address [2]_.

    The returned data DataFrame includes the following fields:

    ========================  ======  =========================================
    Key, mapped key           Format  Description
    ========================  ======  =========================================
    **Mapped field names are returned when the map_variables argument is True**
    ---------------------------------------------------------------------------
    Observation period        str     Beginning/end of time period
    TOA, ghi_extra            float   Horizontal radiation at top of atmosphere
    Clear sky GHI, ghi_clear  float   Clear sky global radiation on horizontal
    Clear sky BHI, bhi_clear  float   Clear sky beam radiation on horizontal
    Clear sky DHI, dhi_clear  float   Clear sky diffuse radiation on horizontal
    Clear sky BNI, dni_clear  float   Clear sky beam radiation normal to sun
    GHI, ghi†                 float   Global horizontal radiation
    BHI, bhi†                 float   Beam (direct) radiation on horizontal
    DHI, dhi†                 float   Diffuse horizontal radiation
    BNI, dni†                 float   Beam (direct) radiation normal to the sun
    Reliability†              float   Reliable data fraction in summarization
    ========================  ======  =========================================

    †Parameters only returned if identifier='cams_radiation'. For description
    of additional output parameters in verbose mode, see [1]_.

    Note that it is recommended to specify the latitude and longitude to at
    least the fourth decimal place.

    Variables corresponding to standard pvlib variables are renamed,
    e.g. `sza` becomes `solar_zenith`. See variable :const:`VARIABLE_MAP` for
    the complete mapping.

    See Also
    --------
    pvlib.iotools.read_cams, pvlib.iotools.parse_cams

    References
    ----------
    .. [1] `CAMS solar radiation documentation
       <https://atmosphere.copernicus.eu/solar-radiation>`_
    .. [2] `CAMS Radiation Automatic Access (SoDa)
       <https://www.soda-pro.com/help/cams-services/cams-radiation-service/automatic-access>`_
    """
    try:
        time_step_str = TIME_STEPS_MAP[time_step]
    except KeyError:
        # The KeyError itself carries no extra information for the user
        raise ValueError('Time step not recognized. Must be one of '
                         f'{list(TIME_STEPS_MAP.keys())}') from None

    if verbose and (time_step != '1min' or time_ref != 'UT'):
        verbose = False
        warnings.warn("Verbose mode only supports 1 min. UT time series!")

    if identifier not in ['mcclear', 'cams_radiation']:
        raise ValueError('Identifier must be either mcclear or cams_radiation')

    # Format verbose variable to the required format: {'true', 'false'}
    verbose = str(verbose).lower()

    if altitude is None:  # Let SoDa get elevation from the NASA SRTM database
        altitude = -999

    # Start and end date should be in the format: yyyy-mm-dd
    start = pd.to_datetime(start).strftime('%Y-%m-%d')
    end = pd.to_datetime(end).strftime('%Y-%m-%d')

    # '@' is sent as '%2540', i.e. percent-encoded twice.
    # NOTE(review): presumably required by the SoDa service - confirm
    # against the automatic access documentation before changing.
    email = email.replace('@', '%2540')
    identifier = 'get_{}'.format(identifier.lower())  # Format identifier str

    base_url = f"https://{server}/service/wps"

    data_inputs_dict = {
        'latitude': latitude,
        'longitude': longitude,
        'altitude': altitude,
        'date_begin': start,
        'date_end': end,
        'time_ref': time_ref,
        'summarization': time_step_str,
        'username': email,
        'verbose': verbose,
    }

    # Manual formatting of the input parameters separating each by a semicolon
    data_inputs = ";".join(f"{key}={value}"
                           for key, value in data_inputs_dict.items())

    params = {
        'Service': 'WPS',
        'Request': 'Execute',
        'Identifier': identifier,
        'version': '1.0.0',
        'RawDataOutput': 'irradiation',
    }

    # The DataInputs parameter of the URL has to be manually formatted and
    # added to the base URL as it contains sub-parameters separated by
    # semi-colons, which gets incorrectly formatted by the requests function
    # if passed using the params argument.
    res = requests.get(base_url + '?DataInputs=' + data_inputs, params=params,
                       timeout=timeout)

    # Compare only the media type: drop parameters such as "; charset=utf-8"
    # and tolerate a missing header instead of failing with a KeyError.
    content_type = (res.headers.get('Content-Type', '')
                    .split(';')[0].strip().lower())

    # Invalid requests return an XML error message, possibly together with
    # the HTTP status code 200 as if the request was successful. The XML
    # response is therefore checked before the status code so that the
    # message of the service is what gets reported.
    if content_type in ('application/xml', 'text/xml'):
        parts = res.text.split('ows:ExceptionText')
        # Fall back to the full body if no ExceptionText element is present
        errors = parts[1][1:-2] if len(parts) > 1 else res.text
        raise requests.HTTPError(errors, response=res)

    # Any other failure signalled through the HTTP status code
    res.raise_for_status()

    # Never fall through and return None: callers unpack (data, metadata)
    if content_type not in ('application/csv', 'text/csv'):
        raise requests.HTTPError(
            'Unexpected content type returned by the CAMS service: '
            f'{content_type!r}', response=res)

    # Successful requests return a csv data file
    fbuf = io.StringIO(res.content.decode('utf-8'))
    data, metadata = parse_cams(fbuf, integrated=integrated, label=label,
                                map_variables=map_variables)
    return data, metadata
def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
    """
    Parse a file-like buffer with data in the format of a CAMS Radiation or
    McClear file. The CAMS solar radiation services are described in [1]_.

    Parameters
    ----------
    fbuf : file-like object
        File-like object containing data to read. Header lines are consumed
        up to and including the line starting with ``# Observation period``,
        which holds the column names; the remainder is read as
        semicolon-separated data.
    integrated : boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label : {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables : bool, default: True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pandas.DataFrame
        Timeseries data from CAMS Radiation or McClear
    metadata : dict
        Metadata available in the file.

    Raises
    ------
    ValueError
        If `label` is not 'left', 'right' or None, or if the end of the
        buffer is reached without finding the column name line.
    KeyError
        If the summarization period is missing from the metadata or is not
        one of the keys of :const:`SUMMATION_PERIOD_TO_TIME_STEP`.

    See Also
    --------
    pvlib.iotools.read_cams, pvlib.iotools.get_cams

    References
    ----------
    .. [1] `CAMS solar radiation documentation
       <https://atmosphere.copernicus.eu/solar-radiation>`_
    """
    # Reject unknown labels up front; otherwise neither index branch below
    # would apply and the data would silently keep an integer index.
    if label not in (None, 'left', 'right'):
        raise ValueError("label must be 'left', 'right' or None, "
                         f"not {label!r}")

    metadata = {}
    # Initial lines starting with # contain metadata
    while True:
        raw_line = fbuf.readline()
        if not raw_line:
            # readline() returns an empty string at end of file; without
            # this check a buffer lacking the header would loop forever.
            raise ValueError("Reached the end of the buffer without finding "
                             "the '# Observation period' column name line")
        line = raw_line.rstrip('\r\n')
        if line.startswith('# Observation period'):
            # The last line of the metadata section contains the column names
            names = line.lstrip('# ').split(';')
            break  # End of metadata section has been reached
        elif ': ' in line:
            # Split on the first ': ' only so that values which themselves
            # contain ': ' are kept whole
            key, value = line.split(': ', 1)
            metadata[key.lstrip('# ')] = value

    # Convert latitude, longitude, and altitude values from strings to floats
    # (iterate over a copy of the keys as entries are replaced in the loop)
    for k_old in list(metadata.keys()):
        k_new = k_old.lstrip().split(' ')[0].lower()
        if k_new in ['latitude', 'longitude', 'altitude']:
            metadata[k_new] = float(metadata.pop(k_old))

    metadata['radiation_unit'] = 'Wh/m^2' if integrated else 'W/m^2'

    # Determine the time_step from the metadata dictionary
    time_step = SUMMATION_PERIOD_TO_TIME_STEP[
        metadata['Summarization (integration) period']]
    metadata['time_step'] = time_step

    data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names)

    # Each observation period is formatted as "<start>/<end>"
    obs_period = data['Observation period'].str.split('/')

    # Default label for monthly data is 'right' following Pandas' convention,
    # all other time steps default to 'left'
    if label is None:
        edge = 'right' if time_step == '1M' else 'left'
    else:
        edge = label

    # Set index as the start (left) or stop (right) observation time and
    # localize to UTC
    position = 0 if edge == 'left' else 1
    data.index = pd.to_datetime(obs_period.str[position], utc=True)

    # For time_steps '1d' and '1M', drop timezone and round to nearest midnight
    if time_step in ('1d', '1M'):
        data.index = pd.DatetimeIndex(data.index.date)
    # For monthly data with 'right' label, the index should be the last
    # date of the month and not the first date of the following month
    if time_step == '1M' and edge == 'right':
        data.index = data.index - pd.Timedelta(days=1)

    if not integrated:  # Convert radiation values from Wh/m2 to W/m2
        integrated_cols = [c for c in CAMS_INTEGRATED_COLUMNS
                           if c in data.columns]

        if time_step == '1M':
            # Months differ in length, so the number of hours is computed
            # for each row from its own observation period
            time_delta = (pd.to_datetime(obs_period.str[1])
                          - pd.to_datetime(obs_period.str[0]))
            hours = time_delta.dt.total_seconds()/60/60
            # Passed as a plain list so that the division is positional:
            # `hours` still carries the original row index whereas `data`
            # is now indexed by date
            data[integrated_cols] = data[integrated_cols].\
                divide(hours.tolist(), axis='rows')
        else:
            data[integrated_cols] = (data[integrated_cols] /
                                     TIME_STEPS_IN_HOURS[time_step])
    data.index.name = None  # Set index name to None
    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)

    return data, metadata
def read_cams(filename, integrated=False, label=None, map_variables=True):
    """
    Read a CAMS Radiation or McClear file into a pandas DataFrame.

    CAMS Radiation and McClear are described in [1]_.

    Parameters
    ----------
    filename : str
        Filename of a file containing data to read. The value is converted
        with ``str()`` before the file is opened.
    integrated : boolean, default False
        Whether to return radiation parameters as integrated values (Wh/m^2)
        or as average irradiance values (W/m^2) (pvlib preferred units)
    label : {'right', 'left'}, default: None
        Which bin edge label to label time-step with. The default is 'left' for
        all time steps except for '1M' which has a default of 'right'.
    map_variables : bool, default: True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pandas.DataFrame
        Timeseries data from CAMS Radiation or McClear.
        See :func:`pvlib.iotools.get_cams` for fields.
    metadata : dict
        Metadata available in the file.

    See Also
    --------
    pvlib.iotools.parse_cams, pvlib.iotools.get_cams

    References
    ----------
    .. [1] `CAMS solar radiation documentation
       <https://atmosphere.copernicus.eu/solar-radiation>`_
    """
    # Read the file as UTF-8, the encoding get_cams uses to decode the same
    # payload, instead of relying on the platform default encoding.
    with open(str(filename), 'r', encoding='utf-8') as fbuf:
        content = parse_cams(fbuf, integrated, label, map_variables)
    return content