"""Functions to read and retrieve SolarAnywhere data."""
import requests
import pandas as pd
import numpy as np
import time
import json
# Base URL of the SolarAnywhere API (default for ``get_solaranywhere``)
URL = 'https://service.solaranywhere.com/api/v2'

# Dictionary mapping SolarAnywhere names to standard pvlib names.
# Two spellings exist for each quantity: names with spaces are used in
# SolarAnywhere files, and names without spaces are used by the
# SolarAnywhere API.
VARIABLE_MAP = {
    # Measured/modelled irradiance components
    'Global Horizontal Irradiance (GHI) W/m2': 'ghi',
    'GlobalHorizontalIrradiance_WattsPerMeterSquared': 'ghi',
    'DirectNormalIrradiance_WattsPerMeterSquared': 'dni',
    'Direct Normal Irradiance (DNI) W/m2': 'dni',
    'Diffuse Horizontal Irradiance (DIF) W/m2': 'dhi',
    'DiffuseHorizontalIrradiance_WattsPerMeterSquared': 'dhi',
    # Weather variables
    'AmbientTemperature (deg C)': 'temp_air',
    'AmbientTemperature_DegreesC': 'temp_air',
    'WindSpeed (m/s)': 'wind_speed',
    'WindSpeed_MetersPerSecond': 'wind_speed',
    'Relative Humidity (%)': 'relative_humidity',
    'RelativeHumidity_Percent': 'relative_humidity',
    # Clear-sky irradiance components
    'Clear Sky GHI': 'ghi_clear',
    'ClearSkyGHI_WattsPerMeterSquared': 'ghi_clear',
    'Clear Sky DNI': 'dni_clear',
    'ClearSkyDNI_WattsPerMeterSquared': 'dni_clear',
    'Clear Sky DHI': 'dhi_clear',
    'ClearSkyDHI_WattsPerMeterSquared': 'dhi_clear',
    # Surface albedo
    'Albedo': 'albedo',
    'Albedo_Unitless': 'albedo',
}

# Output fields requested from the API when the caller of
# ``get_solaranywhere`` does not pass its own ``variables``.
DEFAULT_VARIABLES = [
    'StartTime',
    'ObservationTime',
    'EndTime',
    'GlobalHorizontalIrradiance_WattsPerMeterSquared',
    'DirectNormalIrradiance_WattsPerMeterSquared',
    'DiffuseHorizontalIrradiance_WattsPerMeterSquared',
    'AmbientTemperature_DegreesC',
    'WindSpeed_MetersPerSecond',
    'Albedo_Unitless',
    'DataVersion',
]
def get_solaranywhere(latitude, longitude, api_key, start=None, end=None,
                      source='SolarAnywhereLatest', time_resolution=60,
                      spatial_resolution=0.01, true_dynamics=False,
                      probability_of_exceedance=None,
                      variables=DEFAULT_VARIABLES, missing_data='FillAverage',
                      url=URL, map_variables=True, timeout=300):
    """Retrieve historical irradiance time series data from SolarAnywhere.

    The SolarAnywhere API is described in [1]_ and [2]_. A detailed list of
    API options can be found in [3]_.

    Parameters
    ----------
    latitude: float
        In decimal degrees, north is positive (ISO 19115).
    longitude: float
        In decimal degrees, east is positive (ISO 19115).
    api_key: str
        SolarAnywhere API key.
    start: datetime like, optional
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed. Not applicable for TMY data. Must be
        given together with ``end``.
    end: datetime like, optional
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed. Not applicable for TMY data. Must be
        given together with ``start``.
    source: str, default: 'SolarAnywhereLatest'
        Data source. Options include: 'SolarAnywhereLatest' (historical data),
        'SolarAnywhereTGYLatest' (TMY for GHI), 'SolarAnywhereTDYLatest' (TMY
        for DNI), or 'SolarAnywherePOELatest' for probability of exceedance.
        Specific dataset versions can also be specified, e.g.,
        'SolarAnywhere3_2' (see [3]_ for a full list of options).
    time_resolution: {60, 30, 15, 5}, default: 60
        Time resolution in minutes. For TMY data, time resolution has to be 60
        minutes (hourly).
    spatial_resolution: {0.1, 0.01, 0.005}, default: 0.01
        Spatial resolution in degrees.
    true_dynamics: bool, default: False
        Whether to apply SolarAnywhere TrueDynamics statistical processing.
        Only available for the 5-minute time resolution.
    probability_of_exceedance: int, optional
        Probability of exceedance in the range of 1 to 99. Only relevant when
        requesting probability of exceedance (POE) time series. [%]
    variables: list-like, default: :const:`DEFAULT_VARIABLES`
        Variables to retrieve (described in [4]_), must include
        'ObservationTime'. Available variables depend on whether historical or
        TMY data is requested.
    missing_data: {'Omit', 'FillAverage'}, default: 'FillAverage'
        Method for treating missing data.
    url: str, default: :const:`pvlib.iotools.solaranywhere.URL`
        Base url of SolarAnywhere API.
    map_variables: bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See :const:`VARIABLE_MAP`.
    timeout: float, default: 300
        Time in seconds to wait for requested data to become available.

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data from SolarAnywhere. The index is the observation time
        (middle of period).
    metadata: dict
        Metadata available (includes site latitude, longitude, and altitude).

    Raises
    ------
    ValueError
        If ``probability_of_exceedance`` is not an integer, if only one of
        ``start`` and ``end`` is specified, or if the API rejects the data
        request (the API's message is used as the error message).
    RuntimeError
        If the request completes but the result for the site is reported as
        a failure.
    TimeoutError
        If the data is not available within ``timeout`` seconds.

    See Also
    --------
    pvlib.iotools.read_solaranywhere

    Note
    ----
    SolarAnywhere data requests are asynchronous, and it might take several
    minutes for the requested data to become available.

    Examples
    --------
    >>> # Retrieve one month of SolarAnywhere data for Atlanta, GA
    >>> data, meta = pvlib.iotools.get_solaranywhere(
    ...     latitude=33.765, longitude=-84.395, api_key='redacted',
    ...     start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,2,1))  # doctest: +SKIP

    References
    ----------
    .. [1] `SolarAnywhere API
       <https://www.solaranywhere.com/support/using-solaranywhere/api/>`_
    .. [2] `SolarAnywhere irradiance and weather API requests
       <https://developers.cleanpower.com/irradiance-and-weather-data/irradiance-and-weather-requests/>`_
    .. [3] `SolarAnywhere API options
       <https://developers.cleanpower.com/irradiance-and-weather-data/complete-schema/createweatherdatarequest/options/>`_
    .. [4] `SolarAnywhere variable definitions
       <https://www.solaranywhere.com/support/data-fields/definitions/>`_
    """  # noqa: E501
    headers = {'content-type': "application/json; charset=utf-8",
               'X-Api-Key': api_key,
               'Accept': "application/json"}

    payload = {
        "Sites": [{
            "Latitude": latitude,
            "Longitude": longitude
        }],
        "Options": {
            # Copy into a plain list: `variables` is documented as list-like,
            # but json.dumps cannot serialize e.g. sets or pandas Index.
            "OutputFields": list(variables),
            "SummaryOutputFields": [],  # Do not request summary/monthly data
            "SpatialResolution_Degrees": spatial_resolution,
            "TimeResolution_Minutes": time_resolution,
            "WeatherDataSource": source,
            "MissingDataHandling": missing_data,
        }
    }

    if true_dynamics:
        payload['Options']['ApplyTrueDynamics'] = True

    if probability_of_exceedance is not None:
        if not isinstance(probability_of_exceedance, int):
            raise ValueError('`probability_of_exceedance` must be an integer')
        payload['Options']['ProbabilityOfExceedance'] = \
            probability_of_exceedance

    # Add start/end time if requesting non-TMY data
    if (start is not None) or (end is not None):
        # Fail early with a clear message; otherwise the missing bound would
        # surface below as an AttributeError on None.
        if (start is None) or (end is None):
            raise ValueError('`start` and `end` must either both be '
                             'specified or both be omitted')
        # Convert start/end to datetime in case they are specified as strings
        start = pd.to_datetime(start)
        end = pd.to_datetime(end)
        # start/end are required to have an associated time zone
        if start.tz is None:
            start = start.tz_localize('UTC')
        if end.tz is None:
            end = end.tz_localize('UTC')
        payload['Options']["StartTime"] = start.isoformat()
        payload['Options']["EndTime"] = end.isoformat()

    # Convert the payload dictionary to a JSON string (uses double quotes)
    payload = json.dumps(payload)

    # Make data request
    response = requests.post(url+'/WeatherData', data=payload,
                             headers=headers)
    # Raise error if request is not OK
    if not response.ok:
        raise ValueError(response.json()['Message'])
    # Retrieve weather request ID
    weather_request_id = response.json()["WeatherRequestId"]

    # The SolarAnywhere API is asynchronous, hence a second request is
    # necessary to retrieve the data (WeatherDataResult).
    start_time = time.time()  # Current time in seconds since the Epoch
    # Attempt to retrieve results until the max response time has been exceeded
    while True:
        results = requests.get(url+'/WeatherDataResult/'+weather_request_id,
                               headers=headers)
        results_json = results.json()
        if results_json.get('Status') == 'Done':
            # Only one site is requested, so only the first result is used
            site_result = results_json['WeatherDataResults'][0]
            if site_result['Status'] == 'Failure':
                raise RuntimeError(
                    site_result['ErrorMessages'][0]['Message'])
            break
        elif (time.time()-start_time) > timeout:
            raise TimeoutError('Time exceeded the `timeout`.')
        time.sleep(5)  # Sleep for 5 seconds before each data retrieval attempt

    # Extract time series data
    data_periods = site_result['WeatherDataPeriods']
    data = pd.DataFrame(data_periods['WeatherDataPeriods'])
    # Set datetime index
    data.index = pd.to_datetime(data['ObservationTime'])
    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)

    # Parse metadata
    meta = site_result['WeatherSourceInformation']
    meta['time_resolution'] = data_periods['TimeResolution_Minutes']
    meta['spatial_resolution'] = spatial_resolution
    # Rename and convert applicable metadata parameters to floats
    meta['latitude'] = float(meta.pop('Latitude'))
    meta['longitude'] = float(meta.pop('Longitude'))
    meta['altitude'] = float(meta.pop('Elevation_Meters'))

    return data, meta
def read_solaranywhere(filename, map_variables=True, encoding='iso-8859-1'):
    """
    Read a SolarAnywhere formatted file into a pandas DataFrame.

    The SolarAnywhere file format and variables are described in [1]_. Note,
    the SolarAnywhere file format resembles the TMY3 file format but contains
    additional variables and metadata.

    Parameters
    ----------
    filename: str
        Filename
    map_variables: bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See :const:`VARIABLE_MAP`.
    encoding : str, default : 'iso-8859-1'
        Encoding of the file. For SolarAnywhere TMY3 files the 'iso-8859-1'
        encoding is recommended due to the usage of special characters.

    Returns
    -------
    data: pandas.DataFrame
        Timeseries data from SolarAnywhere. The index is timezone-aware,
        localized to the fixed UTC offset given in the file's metadata.
    metadata: dict
        Metadata available in the file.

    See Also
    --------
    pvlib.iotools.get_solaranywhere

    References
    ----------
    .. [1] `SolarAnywhere historical data file formats
       <https://www.solaranywhere.com/support/historical-data/file-formats/>`_
    """
    with open(str(filename), 'r', encoding=encoding) as fbuf:
        # Extract first line of file which contains the metadata
        firstline = fbuf.readline().strip().split(',')
        # Read remaining part of file which contains the time series data
        data = pd.read_csv(fbuf)

    # Parse metadata; the leading fields are positional
    meta = {}
    meta['USAF'] = int(firstline.pop(0))
    meta['name'] = firstline.pop(0)
    meta['state'] = firstline.pop(0)
    meta['TZ'] = float(firstline.pop(0))
    meta['latitude'] = float(firstline.pop(0))
    meta['longitude'] = float(firstline.pop(0))
    meta['altitude'] = float(firstline.pop(0))

    # SolarAnywhere files contain additional metadata than the TMY3 format.
    # The additional metadata is specified as key-value pairs, where each entry
    # is separated by a slash, and the key-value pairs are separated by a
    # colon. E.g., 'Data Version: 3.4 / Type: Typical Year / ...'
    for entry in ','.join(firstline).replace('"', '').split('/'):
        # Split on the first colon only, so that values which themselves
        # contain a colon (e.g. a time of day) are kept intact.
        key, separator, value = entry.partition(':')
        if separator:
            meta[key.strip()] = value.strip()

    # Convert the resolution to a number when the file provides it
    if 'LatLon Resolution' in meta:
        meta['LatLon Resolution'] = float(meta['LatLon Resolution'])

    # Set index
    data.index = pd.to_datetime(data['ObservationTime(LST)'],
                                format='%m/%d/%Y %H:%M')
    # Set timezone (an integer is interpreted as a UTC offset in seconds)
    data = data.tz_localize(int(meta['TZ'] * 3600))
    # Remove notion of LST in case the index is later converted to another tz
    data.index = data.index.rename(data.index.name.replace('(LST)', ''))

    # Missing values can be represented as: blanks, 'NaN', or -999
    data = data.replace(-999, np.nan)

    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)

    return data, meta