Source code for pvlib.iotools.solaranywhere

"""Functions to read and retrieve SolarAnywhere data."""

import requests
import pandas as pd
import numpy as np
import time
import json

URL = 'https://service.solaranywhere.com/api/v2'

# Dictionary mapping SolarAnywhere names to standard pvlib names
# Names with spaces are used in SolarAnywhere files, and names without spaces
# are used by the SolarAnywhere API
VARIABLE_MAP = {
    'Global Horizontal Irradiance (GHI) W/m2': 'ghi',
    'GlobalHorizontalIrradiance_WattsPerMeterSquared': 'ghi',
    'DirectNormalIrradiance_WattsPerMeterSquared': 'dni',
    'Direct Normal Irradiance (DNI) W/m2': 'dni',
    'Diffuse Horizontal Irradiance (DIF) W/m2': 'dhi',
    'DiffuseHorizontalIrradiance_WattsPerMeterSquared': 'dhi',
    'AmbientTemperature (deg C)': 'temp_air',
    'AmbientTemperature_DegreesC': 'temp_air',
    'WindSpeed (m/s)': 'wind_speed',
    'WindSpeed_MetersPerSecond': 'wind_speed',
    'Relative Humidity (%)': 'relative_humidity',
    'RelativeHumidity_Percent': 'relative_humidity',
    'Clear Sky GHI': 'ghi_clear',
    'ClearSkyGHI_WattsPerMeterSquared': 'ghi_clear',
    'Clear Sky DNI': 'dni_clear',
    'ClearSkyDNI_WattsPerMeterSquared': 'dni_clear',
    'Clear Sky DHI': 'dhi_clear',
    'ClearSkyDHI_WattsPerMeterSquared': 'dhi_clear',
    'Albedo': 'albedo',
    'Albedo_Unitless': 'albedo',
}

DEFAULT_VARIABLES = [
    'StartTime', 'ObservationTime', 'EndTime',
    'GlobalHorizontalIrradiance_WattsPerMeterSquared',
    'DirectNormalIrradiance_WattsPerMeterSquared',
    'DiffuseHorizontalIrradiance_WattsPerMeterSquared',
    'AmbientTemperature_DegreesC', 'WindSpeed_MetersPerSecond',
    'Albedo_Unitless', 'DataVersion'
]


[docs]def get_solaranywhere(latitude, longitude, api_key, start=None, end=None, source='SolarAnywhereLatest', time_resolution=60, spatial_resolution=0.01, true_dynamics=False, probability_of_exceedance=None, variables=DEFAULT_VARIABLES, missing_data='FillAverage', url=URL, map_variables=True, timeout=300): """Retrieve historical irradiance time series data from SolarAnywhere. The SolarAnywhere API is described in [1]_ and [2]_. A detailed list of API options can be found in [3]_. Parameters ---------- latitude: float In decimal degrees, north is positive (ISO 19115). longitude: float In decimal degrees, east is positive (ISO 19115). api_key: str SolarAnywhere API key. start: datetime like, optional First timestamp of the requested period. If a timezone is not specified, UTC is assumed. Not applicable for TMY data. end: datetime like, optional Last timestamp of the requested period. If a timezone is not specified, UTC is assumed. Not applicable for TMY data. source: str, default: 'SolarAnywhereLatest' Data source. Options include: 'SolarAnywhereLatest' (historical data), 'SolarAnywhereTGYLatest' (TMY for GHI), 'SolarAnywhereTDYLatest' (TMY for DNI), or 'SolarAnywherePOELatest' for probability of exceedance. Specific dataset versions can also be specified, e.g., 'SolarAnywhere3_2' (see [3]_ for a full list of options). time_resolution: {60, 30, 15, 5}, default: 60 Time resolution in minutes. For TMY data, time resolution has to be 60 minutes (hourly). spatial_resolution: {0.1, 0.01, 0.005}, default: 0.01 Spatial resolution in degrees. true_dynamics: bool, default: False Whether to apply SolarAnywhere TrueDynamics statistical processing. Only available for the 5-minute time resolution. probability_of_exceedance: int, optional Probability of exceedance in the range of 1 to 99. Only relevant when requesting probability of exceedance (POE) time series. [%] variables: list-like, default: :const:`DEFAULT_VARIABLES` Variables to retrieve (described in [4]_), must include 'ObservationTime'. Available variables depend on whether historical or TMY data is requested. missing_data: {'Omit', 'FillAverage'}, default: 'FillAverage' Method for treating missing data. url: str, default: :const:`pvlib.iotools.solaranywhere.URL` Base url of SolarAnywhere API. map_variables: bool, default: True When true, renames columns of the DataFrame to pvlib variable names where applicable. See :const:`VARIABLE_MAP`. timeout: float, default: 300 Time in seconds to wait for requested data to become available. Returns ------- data: pandas.DataFrame Timeseries data from SolarAnywhere. The index is the observation time (middle of period). metadata: dict Metadata available (includes site latitude, longitude, and altitude). See Also -------- pvlib.iotools.read_solaranywhere Note ---- SolarAnywhere data requests are asynchronous, and it might take several minutes for the requested data to become available. Examples -------- >>> # Retrieve one month of SolarAnywhere data for Atlanta, GA >>> data, meta = pvlib.iotools.get_solaranywhere( ... latitude=33.765, longitude=-84.395, api_key='redacted', ... start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,2,1)) # doctest: +SKIP References ---------- .. [1] `SolarAnywhere API <https://www.solaranywhere.com/support/using-solaranywhere/api/>`_ .. [2] `SolarAnywhere irradiance and weather API requests <https://developers.cleanpower.com/irradiance-and-weather-data/irradiance-and-weather-requests/>`_ .. [3] `SolarAnywhere API options <https://developers.cleanpower.com/irradiance-and-weather-data/complete-schema/createweatherdatarequest/options/>`_ .. [4] `SolarAnywhere variable definitions <https://www.solaranywhere.com/support/data-fields/definitions/>`_ """ # noqa: E501 headers = {'content-type': "application/json; charset=utf-8", 'X-Api-Key': api_key, 'Accept': "application/json"} payload = { "Sites": [{ "Latitude": latitude, "Longitude": longitude }], "Options": { "OutputFields": variables, "SummaryOutputFields": [], # Do not request summary/monthly data "SpatialResolution_Degrees": spatial_resolution, "TimeResolution_Minutes": time_resolution, "WeatherDataSource": source, "MissingDataHandling": missing_data, } } if true_dynamics: payload['Options']['ApplyTrueDynamics'] = True if probability_of_exceedance is not None: if not isinstance(probability_of_exceedance, int): raise ValueError('`probability_of_exceedance` must be an integer') payload['Options']['ProbabilityOfExceedance'] = \ probability_of_exceedance # Add start/end time if requesting non-TMY data if (start is not None) or (end is not None): # Convert start/end to datetime in case they are specified as strings start = pd.to_datetime(start) end = pd.to_datetime(end) # start/end are required to have an associated time zone if start.tz is None: start = start.tz_localize('UTC') if end.tz is None: end = end.tz_localize('UTC') payload['Options']["StartTime"] = start.isoformat() payload['Options']["EndTime"] = end.isoformat() # Convert the payload dictionary to a JSON string (uses double quotes) payload = json.dumps(payload) # Make data request request = requests.post(url+'/WeatherData', data=payload, headers=headers) # Raise error if request is not OK if request.ok is False: raise ValueError(request.json()['Message']) # Retrieve weather request ID weather_request_id = request.json()["WeatherRequestId"] # The SolarAnywhere API is asynchronous, hence a second request is # necessary to retrieve the data (WeatherDataResult). start_time = time.time() # Current time in seconds since the Epoch # Attempt to retrieve results until the max response time has been exceeded while True: results = requests.get(url+'/WeatherDataResult/'+weather_request_id, headers=headers) # noqa: E501 results_json = results.json() if results_json.get('Status') == 'Done': if results_json['WeatherDataResults'][0]['Status'] == 'Failure': raise RuntimeError(results_json['WeatherDataResults'][0]['ErrorMessages'][0]['Message']) # noqa: E501 break elif (time.time()-start_time) > timeout: raise TimeoutError('Time exceeded the `timeout`.') time.sleep(5) # Sleep for 5 seconds before each data retrieval attempt # Extract time series data data = pd.DataFrame(results_json['WeatherDataResults'][0]['WeatherDataPeriods']['WeatherDataPeriods']) # noqa: E501 # Set datetime index data.index = pd.to_datetime(data['ObservationTime']) if map_variables: data = data.rename(columns=VARIABLE_MAP) # Parse metadata meta = results_json['WeatherDataResults'][0]['WeatherSourceInformation'] meta['time_resolution'] = results_json['WeatherDataResults'][0]['WeatherDataPeriods']['TimeResolution_Minutes'] # noqa: E501 meta['spatial_resolution'] = spatial_resolution # Rename and convert applicable metadata parameters to floats meta['latitude'] = float(meta.pop('Latitude')) meta['longitude'] = float(meta.pop('Longitude')) meta['altitude'] = float(meta.pop('Elevation_Meters')) return data, meta
[docs]def read_solaranywhere(filename, map_variables=True, encoding='iso-8859-1'): """ Read a SolarAnywhere formatted file into a pandas DataFrame. The SolarAnywhere file format and variables are described in [1]_. Note, the SolarAnywhere file format resembles the TMY3 file format but contains additional variables and metadata. Parameters ---------- filename: str Filename map_variables: bool, default: True When true, renames columns of the DataFrame to pvlib variable names where applicable. See :const:`VARIABLE_MAP`. encoding : str, default : 'iso-8859-1' Encoding of the file. For SolarAnywhere TMY3 files the 'iso-8859-1' encoding is recommended due to the usage of special characters. Returns ------- data: pandas.DataFrame Timeseries data from SolarAnywhere. metadata: dict Metadata available in the file. See Also -------- pvlib.iotools.get_solaranywhere References ---------- .. [1] `SolarAnywhere historical data file formats <https://www.solaranywhere.com/support/historical-data/file-formats/>`_ """ with open(str(filename), 'r', encoding=encoding) as fbuf: # Extract first line of file which contains the metadata firstline = fbuf.readline().strip().split(',') # Read remaining part of file which contains the time series data data = pd.read_csv(fbuf) # Parse metadata meta = {} meta['USAF'] = int(firstline.pop(0)) meta['name'] = firstline.pop(0) meta['state'] = firstline.pop(0) meta['TZ'] = float(firstline.pop(0)) meta['latitude'] = float(firstline.pop(0)) meta['longitude'] = float(firstline.pop(0)) meta['altitude'] = float(firstline.pop(0)) # SolarAnywhere files contain additional metadata than the TMY3 format. # The additional metadata is specified as key-value pairs, where each entry # is separated by a slash, and the key-value pairs are separated by a # colon. E.g., 'Data Version: 3.4 / Type: Typical Year / ...' for i in ','.join(firstline).replace('"', '').split('/'): if ':' in i: k, v = i.split(':') meta[k.strip()] = v.strip() meta['LatLon Resolution'] = float(meta['LatLon Resolution']) # Set index data.index = pd.to_datetime(data['ObservationTime(LST)'], format='%m/%d/%Y %H:%M') # Set timezone data = data.tz_localize(int(meta['TZ'] * 3600)) # Remove notion of LST in case the index is later converted to another tz data.index.name = data.index.name.replace('(LST)', '') # Missing values can be represented as: blanks, 'NaN', or -999 data = data.replace(-999, np.nan) if map_variables: data = data.rename(columns=VARIABLE_MAP) return data, meta