Source code for pvlib.iotools.pvgis

"""
Get, read, and parse data from `PVGIS <https://ec.europa.eu/jrc/en/pvgis>`_.

For more information, see the following links:
* `Interactive Tools <https://re.jrc.ec.europa.eu/pvg_tools/en/tools.html>`_
* `Data downloads <https://ec.europa.eu/jrc/en/PVGIS/downloads/data>`_
* `User manual docs <https://ec.europa.eu/jrc/en/PVGIS/docs/usermanual>`_

More detailed information about the API for TMY and hourly radiation are here:
* `TMY <https://ec.europa.eu/jrc/en/PVGIS/tools/tmy>`_
* `hourly radiation
  <https://ec.europa.eu/jrc/en/PVGIS/tools/hourly-radiation>`_
* `daily radiation <https://ec.europa.eu/jrc/en/PVGIS/tools/daily-radiation>`_
* `monthly radiation
  <https://ec.europa.eu/jrc/en/PVGIS/tools/monthly-radiation>`_
"""
import io
import json
from pathlib import Path
import requests
import pandas as pd
from pvlib.iotools import read_epw, parse_epw
import warnings
from pvlib._deprecation import pvlibDeprecationWarning

URL = 'https://re.jrc.ec.europa.eu/api/'

# Dictionary mapping PVGIS names to pvlib names
PVGIS_VARIABLE_MAP = {
    'G(h)': 'ghi',
    'Gb(n)': 'dni',
    'Gd(h)': 'dhi',
    'G(i)': 'poa_global',
    'Gb(i)': 'poa_direct',
    'Gd(i)': 'poa_sky_diffuse',
    'Gr(i)': 'poa_ground_diffuse',
    'H_sun': 'solar_elevation',
    'T2m': 'temp_air',
    'RH': 'relative_humidity',
    'SP': 'pressure',
    'WS10m': 'wind_speed',
    'WD10m': 'wind_direction',
}


[docs]def get_pvgis_hourly(latitude, longitude, start=None, end=None, raddatabase=None, components=True, surface_tilt=0, surface_azimuth=0, outputformat='json', usehorizon=True, userhorizon=None, pvcalculation=False, peakpower=None, pvtechchoice='crystSi', mountingplace='free', loss=0, trackingtype=0, optimal_surface_tilt=False, optimalangles=False, url=URL, map_variables=True, timeout=30): """Get hourly solar irradiation and modeled PV power output from PVGIS. PVGIS data is freely available at [1]_. Parameters ---------- latitude: float In decimal degrees, between -90 and 90, north is positive (ISO 19115) longitude: float In decimal degrees, between -180 and 180, east is positive (ISO 19115) start: int or datetime like, default: None First year of the radiation time series. Defaults to first year available. end: int or datetime like, default: None Last year of the radiation time series. Defaults to last year available. raddatabase: str, default: None Name of radiation database. Options depend on location, see [3]_. components: bool, default: True Output solar radiation components (beam, diffuse, and reflected). Otherwise only global irradiance is returned. surface_tilt: float, default: 0 Tilt angle from horizontal plane. Ignored for two-axis tracking. surface_azimuth: float, default: 0 Orientation (azimuth angle) of the (fixed) plane. 0=south, 90=west, -90: east. Ignored for tracking systems. usehorizon: bool, default: True Include effects of horizon userhorizon: list of float, default: None Optional user specified elevation of horizon in degrees, at equally spaced azimuth clockwise from north, only valid if `usehorizon` is true, if `usehorizon` is true but `userhorizon` is `None` then PVGIS will calculate the horizon [4]_ pvcalculation: bool, default: False Return estimate of hourly PV production. peakpower: float, default: None Nominal power of PV system in kW. Required if pvcalculation=True. pvtechchoice: {'crystSi', 'CIS', 'CdTe', 'Unknown'}, default: 'crystSi' PV technology. mountingplace: {'free', 'building'}, default: free Type of mounting for PV system. Options of 'free' for free-standing and 'building' for building-integrated. loss: float, default: 0 Sum of PV system losses in percent. Required if pvcalculation=True trackingtype: {0, 1, 2, 3, 4, 5}, default: 0 Type of suntracking. 0=fixed, 1=single horizontal axis aligned north-south, 2=two-axis tracking, 3=vertical axis tracking, 4=single horizontal axis aligned east-west, 5=single inclined axis aligned north-south. optimal_surface_tilt: bool, default: False Calculate the optimum tilt angle. Ignored for two-axis tracking optimalangles: bool, default: False Calculate the optimum tilt and azimuth angles. Ignored for two-axis tracking. outputformat: str, default: 'json' Must be in ``['json', 'csv']``. See PVGIS hourly data documentation [2]_ for more info. url: str, default: const:`pvlib.iotools.pvgis.URL` Base url of PVGIS API. ``seriescalc`` is appended to get hourly data endpoint. map_variables: bool, default: True When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable PVGIS_VARIABLE_MAP. timeout: int, default: 30 Time in seconds to wait for server response before timeout Returns ------- data : pandas.DataFrame Time-series of hourly data, see Notes for fields inputs : dict Dictionary of the request input parameters metadata : dict Dictionary containing metadata Raises ------ requests.HTTPError If the request response status is ``HTTP/1.1 400 BAD REQUEST``, then the error message in the response will be raised as an exception, otherwise raise whatever ``HTTP/1.1`` error occurred Hint ---- PVGIS provides access to a number of different solar radiation datasets, including satellite-based (SARAH, CMSAF, and NSRDB PSM3) and re-analysis products (ERA5 and COSMO). Each data source has a different geographical coverage and time stamp convention, e.g., SARAH and CMSAF provide instantaneous values, whereas values from ERA5 are averages for the hour. Notes ----- data includes the following fields: =========================== ====== ====================================== raw, mapped Format Description =========================== ====== ====================================== *Mapped field names are returned when the map_variables argument is True* --------------------------------------------------------------------------- P† float PV system power (W) G(i), poa_global‡ float Global irradiance on inclined plane (W/m^2) Gb(i), poa_direct‡ float Beam (direct) irradiance on inclined plane (W/m^2) Gd(i), poa_sky_diffuse‡ float Diffuse irradiance on inclined plane (W/m^2) Gr(i), poa_ground_diffuse‡ float Reflected irradiance on inclined plane (W/m^2) H_sun, solar_elevation float Sun height/elevation (degrees) T2m, temp_air float Air temperature at 2 m (degrees Celsius) WS10m, wind_speed float Wind speed at 10 m (m/s) Int int Solar radiation reconstructed (1/0) =========================== ====== ====================================== †P (PV system power) is only returned when pvcalculation=True. ‡Gb(i), Gd(i), and Gr(i) are returned when components=True, otherwise the sum of the three components, G(i), is returned. See Also -------- pvlib.iotools.read_pvgis_hourly, pvlib.iotools.get_pvgis_tmy References ---------- .. [1] `PVGIS <https://ec.europa.eu/jrc/en/pvgis>`_ .. [2] `PVGIS Hourly Radiation <https://ec.europa.eu/jrc/en/PVGIS/tools/hourly-radiation>`_ .. [3] `PVGIS Non-interactive service <https://ec.europa.eu/jrc/en/PVGIS/docs/noninteractive>`_ .. [4] `PVGIS horizon profile tool <https://ec.europa.eu/jrc/en/PVGIS/tools/horizon>`_ """ # noqa: E501 # use requests to format the query string by passing params dictionary params = {'lat': latitude, 'lon': longitude, 'outputformat': outputformat, 'angle': surface_tilt, 'aspect': surface_azimuth, 'pvcalculation': int(pvcalculation), 'pvtechchoice': pvtechchoice, 'mountingplace': mountingplace, 'trackingtype': trackingtype, 'components': int(components), 'usehorizon': int(usehorizon), 'optimalangles': int(optimalangles), 'optimalinclination': int(optimalangles), 'loss': loss} # pvgis only takes 0 for False, and 1 for True, not strings if userhorizon is not None: params['userhorizon'] = ','.join(str(x) for x in userhorizon) if raddatabase is not None: params['raddatabase'] = raddatabase if start is not None: params['startyear'] = start if isinstance(start, int) else start.year if end is not None: params['endyear'] = end if isinstance(end, int) else end.year if peakpower is not None: params['peakpower'] = peakpower # The url endpoint for hourly radiation is 'seriescalc' res = requests.get(url + 'seriescalc', params=params, timeout=timeout) # PVGIS returns really well formatted error messages in JSON for HTTP/1.1 # 400 BAD REQUEST so try to return that if possible, otherwise raise the # HTTP/1.1 error caught by requests if not res.ok: try: err_msg = res.json() except Exception: res.raise_for_status() else: raise requests.HTTPError(err_msg['message']) return read_pvgis_hourly(io.StringIO(res.text), pvgis_format=outputformat, map_variables=map_variables)
def _parse_pvgis_hourly_json(src, map_variables): inputs = src['inputs'] metadata = src['meta'] data = pd.DataFrame(src['outputs']['hourly']) data.index = pd.to_datetime(data['time'], format='%Y%m%d:%H%M', utc=True) data = data.drop('time', axis=1) data = data.astype(dtype={'Int': 'int'}) # The 'Int' column to be integer if map_variables: data = data.rename(columns=PVGIS_VARIABLE_MAP) return data, inputs, metadata def _parse_pvgis_hourly_csv(src, map_variables): # The first 4 rows are latitude, longitude, elevation, radiation database inputs = {} # 'Latitude (decimal degrees): 45.000\r\n' inputs['latitude'] = float(src.readline().split(':')[1]) # 'Longitude (decimal degrees): 8.000\r\n' inputs['longitude'] = float(src.readline().split(':')[1]) # Elevation (m): 1389.0\r\n inputs['elevation'] = float(src.readline().split(':')[1]) # 'Radiation database: \tPVGIS-SARAH\r\n' inputs['radiation_database'] = src.readline().split(':')[1].strip() # Parse through the remaining metadata section (the number of lines for # this section depends on the requested parameters) while True: line = src.readline() if line.startswith('time,'): # The data header starts with 'time,' # The last line of the metadata section contains the column names names = line.strip().split(',') break # Only retrieve metadata from non-empty lines elif line.strip() != '': inputs[line.split(':')[0]] = line.split(':')[1].strip() elif line == '': # If end of file is reached raise ValueError('No data section was detected. File has probably ' 'been modified since being downloaded from PVGIS') # Save the entries from the data section to a list, until an empty line is # reached an empty line. The length of the section depends on the request data_lines = [] while True: line = src.readline() if line.strip() == '': break else: data_lines.append(line.strip().split(',')) data = pd.DataFrame(data_lines, columns=names) data.index = pd.to_datetime(data['time'], format='%Y%m%d:%H%M', utc=True) data = data.drop('time', axis=1) if map_variables: data = data.rename(columns=PVGIS_VARIABLE_MAP) # All columns should have the dtype=float, except 'Int' which should be # integer. It is necessary to convert to float, before converting to int data = data.astype(float).astype(dtype={'Int': 'int'}) # Generate metadata dictionary containing description of parameters metadata = {} for line in src.readlines(): if ':' in line: metadata[line.split(':')[0]] = line.split(':')[1].strip() return data, inputs, metadata
[docs]def read_pvgis_hourly(filename, pvgis_format=None, map_variables=True): """Read a PVGIS hourly file. Parameters ---------- filename : str, pathlib.Path, or file-like buffer Name, path, or buffer of hourly data file downloaded from PVGIS. pvgis_format : str, default None Format of PVGIS file or buffer. Equivalent to the ``outputformat`` parameter in the PVGIS API. If `filename` is a file and `pvgis_format` is ``None`` then the file extension will be used to determine the PVGIS format to parse. If `filename` is a buffer, then `pvgis_format` is required and must be in ``['csv', 'json']``. map_variables: bool, default True When true, renames columns of the DataFrame to pvlib variable names where applicable. See variable PVGIS_VARIABLE_MAP. Returns ------- data : pandas.DataFrame the time series data inputs : dict the inputs metadata : dict metadata Raises ------ ValueError if `pvgis_format` is ``None`` and the file extension is neither ``.csv`` nor ``.json`` or if `pvgis_format` is provided as input but isn't in ``['csv', 'json']`` TypeError if `pvgis_format` is ``None`` and `filename` is a buffer See Also -------- get_pvgis_hourly, read_pvgis_tmy """ # get the PVGIS outputformat if pvgis_format is None: # get the file extension from suffix, but remove the dot and make sure # it's lower case to compare with csv, or json # NOTE: basic format is not supported for PVGIS Hourly as the data # format does not include a header # NOTE: raises TypeError if filename is a buffer outputformat = Path(filename).suffix[1:].lower() else: outputformat = pvgis_format # parse the pvgis file based on the output format, either 'json' or 'csv' # NOTE: json and csv output formats have parsers defined as private # functions in this module # JSON: use Python built-in json module to convert file contents to a # Python dictionary, and pass the dictionary to the # _parse_pvgis_hourly_json() function from this module if outputformat == 'json': try: src = json.load(filename) except AttributeError: # str/path has no .read() attribute with open(str(filename), 'r') as fbuf: src = json.load(fbuf) return _parse_pvgis_hourly_json(src, map_variables=map_variables) # CSV: use _parse_pvgis_hourly_csv() if outputformat == 'csv': try: pvgis_data = _parse_pvgis_hourly_csv( filename, map_variables=map_variables) except AttributeError: # str/path has no .read() attribute with open(str(filename), 'r') as fbuf: pvgis_data = _parse_pvgis_hourly_csv( fbuf, map_variables=map_variables) return pvgis_data # raise exception if pvgis format isn't in ['csv', 'json'] err_msg = ( "pvgis format '{:s}' was unknown, must be either 'json' or 'csv'")\ .format(outputformat) raise ValueError(err_msg)
[docs]def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, userhorizon=None, startyear=None, endyear=None, url=URL, map_variables=None, timeout=30): """ Get TMY data from PVGIS. For more information see the PVGIS [1]_ TMY tool documentation [2]_. Parameters ---------- latitude : float Latitude in degrees north longitude : float Longitude in degrees east outputformat : str, default 'json' Must be in ``['csv', 'basic', 'epw', 'json']``. See PVGIS TMY tool documentation [2]_ for more info. usehorizon : bool, default True include effects of horizon userhorizon : list of float, default None optional user specified elevation of horizon in degrees, at equally spaced azimuth clockwise from north, only valid if `usehorizon` is true, if `usehorizon` is true but `userhorizon` is `None` then PVGIS will calculate the horizon [3]_ startyear : int, default None first year to calculate TMY endyear : int, default None last year to calculate TMY, must be at least 10 years from first year url : str, default :const:`pvlib.iotools.pvgis.URL` base url of PVGIS API, append ``tmy`` to get TMY endpoint map_variables: bool When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable PVGIS_VARIABLE_MAP. timeout : int, default 30 time in seconds to wait for server response before timeout Returns ------- data : pandas.DataFrame the weather data months_selected : list TMY year for each month, ``None`` for basic and EPW inputs : dict the inputs, ``None`` for basic and EPW metadata : list or dict file metadata, ``None`` for basic Raises ------ requests.HTTPError if the request response status is ``HTTP/1.1 400 BAD REQUEST``, then the error message in the response will be raised as an exception, otherwise raise whatever ``HTTP/1.1`` error occurred See also -------- read_pvgis_tmy References ---------- .. [1] `PVGIS <https://ec.europa.eu/jrc/en/pvgis>`_ .. [2] `PVGIS TMY tool <https://ec.europa.eu/jrc/en/PVGIS/tools/tmy>`_ .. [3] `PVGIS horizon profile tool <https://ec.europa.eu/jrc/en/PVGIS/tools/horizon>`_ """ # use requests to format the query string by passing params dictionary params = {'lat': latitude, 'lon': longitude, 'outputformat': outputformat} # pvgis only likes 0 for False, and 1 for True, not strings, also the # default for usehorizon is already 1 (ie: True), so only set if False if not usehorizon: params['usehorizon'] = 0 if userhorizon is not None: params['userhorizon'] = ','.join(str(x) for x in userhorizon) if startyear is not None: params['startyear'] = startyear if endyear is not None: params['endyear'] = endyear res = requests.get(url + 'tmy', params=params, timeout=timeout) # PVGIS returns really well formatted error messages in JSON for HTTP/1.1 # 400 BAD REQUEST so try to return that if possible, otherwise raise the # HTTP/1.1 error caught by requests if not res.ok: try: err_msg = res.json() except Exception: res.raise_for_status() else: raise requests.HTTPError(err_msg['message']) # initialize data to None in case API fails to respond to bad outputformat data = None, None, None, None if outputformat == 'json': src = res.json() data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src) elif outputformat == 'csv': with io.BytesIO(res.content) as src: data, months_selected, inputs, meta = _parse_pvgis_tmy_csv(src) elif outputformat == 'basic': with io.BytesIO(res.content) as src: data, months_selected, inputs, meta = _parse_pvgis_tmy_basic(src) elif outputformat == 'epw': with io.StringIO(res.content.decode('utf-8')) as src: data, meta = parse_epw(src) months_selected, inputs = None, None else: # this line is never reached because if outputformat is not valid then # the response is HTTP/1.1 400 BAD REQUEST which is handled earlier pass if map_variables is None: warnings.warn( 'PVGIS variable names will be renamed to pvlib conventions by ' 'default starting in pvlib 0.10.0. Specify map_variables=True ' 'to enable that behavior now, or specify map_variables=False ' 'to hide this warning.', pvlibDeprecationWarning ) map_variables = False if map_variables: data = data.rename(columns=PVGIS_VARIABLE_MAP) return data, months_selected, inputs, meta
def _parse_pvgis_tmy_json(src): inputs = src['inputs'] meta = src['meta'] months_selected = src['outputs']['months_selected'] data = pd.DataFrame(src['outputs']['tmy_hourly']) data.index = pd.to_datetime( data['time(UTC)'], format='%Y%m%d:%H%M', utc=True) data = data.drop('time(UTC)', axis=1) return data, months_selected, inputs, meta def _parse_pvgis_tmy_csv(src): # the first 3 rows are latitude, longitude, elevation inputs = {} # 'Latitude (decimal degrees): 45.000\r\n' inputs['latitude'] = float(src.readline().split(b':')[1]) # 'Longitude (decimal degrees): 8.000\r\n' inputs['longitude'] = float(src.readline().split(b':')[1]) # Elevation (m): 1389.0\r\n inputs['elevation'] = float(src.readline().split(b':')[1]) # then there's a 13 row comma separated table with two columns: month, year # which contains the year used for that month in the src.readline() # get "month,year\r\n" months_selected = [] for month in range(12): months_selected.append( {'month': month+1, 'year': int(src.readline().split(b',')[1])}) # then there's the TMY (typical meteorological year) data # first there's a header row: # time(UTC),T2m,RH,G(h),Gb(n),Gd(h),IR(h),WS10m,WD10m,SP headers = [h.decode('utf-8').strip() for h in src.readline().split(b',')] data = pd.DataFrame( [src.readline().split(b',') for _ in range(8760)], columns=headers) dtidx = data['time(UTC)'].apply(lambda dt: dt.decode('utf-8')) dtidx = pd.to_datetime(dtidx, format='%Y%m%d:%H%M', utc=True) data = data.drop('time(UTC)', axis=1) data = pd.DataFrame(data, dtype=float) data.index = dtidx # finally there's some meta data meta = [line.decode('utf-8').strip() for line in src.readlines()] return data, months_selected, inputs, meta def _parse_pvgis_tmy_basic(src): data = pd.read_csv(src) data.index = pd.to_datetime( data['time(UTC)'], format='%Y%m%d:%H%M', utc=True) data = data.drop('time(UTC)', axis=1) return data, None, None, None
[docs]def read_pvgis_tmy(filename, pvgis_format=None, map_variables=None): """ Read a file downloaded from PVGIS. Parameters ---------- filename : str, pathlib.Path, or file-like buffer Name, path, or buffer of file downloaded from PVGIS. pvgis_format : str, default None Format of PVGIS file or buffer. Equivalent to the ``outputformat`` parameter in the PVGIS TMY API. If `filename` is a file and `pvgis_format` is ``None`` then the file extension will be used to determine the PVGIS format to parse. For PVGIS files from the API with ``outputformat='basic'``, please set `pvgis_format` to ``'basic'``. If `filename` is a buffer, then `pvgis_format` is required and must be in ``['csv', 'epw', 'json', 'basic']``. map_variables: bool When true, renames columns of the Dataframe to pvlib variable names where applicable. See variable PVGIS_VARIABLE_MAP. Returns ------- data : pandas.DataFrame the weather data months_selected : list TMY year for each month, ``None`` for basic and EPW inputs : dict the inputs, ``None`` for basic and EPW metadata : list or dict file metadata, ``None`` for basic Raises ------ ValueError if `pvgis_format` is ``None`` and the file extension is neither ``.csv``, ``.json``, nor ``.epw``, or if `pvgis_format` is provided as input but isn't in ``['csv', 'epw', 'json', 'basic']`` TypeError if `pvgis_format` is ``None`` and `filename` is a buffer See also -------- get_pvgis_tmy """ # get the PVGIS outputformat if pvgis_format is None: # get the file extension from suffix, but remove the dot and make sure # it's lower case to compare with epw, csv, or json # NOTE: raises TypeError if filename is a buffer outputformat = Path(filename).suffix[1:].lower() else: outputformat = pvgis_format # parse the pvgis file based on the output format, either 'epw', 'json', # 'csv', or 'basic' # EPW: use the EPW parser from the pvlib.iotools epw.py module if outputformat == 'epw': try: data, meta = parse_epw(filename) except AttributeError: # str/path has no .read() attribute data, meta = read_epw(filename) months_selected, inputs = None, None # NOTE: json, csv, and basic output formats have parsers defined as private # functions in this module # JSON: use Python built-in json module to convert file contents to a # Python dictionary, and pass the dictionary to the _parse_pvgis_tmy_json() # function from this module elif outputformat == 'json': try: src = json.load(filename) except AttributeError: # str/path has no .read() attribute with open(str(filename), 'r') as fbuf: src = json.load(fbuf) data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src) # CSV or basic: use the correct parser from this module # eg: _parse_pvgis_tmy_csv() or _parse_pvgist_tmy_basic() elif outputformat in ['csv', 'basic']: # get the correct parser function for this output format from globals() pvgis_parser = globals()['_parse_pvgis_tmy_{:s}'.format(outputformat)] # NOTE: pvgis_parse() is a pvgis parser function from this module, # either _parse_pvgis_tmy_csv() or _parse_pvgist_tmy_basic() try: data, months_selected, inputs, meta = pvgis_parser(filename) except AttributeError: # str/path has no .read() attribute with open(str(filename), 'rb') as fbuf: data, months_selected, inputs, meta = pvgis_parser(fbuf) else: # raise exception if pvgis format isn't in ['csv','basic','epw','json'] err_msg = ( "pvgis format '{:s}' was unknown, must be either 'epw', 'json', " "'csv', or 'basic'").format(outputformat) raise ValueError(err_msg) if map_variables is None: warnings.warn( 'PVGIS variable names will be renamed to pvlib conventions by ' 'default starting in pvlib 0.10.0. Specify map_variables=True ' 'to enable that behavior now, or specify map_variables=False ' 'to hide this warning.', pvlibDeprecationWarning ) map_variables = False if map_variables: data = data.rename(columns=PVGIS_VARIABLE_MAP) return data, months_selected, inputs, meta