"""Functions to read NREL MIDC data.
"""
import io
import requests
import pandas as pd
# MIDC_VARIABLE_MAP maps some variables of interest at each MIDC site to their
# pvlib counterparts. The mapping dictionary for a site can be found by looking
# up the Site's id in the dictionary. It is not a comprehensive list, and may
# not be the best fit for your application, but should serve as a base for
# creating your own mappings.
#
# In particular, these mappings coincide with the raw data files.
# Each site's field list can be found at:
# https://midcdmz.nrel.gov/apps/daily.pl?site=<SITE ID>&live=1
# where <SITE ID> is the site's key in this dictionary.
# Per-site mapping of MIDC field names (outer key: MIDC site id) to pvlib
# variable names. Intended to be passed as ``variable_map`` when reading data.
MIDC_VARIABLE_MAP = {
    'BMS': {
        'Global CMP22 (vent/cor) [W/m^2]': 'ghi',
        'Direct CHP1-1 [W/m^2]': 'dni_chp1',
        # NIP was mapped to dni for pvlib<=0.10.5
        'Direct NIP [W/m^2]': 'dni_nip',
        'Diffuse CM22-1 (vent/cor) [W/m^2]': 'dhi',
        'Avg Wind Speed @ 6ft [m/s]': 'wind_speed',
        'Tower Dry Bulb Temp [deg C]': 'temp_air',
        'Tower RH [%]': 'relative_humidity',
    },
    'UOSMRL': {
        'Global CMP22 [W/m^2]': 'ghi',
        'Direct CHP1 [W/m^2]': 'dni_chp1',
        'Diffuse [W/m^2]': 'dhi',
        # NIP was mapped to dni for pvlib<=0.10.5
        'Direct NIP [W/m^2]': 'dni_nip',
        # Schenk was mapped to dhi for pvlib<=0.10.5
        # 'Diffuse Schenk [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Relative Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
    },
    'HSU': {
        'Global Horiz [W/m^2]': 'ghi',
        'Direct Normal (calc) [W/m^2]': 'dni',
        'Diffuse Horiz (band_corr) [W/m^2]': 'dhi',
    },
    'UTPASRL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'CHP1 Temp [deg C]': 'temp_air',
    },
    'UAT': {
        'Global Horiz (platform) [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horiz [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 3m [m/s]': 'wind_speed',
    },
    'STAC': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
    },
    'UNLV': {
        'Global Horiz [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horiz (calc) [W/m^2]': 'dhi',
        'Dry Bulb Temp [deg C]': 'temp_air',
        'Avg Wind Speed @ 30ft [m/s]': 'wind_speed',
    },
    'ORNL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 42ft [m/s]': 'wind_speed',
    },
    'NELHA': {
        'Global Horizontal [W/m^2]': 'ghi',
        # NOTE(review): the '[W/m^2]' unit on the air-temperature field looks
        # like a typo (every other site uses '[deg C]'). The original key is
        # kept for backward compatibility and the '[deg C]' spelling is added
        # alongside it -- confirm against the NELHA field list.
        'Air Temperature [W/m^2]': 'temp_air',
        'Air Temperature [deg C]': 'temp_air',
        'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
        'Rel Humidity [%]': 'relative_humidity',
    },
    'ULL': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        'Air Temperature [deg C]': 'temp_air',
        'Rel Humidity [%]': 'relative_humidity',
        'Avg Wind Speed @ 3m [m/s]': 'wind_speed',
    },
    'NWTC': {
        'Global Horizontal [W/m^2]': 'ghi',
        'Direct Normal [W/m^2]': 'dni',
        'Diffuse Horizontal [W/m^2]': 'dhi',
        # PSP instrument was removed Feb. 2021
        # PSP was mapped to ghi for pvlib<=0.10.5
        # 'Global PSP [W/m^2]': 'ghi',
        'Temperature @ 2m [deg C]': 'temp_air',
        'Avg Wind Speed @ 2m [m/s]': 'wind_speed',
        'Relative Humidity [%]': 'relative_humidity',
    },
}
# Translates problematic timezone labels to fixed-offset 'Etc/GMT' names for
# parsing. Labels that are not listed here are used unchanged by the index
# formatting helpers.
TZ_MAP = dict(
    PST='Etc/GMT+8',
    CST='Etc/GMT+6',
)
def _format_index(data):
    """Build a timezone-aware DatetimeIndex from the date and time columns.

    The timezone is taken from the label of the second column, translated
    through ``TZ_MAP`` when an entry exists for it.

    Parameters
    ----------
    data: Dataframe
        Must contain a 'DATE (MM/DD/YYYY)' column. The second column must be
        labeled with the timezone and hold times in 'HH:MM' format.

    Returns
    -------
    data: Dataframe
        Dataframe indexed by the parsed timestamps, localized to the
        timezone named by the second column.
    """
    time_label = data.columns[1]
    # Date and time strings are concatenated without a separator, so the
    # parse format has none between '%Y' and '%H' either.
    stamps = pd.to_datetime(
        data['DATE (MM/DD/YYYY)'] + data[time_label],
        format='%m/%d/%Y%H:%M',
    )
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(time_label, time_label))
def _format_index_raw(data):
    """Build a timezone-aware DatetimeIndex for MIDC raw-data frames.

    The timezone is taken from the label of the column at position 3 (the
    fourth column), translated through ``TZ_MAP`` when an entry exists.

    Parameters
    ----------
    data: Dataframe
        Must contain integer columns 'Year' and 'DOY'. The column at
        position 3 must be labeled with the timezone and hold the time of
        day as an integer that reads as 'HHMM' once zero-padded to four
        digits.

    Returns
    -------
    data: Dataframe
        The data indexed by the parsed timestamps, localized to the
        timezone named by the time column.
    """
    time_label = data.columns[3]
    year_text = data.Year.apply(str)
    # Zero-pad so that the fixed-width '%j' (3 digits) and '%H%M' (4 digits)
    # fields line up when the pieces are concatenated.
    doy_text = data.DOY.apply('{:03d}'.format)
    hhmm_text = data[time_label].apply('{:04d}'.format)
    stamps = pd.to_datetime(year_text + doy_text + hhmm_text,
                            format="%Y%j%H%M")
    indexed = data.set_index(stamps)
    return indexed.tz_localize(TZ_MAP.get(time_label, time_label))
def read_midc(filename, variable_map=None, raw_data=False, **kwargs):
    """Read in National Renewable Energy Laboratory Measurement and
    Instrumentation Data Center weather data. The MIDC is described in [1]_.

    Parameters
    ----------
    filename: string or file-like object
        Filename, url, or file-like object of data to read.
    variable_map: dictionary, optional
        Dictionary for mapping MIDC field names to pvlib names. Used to rename
        the columns of the resulting DataFrame. Does not map names by default
        (``None`` is treated as an empty mapping). See Notes for an example.
    raw_data: boolean
        Set to true to use ``_format_index_raw`` to correctly format the
        date/time columns of MIDC raw data files.
    kwargs : dict
        Additional keyword arguments to pass to `pandas.read_csv`

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex localized to the provided timezone.

    Notes
    -----
    The `variable_map` argument should map fields from MIDC data to pvlib
    names.

    E.g. if a MIDC file contains the variable 'Global Horizontal [W/m^2]',
    passing the dictionary below will rename the column to 'ghi' in
    the returned Dataframe.

    {'Global Horizontal [W/m^2]': 'ghi'}

    See the MIDC_VARIABLE_MAP for collection of mappings by site.
    For a full list of pvlib variable names see the
    :ref:`nomenclature`.

    Be sure to check the units for the variables you will use on the
    `MIDC site <https://midcdmz.nrel.gov/>`_.

    References
    ----------
    .. [1] NREL: Measurement and Instrumentation Data Center
       `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
    """
    # A None default avoids sharing one mutable dict object across calls.
    if variable_map is None:
        variable_map = {}
    data = pd.read_csv(filename, **kwargs)
    # Raw-data files carry Year/DOY/time columns; other files carry a
    # date column followed by a time column.
    if raw_data:
        data = _format_index_raw(data)
    else:
        data = _format_index(data)
    data = data.rename(columns=variable_map)
    return data
def read_midc_raw_data_from_nrel(site, start, end, variable_map=None,
                                 timeout=30):
    """Request and read MIDC data directly from the raw data api.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime-like
        Start date for requested data.
    end: datetime-like
        End date for requested data.
    variable_map: dict, optional
        A dictionary mapping MIDC field names to pvlib names. Used to
        rename columns of the resulting DataFrame. ``None`` (the default)
        is treated as an empty mapping. See Notes of
        :py:func:`pvlib.iotools.read_midc` for example.
    timeout : float, default 30
        Number of seconds to wait to connect/read from the API before
        failing.

    Returns
    -------
    data: Dataframe
        Dataframe with DatetimeIndex localized to the station location.

    Raises
    ------
    requests.HTTPError
        For any error in retrieving the CSV file from the MIDC API
    requests.Timeout
        If data is not received within ``timeout`` seconds

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    MIDC raw data api page
    `here <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
    for more details and considerations.
    """
    # A None default avoids sharing one mutable dict object across calls;
    # normalise here so a real mapping is always forwarded to read_midc.
    if variable_map is None:
        variable_map = {}
    args = {'site': site,
            'begin': pd.to_datetime(start).strftime('%Y%m%d'),
            'end': pd.to_datetime(end).strftime('%Y%m%d')}
    url = 'https://midcdmz.nrel.gov/apps/data_api.pl'
    # Let requests build the query string from ``args``.
    csv_request = requests.get(url, timeout=timeout, params=args)
    csv_request.raise_for_status()
    raw_csv = io.StringIO(csv_request.text)
    # The number of header columns and data columns do not always match,
    # so first parse only the header to determine how many columns to read.
    first_row = pd.read_csv(raw_csv, nrows=0)
    col_length = len(first_row.columns)
    # Rewind so the full parse starts from the header line again.
    raw_csv.seek(0)
    return read_midc(raw_csv, variable_map=variable_map, raw_data=True,
                     usecols=range(col_length))