"""Functions to read NREL MIDC data.
"""
from functools import partial
import pandas as pd
# VARIABLE_MAP is a dictionary mapping partial MIDC field names to their
# pvlib names. See docstring of read_midc for description.
VARIABLE_MAP = {
'Direct': 'dni',
'Global': 'ghi',
'Diffuse': 'dhi',
'Airmass': 'airmass',
'Azimuth Angle': 'solar_azimuth',
'Zenith Angle': 'solar_zenith',
'Air Temperature': 'temp_air',
'Temperature': 'temp_air',
'Dew Point Temp': 'temp_dew',
'Relative Humidity': 'relative_humidity',
}
# Maps problematic timezones to 'Etc/GMT' for parsing.
TZ_MAP = {
'PST': 'Etc/GMT+8',
'CST': 'Etc/GMT+6',
}
def map_midc_to_pvlib(variable_map, field_name):
"""A mapper function to rename Dataframe columns to their pvlib counterparts.
Parameters
----------
variable_map: Dictionary
A dictionary for mapping MIDC field name to pvlib name. See
VARIABLE_MAP for default value and description of how to construct
this argument.
field_name: string
The Column to map.
Returns
-------
label: string
The pvlib variable name associated with the MIDC field or the input if
a mapping does not exist.
Notes
-----
Will fail if field_name to be mapped matches an entry in VARIABLE_MAP and
does not contain brackets. This should not be an issue unless MIDC file
headers are updated.
"""
new_field_name = field_name
for midc_name, pvlib_name in variable_map.items():
if field_name.startswith(midc_name):
# extract the instrument and units field and then remove units
instrument_units = field_name[len(midc_name):]
units_index = instrument_units.find('[')
instrument = instrument_units[:units_index - 1]
new_field_name = pvlib_name + instrument.replace(' ', '_')
break
return new_field_name
def format_index(data):
"""Create DatetimeIndex for the Dataframe localized to the timezone provided
as the label of the second (time) column.
Parameters
----------
data: Dataframe
Must contain 'DATE (MM/DD/YYYY)' column, second column must be labeled
with the timezone and contain times in 'HH:MM' format.
Returns
-------
data: Dataframe
Dataframe with DatetimeIndex localized to the provided timezone.
"""
tz_raw = data.columns[1]
timezone = TZ_MAP.get(tz_raw, tz_raw)
datetime = data['DATE (MM/DD/YYYY)'] + data[tz_raw]
datetime = pd.to_datetime(datetime, format='%m/%d/%Y%H:%M')
data = data.set_index(datetime)
data = data.tz_localize(timezone)
return data
def format_index_raw(data):
"""Create DatetimeIndex for the Dataframe localized to the timezone provided
as the label of the third column.
Parameters
----------
data: Dataframe
Must contain columns 'Year' and 'DOY'. Timezone must be found as the
label of the third (time) column.
Returns
-------
data: Dataframe
The data with a Datetime index localized to the provided timezone.
"""
tz_raw = data.columns[3]
timezone = TZ_MAP.get(tz_raw, tz_raw)
year = data.Year.apply(str)
jday = data.DOY.apply(lambda x: '{:03d}'.format(x))
time = data[tz_raw].apply(lambda x: '{:04d}'.format(x))
index = pd.to_datetime(year + jday + time, format="%Y%j%H%M")
data = data.set_index(index)
data = data.tz_localize(timezone)
return data
[docs]def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
"""Read in National Renewable Energy Laboratory Measurement and
Instrumentation Data Center [1]_ weather data.
Parameters
----------
filename: string
Filename or url of data to read.
variable_map: dictionary
Dictionary for mapping MIDC field names to pvlib names. See variable
`VARIABLE_MAP` for default and Notes section below for a description of
its format.
raw_data: boolean
Set to true to use format_index_raw to correctly format the date/time
columns of MIDC raw data files.
Returns
-------
data: Dataframe
A dataframe with DatetimeIndex localized to the provided timezone.
Notes
-----
Keys of the `variable_map` dictionary should include the first part
of a MIDC field name which indicates the variable being measured.
e.g. 'Global PSP [W/m^2]' is entered as a key of 'Global'
The 'PSP' indicating instrument is appended to the pvlib variable name
after mapping to differentiate measurements of the same variable. For a
full list of pvlib variable names see the `Variable Style Rules
<https://pvlib-python.readthedocs.io/en/latest/variables_style_rules.html>`_.
Be sure to check the units for the variables you will use on the
`MIDC site <https://midcdmz.nrel.gov/>`_.
References
----------
.. [1] NREL: Measurement and Instrumentation Data Center
`https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
"""
data = pd.read_csv(filename)
if raw_data:
data = format_index_raw(data)
else:
data = format_index(data)
mapper = partial(map_midc_to_pvlib, variable_map)
data = data.rename(columns=mapper)
return data
[docs]def read_midc_raw_data_from_nrel(site, start, end):
"""Request and read MIDC data directly from the raw data api.
Parameters
----------
site: string
The MIDC station id.
start: datetime
Start date for requested data.
end: datetime
End date for requested data.
Returns
-------
data:
Dataframe with DatetimeIndex localized to the station location.
Notes
-----
Requests spanning an instrumentation change will yield an error. See the
MIDC raw data api page
`here <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
for more details and considerations.
"""
args = {'site': site,
'begin': start.strftime('%Y%m%d'),
'end': end.strftime('%Y%m%d')}
endpoint = 'https://midcdmz.nrel.gov/apps/data_api.pl?'
url = endpoint + '&'.join(['{}={}'.format(k, v) for k, v in args.items()])
return read_midc(url, raw_data=True)