Spaces:
Runtime error
Runtime error
Hugo Massonnat committed on
Commit ·
5f29f14
1
Parent(s): 6a1c12b
monthly aggregation of historic weather data
Browse files- data_pipelines/historical_weather_data.py +75 -38
- requirements.txt +2 -1
data_pipelines/historical_weather_data.py
CHANGED
|
@@ -4,17 +4,19 @@ import requests_cache
|
|
| 4 |
import pandas as pd
|
| 5 |
from retry_requests import retry
|
| 6 |
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def download_historical_weather_data(
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
) -> pd.DataFrame:
|
| 14 |
# Setup the Open-Meteo API client with cache and retry on error
|
| 15 |
-
cache_session = requests_cache.CachedSession('.cache', expire_after
|
| 16 |
-
retry_session = retry(cache_session, retries
|
| 17 |
-
openmeteo = openmeteo_requests.Client(session
|
| 18 |
|
| 19 |
# Make sure all required weather variables are listed here
|
| 20 |
# The order of variables in hourly or daily is important to assign them correctly below
|
|
@@ -22,12 +24,11 @@ def download_historical_weather_data(
|
|
| 22 |
params = {
|
| 23 |
"latitude": latitude,
|
| 24 |
"longitude": longitude,
|
| 25 |
-
"start_date": f"{start_year}-
|
| 26 |
-
"end_date": f"{end_year}-
|
| 27 |
-
"
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
"et0_fao_evapotranspiration"],
|
| 31 |
"timezone": "GMT"
|
| 32 |
}
|
| 33 |
responses = openmeteo.weather_api(url, params=params)
|
|
@@ -39,32 +40,68 @@ def download_historical_weather_data(
|
|
| 39 |
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
|
| 40 |
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
|
| 41 |
|
| 42 |
-
# Process
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
"
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
inclusive="left"
|
| 58 |
),
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
|
|
|
|
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
from retry_requests import retry
|
| 6 |
|
| 7 |
+
from compute_et0_adjusted import compute_et0_future
|
| 8 |
+
|
| 9 |
|
| 10 |
def download_historical_weather_data(
|
| 11 |
+
latitude: float,
|
| 12 |
+
longitude: float,
|
| 13 |
+
start_year: int,
|
| 14 |
+
end_year: int,
|
| 15 |
) -> pd.DataFrame:
|
| 16 |
# Setup the Open-Meteo API client with cache and retry on error
|
| 17 |
+
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
|
| 18 |
+
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
|
| 19 |
+
openmeteo = openmeteo_requests.Client(session=retry_session)
|
| 20 |
|
| 21 |
# Make sure all required weather variables are listed here
|
| 22 |
# The order of variables in hourly or daily is important to assign them correctly below
|
|
|
|
| 24 |
params = {
|
| 25 |
"latitude": latitude,
|
| 26 |
"longitude": longitude,
|
| 27 |
+
"start_date": f"{start_year}-02-08",
|
| 28 |
+
"end_date": f"{end_year}-02-22",
|
| 29 |
+
"hourly": ["temperature_2m", "relative_humidity_2m",
|
| 30 |
+
"precipitation", "et0_fao_evapotranspiration",
|
| 31 |
+
"wind_speed_10m", "shortwave_radiation"],
|
|
|
|
| 32 |
"timezone": "GMT"
|
| 33 |
}
|
| 34 |
responses = openmeteo.weather_api(url, params=params)
|
|
|
|
| 40 |
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
|
| 41 |
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
|
| 42 |
|
| 43 |
+
# Process hourly data. The order of variables needs to be the same as requested.
|
| 44 |
+
hourly = response.Hourly()
|
| 45 |
+
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
|
| 46 |
+
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
|
| 47 |
+
hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
|
| 48 |
+
hourly_et0_fao_evapotranspiration = hourly.Variables(3).ValuesAsNumpy()
|
| 49 |
+
hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
|
| 50 |
+
hourly_shortwave_radiation = hourly.Variables(5).ValuesAsNumpy()
|
| 51 |
+
|
| 52 |
+
hourly_data = {
|
| 53 |
+
"temperature_2m": hourly_temperature_2m,
|
| 54 |
+
"relative_humidity_2m": hourly_relative_humidity_2m,
|
| 55 |
+
"precipitation": hourly_precipitation,
|
| 56 |
+
"et0_fao_evapotranspiration": hourly_et0_fao_evapotranspiration,
|
| 57 |
+
"wind_speed_10m": hourly_wind_speed_10m,
|
| 58 |
+
"shortwave_radiation": hourly_shortwave_radiation,
|
| 59 |
+
}
|
| 60 |
+
hourly_dataframe = pd.DataFrame(
|
| 61 |
+
index=pd.date_range(
|
| 62 |
+
start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
|
| 63 |
+
end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
|
| 64 |
+
freq=pd.Timedelta(seconds=hourly.Interval()),
|
| 65 |
inclusive="left"
|
| 66 |
),
|
| 67 |
+
data=hourly_data,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
return hourly_dataframe
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def aggregate_hourly_weather_data(
        hourly_data: pd.DataFrame,
) -> pd.DataFrame:
    """Aggregate hourly weather observations into one row per calendar month.

    Args:
        hourly_data: Frame indexed by a ``DatetimeIndex`` that contains at
            least the columns ``temperature_2m``, ``relative_humidity_2m``,
            ``wind_speed_10m`` and ``shortwave_radiation``. Any other column
            (e.g. ``precipitation``) is ignored by the aggregation.

    Returns:
        A frame indexed by month-end timestamps with the columns:

        * ``month`` / ``year`` -- calendar month and year of the bin,
        * ``air_temperature_min`` / ``air_temperature_max`` -- lowest and
          highest hourly ``temperature_2m`` value within the month,
        * ``relative_humidity_min`` / ``relative_humidity_max`` -- lowest and
          highest hourly ``relative_humidity_2m`` value within the month,
        * ``wind_speed`` -- monthly mean of ``wind_speed_10m``,
        * ``irradiance`` -- monthly mean of ``shortwave_radiation``.

        Values keep the units of the input columns.
    """
    try:
        monthly_bins = hourly_data.resample("ME")
    except ValueError:
        # The "ME" (month-end) alias only exists in pandas >= 2.2; older
        # releases spell the same frequency "M" and reject "ME" as invalid.
        monthly_bins = hourly_data.resample("M")

    resampled_data = monthly_bins.agg({
        "temperature_2m": ["min", "max"],
        "relative_humidity_2m": ["min", "max"],
        "wind_speed_10m": "mean",
        "shortwave_radiation": "mean",
    })

    # The aggregation yields (column, statistic) MultiIndex columns; flatten
    # them into the explicitly named monthly columns.
    monthly_data = pd.DataFrame.from_dict({
        "month": resampled_data.index.month,
        "year": resampled_data.index.year,
        "air_temperature_min": resampled_data[("temperature_2m", "min")],
        "air_temperature_max": resampled_data[("temperature_2m", "max")],
        "relative_humidity_min": resampled_data[("relative_humidity_2m", "min")],
        "relative_humidity_max": resampled_data[("relative_humidity_2m", "max")],
        "wind_speed": resampled_data[("wind_speed_10m", "mean")],
        "irradiance": resampled_data[("shortwave_radiation", "mean")],
    })

    return monthly_data
|
| 95 |
+
|
| 96 |
|
| 97 |
+
if __name__ == '__main__':
    # Manual smoke run: download hourly data for one fixed coordinate and
    # year range, aggregate it per month, then print the value returned by
    # compute_et0_future for every monthly row.
    latitude = 47
    longitude = 3
    start_year = 2020
    end_year = 2021
    df = download_historical_weather_data(latitude, longitude, start_year, end_year)
    monthly_df = aggregate_hourly_weather_data(df)

    # iterrows() hands over each monthly row as a Series, exactly what
    # positional .iloc[i] access returned, without manual index bookkeeping.
    for _, monthly_row in monthly_df.iterrows():
        et0 = compute_et0_future(monthly_row, latitude, longitude)
        print(et0)
|
requirements.txt
CHANGED
|
@@ -16,4 +16,5 @@ openmeteo_requests
|
|
| 16 |
requests_cache
|
| 17 |
retry_requests
|
| 18 |
fuzzywuzzy
|
| 19 |
-
plotly
|
|
|
|
|
|
| 16 |
requests_cache
|
| 17 |
retry_requests
|
| 18 |
fuzzywuzzy
|
| 19 |
+
plotly
|
| 20 |
+
pvlib
|