nakas committed on
Commit
6a8c36c
·
verified ·
1 Parent(s): 9aa1340

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -58
app.py CHANGED
@@ -139,11 +139,10 @@ def parse_weather_data(data):
139
 
140
  def calculate_total_new_snow(df):
141
  """
142
- Calculate total new snow with strict anomaly filtering:
143
- 1. Daily reset at 9 AM
144
- 2. Maximum 3 inch per hour increase
145
- 3. Complete removal of anomalous days
146
- 4. Continuous validation of accumulation rates
147
 
148
  Parameters:
149
  df (pandas.DataFrame): DataFrame with datetime and snowfall_3hr columns
@@ -162,73 +161,72 @@ def calculate_total_new_snow(df):
162
  lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
163
  )
164
 
165
- def validate_snow_sequence(values):
166
  """
167
- Validate a sequence of snow measurements within a day
168
- Returns cleaned measurements with anomalies removed
169
  """
170
- if len(values) == 0:
171
- return []
172
 
173
- valid_values = []
174
- current_valid = values[0] if values[0] <= 3.0 else 0.0
175
 
176
- for i in range(1, len(values)):
177
- change = values[i] - values[i-1]
178
-
179
- # Check if this measurement is valid
180
- if change >= 0 and change <= 3.0:
181
- current_valid = values[i]
182
- else:
183
- # If invalid, maintain the last valid measurement
184
- values[i] = current_valid
185
 
186
- valid_values.append(current_valid)
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- return valid_values
 
 
 
 
189
 
190
  def process_daily_snow(group):
191
  """Process snow measurements for a single day period"""
192
- if len(group) == 0:
 
 
 
193
  return 0.0
194
-
195
- # Sort measurements by time
196
- group = group.sort_values('datetime')
197
 
198
- # Get the snow measurements
199
- measurements = group['snowfall_3hr'].values
200
 
201
- # If we see any extremely high values (>21 inches), invalidate the entire day
202
- if any(m > 21.0 for m in measurements):
203
- return 0.0
204
-
205
- # Clean the measurements
206
- valid_measurements = validate_snow_sequence(measurements)
207
 
208
- # Calculate the daily total from valid measurements
209
- if valid_measurements:
210
- # Get the maximum valid measurement for the day
211
- daily_total = max(valid_measurements)
212
-
213
- # Additional sanity check - if total is too high, return 0
214
- if daily_total > 12.0: # Max reasonable daily total
215
- return 0.0
216
-
217
- return daily_total
218
-
219
- return 0.0
220
 
221
- # Process each day
222
  daily_totals = snow_df.groupby('day_group').apply(process_daily_snow)
223
 
224
- # Remove any remaining anomalous days (days with unusually high totals)
225
- daily_totals = daily_totals.where(daily_totals <= 12.0, 0.0)
226
-
227
  return daily_totals.sum()
228
 
229
  def create_daily_snow_plot(df, ax):
230
  """
231
- Create an improved daily snow plot with strict anomaly filtering
 
232
  """
233
  # Create a copy of the dataframe
234
  snow_df = df[['datetime', 'snowfall_3hr']].copy()
@@ -242,9 +240,6 @@ def create_daily_snow_plot(df, ax):
242
  daily_snow = snow_df.groupby('day_group').apply(process_daily_snow).reset_index()
243
  daily_snow.columns = ['date', 'new_snow']
244
 
245
- # Additional filtering for the plot
246
- daily_snow = daily_snow[daily_snow['new_snow'] <= 12.0] # Remove any remaining anomalous days
247
-
248
  # Create the bar plot
249
  ax.bar(daily_snow['date'], daily_snow['new_snow'], color='blue')
250
  ax.set_title('Daily New Snow (9 AM Reset)', pad=20)
@@ -252,9 +247,6 @@ def create_daily_snow_plot(df, ax):
252
  ax.set_ylabel('New Snow (inches)')
253
  ax.tick_params(axis='x', rotation=45)
254
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
255
-
256
- # Set reasonable y-axis limits
257
- ax.set_ylim(0, max(12, daily_snow['new_snow'].max() * 1.1))
258
 
259
  def create_daily_snow_plot(df, ax):
260
  """
 
139
 
140
  def calculate_total_new_snow(df):
141
  """
142
+ Calculate total new snow by:
143
+ 1. Looking at values before 9 AM each day
144
+ 2. Ensuring reasonable hourly change rates (max 3 inches/hour)
145
+ 3. Allowing high totals if they accumulate gradually
 
146
 
147
  Parameters:
148
  df (pandas.DataFrame): DataFrame with datetime and snowfall_3hr columns
 
161
  lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
162
  )
163
 
164
+ def validate_sequence(values, times):
165
  """
166
+ Validate a sequence of snow readings by checking hourly change rates.
167
+ Returns the most reliable final value.
168
  """
169
+ if len(values) <= 1:
170
+ return values[-1] if len(values) > 0 else 0.0
171
 
172
+ # Calculate hour-to-hour changes
173
+ changes = np.diff(values)
174
 
175
+ # If any change is too large (> 3 inches per hour), we need to find the reliable sequence
176
+ if any(change > 3.0 for change in changes):
177
+ # Find the longest sequence of reasonable changes
178
+ valid_indices = [0] # Start with first reading
179
+ current_value = values[0]
 
 
 
 
180
 
181
+ for i in range(1, len(values)):
182
+ change = values[i] - values[i-1]
183
+ if 0 <= change <= 3.0: # Allow only positive changes up to 3 inches
184
+ valid_indices.append(i)
185
+ current_value = values[i]
186
+ else:
187
+ # Check if this might be a reset (value lower than previous)
188
+ if values[i] < values[i-1]:
189
+ # If it looks like a reset, start new sequence
190
+ current_value = values[i]
191
+ valid_indices.append(i)
192
+ else:
193
+ # If it's a spike, ignore it
194
+ values[i] = current_value
195
 
196
+ # Return the last valid value
197
+ return values[valid_indices[-1]]
198
+ else:
199
+ # If all changes are reasonable, use the last value
200
+ return values[-1]
201
 
202
  def process_daily_snow(group):
203
  """Process snow measurements for a single day period"""
204
+ # Only look at measurements before 9 AM
205
+ morning_data = group[group['datetime'].dt.hour < 9].copy()
206
+
207
+ if len(morning_data) == 0:
208
  return 0.0
 
 
 
209
 
210
+ # Sort by time to ensure proper sequence
211
+ morning_data = morning_data.sort_values('datetime')
212
 
213
+ values = morning_data['snowfall_3hr'].values
214
+ times = morning_data['datetime'].values
 
 
 
 
215
 
216
+ # Validate the sequence and get the final value
217
+ daily_total = validate_sequence(values, times)
218
+
219
+ return daily_total
 
 
 
 
 
 
 
 
220
 
221
+ # Calculate daily snow totals
222
  daily_totals = snow_df.groupby('day_group').apply(process_daily_snow)
223
 
 
 
 
224
  return daily_totals.sum()
225
 
226
  def create_daily_snow_plot(df, ax):
227
  """
228
+ Create an improved daily snow plot focusing on pre-9 AM totals
229
+ with validation of hourly change rates
230
  """
231
  # Create a copy of the dataframe
232
  snow_df = df[['datetime', 'snowfall_3hr']].copy()
 
240
  daily_snow = snow_df.groupby('day_group').apply(process_daily_snow).reset_index()
241
  daily_snow.columns = ['date', 'new_snow']
242
 
 
 
 
243
  # Create the bar plot
244
  ax.bar(daily_snow['date'], daily_snow['new_snow'], color='blue')
245
  ax.set_title('Daily New Snow (9 AM Reset)', pad=20)
 
247
  ax.set_ylabel('New Snow (inches)')
248
  ax.tick_params(axis='x', rotation=45)
249
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
 
 
 
250
 
251
  def create_daily_snow_plot(df, ax):
252
  """