Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -270,16 +270,30 @@ def predict_prices(data, model, tokenizer, prediction_days=30):
|
|
| 270 |
# Tokenize the input
|
| 271 |
input_sequence = prices[-context_length:]
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
# Create prediction input
|
| 274 |
-
#
|
| 275 |
-
|
| 276 |
-
#
|
| 277 |
-
prediction_input = torch.tensor(input_sequence).unsqueeze(0).float().to(model.device)
|
| 278 |
|
| 279 |
# Generate predictions
|
| 280 |
with torch.no_grad():
|
| 281 |
-
#
|
| 282 |
-
#
|
| 283 |
forecast = model.generate(
|
| 284 |
prediction_input,
|
| 285 |
max_new_tokens=prediction_days,
|
|
@@ -287,13 +301,15 @@ def predict_prices(data, model, tokenizer, prediction_days=30):
|
|
| 287 |
)
|
| 288 |
|
| 289 |
# Handle complex Chronos output: [batch_size, num_samples, prediction_length]
|
| 290 |
-
|
| 291 |
-
# 1. Get the actual tensor from the tuple/list if necessary
|
| 292 |
output_tensor = forecast[0] if isinstance(forecast, (tuple, list)) else forecast
|
| 293 |
|
| 294 |
-
#
|
| 295 |
-
#
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
# Handle case where predictions is a single scalar (convert to array for safety)
|
| 299 |
if predictions.ndim == 0:
|
|
@@ -321,7 +337,7 @@ def predict_prices(data, model, tokenizer, prediction_days=30):
|
|
| 321 |
'mean_30d': predicted_mean,
|
| 322 |
'change_pct': change_pct,
|
| 323 |
'summary': f"""
|
| 324 |
-
AI Model: Amazon Chronos-Bolt
|
| 325 |
Prediction Period: {pred_len} days
|
| 326 |
Expected Change: {change_pct:.2f}%
|
| 327 |
Confidence: Medium (based on historical patterns)
|
|
|
|
| 270 |
# Tokenize the input
|
| 271 |
input_sequence = prices[-context_length:]
|
| 272 |
|
| 273 |
+
# --- CRITICAL FIX: Simulate Quantization ---
|
| 274 |
+
# 1. Normalize prices (0 to 1)
|
| 275 |
+
price_min = np.min(input_sequence)
|
| 276 |
+
price_max = np.max(input_sequence)
|
| 277 |
+
|
| 278 |
+
if price_max == price_min:
|
| 279 |
+
normalized_sequence = np.zeros_like(input_sequence)
|
| 280 |
+
else:
|
| 281 |
+
normalized_sequence = (input_sequence - price_min) / (price_max - price_min)
|
| 282 |
+
|
| 283 |
+
# 2. Scale to a token space (max vocab size 4096) and convert to Long
|
| 284 |
+
VOCAB_SIZE = 4096
|
| 285 |
+
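# Convert to an integer dtype to satisfy the model embedding layer (NOTE: np.long was removed in NumPy 1.24 and is the platform C long in 2.x; np.int64 is the portable choice)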
|
| 286 |
+
token_indices = (normalized_sequence * (VOCAB_SIZE - 1)).astype(np.long)
|
| 287 |
+
|
| 288 |
# Create prediction input
|
| 289 |
+
# Pass tokens to the model
|
| 290 |
+
prediction_input = torch.tensor(token_indices).unsqueeze(0).to(model.device)
|
| 291 |
+
# --- END CRITICAL FIX ---
|
|
|
|
| 292 |
|
| 293 |
# Generate predictions
|
| 294 |
with torch.no_grad():
|
| 295 |
+
# Use max_new_tokens for generation length.
|
| 296 |
+
# do_sample is necessary for generating probabilistic time-series forecasts
|
| 297 |
forecast = model.generate(
|
| 298 |
prediction_input,
|
| 299 |
max_new_tokens=prediction_days,
|
|
|
|
| 301 |
)
|
| 302 |
|
| 303 |
# Handle complex Chronos output: [batch_size, num_samples, prediction_length]
|
|
|
|
|
|
|
| 304 |
output_tensor = forecast[0] if isinstance(forecast, (tuple, list)) else forecast
|
| 305 |
|
| 306 |
+
# Average across the samples and convert to a simple 1D numpy array
|
| 307 |
+
# Note: The output is still in TOKEN SPACE. We must INVERSE-SCALE it back to PRICE SPACE.
|
| 308 |
+
predictions_tokens = output_tensor.float().mean(dim=1).squeeze().cpu().numpy()
|
| 309 |
+
|
| 310 |
+
# --- CRITICAL INVERSE-SCALE FIX ---
|
| 311 |
+
# Inverse normalize the predicted tokens back to the price range
|
| 312 |
+
predictions = (predictions_tokens / (VOCAB_SIZE - 1)) * (price_max - price_min) + price_min
|
| 313 |
|
| 314 |
# Handle case where predictions is a single scalar (convert to array for safety)
|
| 315 |
if predictions.ndim == 0:
|
|
|
|
| 337 |
'mean_30d': predicted_mean,
|
| 338 |
'change_pct': change_pct,
|
| 339 |
'summary': f"""
|
| 340 |
+
AI Model: Amazon Chronos-Bolt (Simulated Quantization)
|
| 341 |
Prediction Period: {pred_len} days
|
| 342 |
Expected Change: {change_pct:.2f}%
|
| 343 |
Confidence: Medium (based on historical patterns)
|