Spaces:
Running
Running
Sync from GitHub (tests passed)
Browse files- app/ai_engine.py +77 -15
- app/inference.py +102 -11
- app/schemas.py +12 -0
- app/settings.py +13 -0
app/ai_engine.py
CHANGED
|
@@ -9,6 +9,7 @@ Sentiment Analysis:
|
|
| 9 |
Usage:
|
| 10 |
python -m app.ai_engine --run-all --target-symbol HG=F
|
| 11 |
python -m app.ai_engine --score-only
|
|
|
|
| 12 |
python -m app.ai_engine --train-only --target-symbol HG=F
|
| 13 |
"""
|
| 14 |
|
|
@@ -45,7 +46,7 @@ logger = logging.getLogger(__name__)
|
|
| 45 |
_FINBERT_OUTPUT_LOGGED = False
|
| 46 |
_FINBERT_MISSING_LABELS_WARNED = False
|
| 47 |
|
| 48 |
-
HYBRID_SCORING_VERSION = "
|
| 49 |
HYBRID_FALLBACK_429_MODEL_NAME = "hybrid_fallback_429"
|
| 50 |
HYBRID_FALLBACK_PARSE_MODEL_NAME = "hybrid_fallback_parse"
|
| 51 |
LLM_LABELS = {"BULLISH", "BEARISH", "NEUTRAL"}
|
|
@@ -328,31 +329,72 @@ def _build_hybrid_reasoning_payload(
|
|
| 328 |
label: str,
|
| 329 |
llm_confidence: float,
|
| 330 |
finbert_strength: float,
|
|
|
|
| 331 |
llm_reasoning: str,
|
| 332 |
llm_model: str,
|
|
|
|
| 333 |
) -> str:
|
| 334 |
payload = {
|
| 335 |
"label": label,
|
| 336 |
"llm_confidence": round(max(0.0, min(1.0, llm_confidence)), 4),
|
| 337 |
"finbert_strength": round(max(0.0, min(1.0, finbert_strength)), 4),
|
|
|
|
| 338 |
"llm_reasoning": _sanitize_reasoning_text(llm_reasoning),
|
| 339 |
"llm_model": llm_model,
|
|
|
|
| 340 |
"scoring_version": HYBRID_SCORING_VERSION,
|
| 341 |
}
|
| 342 |
return json.dumps(payload, ensure_ascii=True)
|
| 343 |
|
| 344 |
|
| 345 |
-
def _compute_hybrid_score(
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
normalized_label = str(label).upper().strip()
|
| 348 |
-
if normalized_label
|
| 349 |
-
|
| 350 |
|
| 351 |
-
sign = 1.0 if normalized_label == "BULLISH" else -1.0
|
| 352 |
confidence = max(0.0, min(1.0, float(llm_confidence)))
|
| 353 |
strength = max(0.0, min(1.0, float(finbert_strength)))
|
| 354 |
-
|
| 355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
|
| 358 |
def _extract_chat_message_content(data: dict[str, Any]) -> str:
|
|
@@ -660,8 +702,8 @@ def score_unscored_articles(
|
|
| 660 |
Strategy:
|
| 661 |
- Primary direction: OpenRouter LLM label + confidence
|
| 662 |
- Intensity: FinBERT probabilities for every article
|
| 663 |
-
-
|
| 664 |
-
-
|
| 665 |
- Chunk size: 12 articles for lower free-tier rate-limit pressure
|
| 666 |
- Run budget: cap LLM-scored articles per run, overflow uses FinBERT
|
| 667 |
|
|
@@ -686,6 +728,12 @@ def score_unscored_articles(
|
|
| 686 |
total_chunks = (len(unscored) + chunk_size - 1) // chunk_size
|
| 687 |
llm_model = settings.resolved_scoring_model
|
| 688 |
llm_budget_remaining = max(0, settings.max_llm_articles_per_run)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 689 |
budget_exhausted_logged = False
|
| 690 |
logger.info("LLM scoring budget for this run: %s articles", llm_budget_remaining)
|
| 691 |
llm_success = 0
|
|
@@ -712,7 +760,7 @@ def score_unscored_articles(
|
|
| 712 |
non_llm_candidates = chunk
|
| 713 |
if settings.openrouter_api_key and llm_budget_remaining <= 0 and not budget_exhausted_logged:
|
| 714 |
logger.info(
|
| 715 |
-
"LLM budget exhausted (%s articles). Remaining chunks use
|
| 716 |
settings.max_llm_articles_per_run,
|
| 717 |
)
|
| 718 |
budget_exhausted_logged = True
|
|
@@ -801,24 +849,33 @@ def score_unscored_articles(
|
|
| 801 |
label = "NEUTRAL"
|
| 802 |
|
| 803 |
llm_confidence = float(llm_result.get("llm_confidence", 0.0))
|
|
|
|
| 804 |
finbert_strength = float(
|
| 805 |
finbert.get(
|
| 806 |
"finbert_strength",
|
| 807 |
-
abs(
|
| 808 |
)
|
| 809 |
)
|
| 810 |
-
final_score = _compute_hybrid_score(
|
| 811 |
label=label,
|
| 812 |
llm_confidence=llm_confidence,
|
| 813 |
finbert_strength=finbert_strength,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 814 |
)
|
| 815 |
|
| 816 |
reasoning_payload = _build_hybrid_reasoning_payload(
|
| 817 |
label=label,
|
| 818 |
llm_confidence=llm_confidence,
|
| 819 |
finbert_strength=finbert_strength,
|
|
|
|
| 820 |
llm_reasoning=llm_result.get("llm_reasoning", ""),
|
| 821 |
llm_model=llm_result.get("llm_model", llm_model),
|
|
|
|
| 822 |
)
|
| 823 |
|
| 824 |
sentiment = NewsSentiment(
|
|
@@ -1364,6 +1421,11 @@ def main():
|
|
| 1364 |
action="store_true",
|
| 1365 |
help="Only run XGBoost training"
|
| 1366 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1367 |
parser.add_argument(
|
| 1368 |
"--target-symbol",
|
| 1369 |
type=str,
|
|
@@ -1387,8 +1449,8 @@ def main():
|
|
| 1387 |
logging.getLogger().setLevel(logging.DEBUG)
|
| 1388 |
|
| 1389 |
# Determine what to run
|
| 1390 |
-
score = args.run_all or args.score_only
|
| 1391 |
-
aggregate = args.run_all or args.aggregate_only
|
| 1392 |
train = args.run_all or args.train_only
|
| 1393 |
|
| 1394 |
if not (score or aggregate or train):
|
|
|
|
| 9 |
Usage:
|
| 10 |
python -m app.ai_engine --run-all --target-symbol HG=F
|
| 11 |
python -m app.ai_engine --score-only
|
| 12 |
+
python -m app.ai_engine --refresh-sentiment
|
| 13 |
python -m app.ai_engine --train-only --target-symbol HG=F
|
| 14 |
"""
|
| 15 |
|
|
|
|
| 46 |
_FINBERT_OUTPUT_LOGGED = False
|
| 47 |
_FINBERT_MISSING_LABELS_WARNED = False
|
| 48 |
|
| 49 |
+
HYBRID_SCORING_VERSION = "hybrid_v2"
|
| 50 |
HYBRID_FALLBACK_429_MODEL_NAME = "hybrid_fallback_429"
|
| 51 |
HYBRID_FALLBACK_PARSE_MODEL_NAME = "hybrid_fallback_parse"
|
| 52 |
LLM_LABELS = {"BULLISH", "BEARISH", "NEUTRAL"}
|
|
|
|
| 329 |
label: str,
|
| 330 |
llm_confidence: float,
|
| 331 |
finbert_strength: float,
|
| 332 |
+
finbert_polarity: float,
|
| 333 |
llm_reasoning: str,
|
| 334 |
llm_model: str,
|
| 335 |
+
soft_neutral_applied: bool = False,
|
| 336 |
) -> str:
|
| 337 |
payload = {
|
| 338 |
"label": label,
|
| 339 |
"llm_confidence": round(max(0.0, min(1.0, llm_confidence)), 4),
|
| 340 |
"finbert_strength": round(max(0.0, min(1.0, finbert_strength)), 4),
|
| 341 |
+
"finbert_polarity": round(max(-1.0, min(1.0, finbert_polarity)), 4),
|
| 342 |
"llm_reasoning": _sanitize_reasoning_text(llm_reasoning),
|
| 343 |
"llm_model": llm_model,
|
| 344 |
+
"soft_neutral_applied": bool(soft_neutral_applied),
|
| 345 |
"scoring_version": HYBRID_SCORING_VERSION,
|
| 346 |
}
|
| 347 |
return json.dumps(payload, ensure_ascii=True)
|
| 348 |
|
| 349 |
|
| 350 |
+
def _compute_hybrid_score(
    *,
    label: str,
    llm_confidence: float,
    finbert_strength: float,
    finbert_polarity: Optional[float] = None,
    non_neutral_boost: float = 1.35,
    soft_neutral_polarity_threshold: float = 0.12,
    soft_neutral_max_mag: float = 0.25,
    soft_neutral_scale: float = 0.8,
    return_metadata: bool = False,
) -> float | tuple[float, bool]:
    """Compute final hybrid impact score in [-1, 1] with boosted non-neutral and soft-neutral rules.

    Args:
        label: LLM label; upper-cased and stripped, anything outside
            ``LLM_LABELS`` is treated as ``"NEUTRAL"``.
        llm_confidence: LLM confidence, clamped to [0, 1].
        finbert_strength: FinBERT strength, clamped to [0, 1].
        finbert_polarity: Signed FinBERT polarity, clamped to [-1, 1];
            ``None`` counts as 0.0.
        non_neutral_boost: Factor applied to the BULLISH/BEARISH magnitude
            ``0.7 * confidence + 0.3 * strength`` (negative values count as 0).
        soft_neutral_polarity_threshold: Minimum ``|polarity|`` for a NEUTRAL
            label to emit a directional score (negative values count as 0).
        soft_neutral_max_mag: Upper bound on the soft-neutral magnitude.
        soft_neutral_scale: Factor applied to the soft-neutral core
            ``0.6 * |polarity| + 0.4 * strength``.
        return_metadata: When true, also return whether the soft-neutral
            rule produced the score.

    Returns:
        The score, or ``(score, soft_neutral_applied)`` when
        ``return_metadata`` is true.
    """
    import math  # function-scope: only needed for the NaN guard below

    def _nan_to_zero(raw: float) -> float:
        # min()/max() clamps would otherwise saturate a NaN to the upper
        # bound, turning a missing value into a maximal signal.
        number = float(raw)
        return 0.0 if math.isnan(number) else number

    normalized_label = str(label).upper().strip()
    if normalized_label not in LLM_LABELS:
        normalized_label = "NEUTRAL"

    confidence = max(0.0, min(1.0, _nan_to_zero(llm_confidence)))
    strength = max(0.0, min(1.0, _nan_to_zero(finbert_strength)))
    polarity_value = _nan_to_zero(finbert_polarity) if finbert_polarity is not None else 0.0
    polarity = max(-1.0, min(1.0, polarity_value))
    soft_neutral_applied = False

    if normalized_label == "NEUTRAL":
        abs_polarity = abs(polarity)
        threshold = max(0.0, float(soft_neutral_polarity_threshold))
        # A polarity of exactly zero carries no direction, so it must never
        # reach the signed branch (which would label it negative when the
        # threshold is configured as 0).
        if abs_polarity == 0.0 or abs_polarity < threshold:
            final_score = 0.0
        else:
            neutral_core = (0.6 * abs_polarity) + (0.4 * strength)
            # Cap at 1.0 as well so a misconfigured max_mag cannot push the
            # score outside the documented [-1, 1] range.
            neutral_mag = min(
                1.0,
                max(0.0, float(soft_neutral_max_mag)),
                neutral_core * max(0.0, float(soft_neutral_scale)),
            )
            sign = 1.0 if polarity > 0 else -1.0
            final_score = sign * neutral_mag
            soft_neutral_applied = True
    else:
        sign = 1.0 if normalized_label == "BULLISH" else -1.0
        base_mag = max(0.0, min(1.0, (0.7 * confidence) + (0.3 * strength)))
        boosted_mag = min(1.0, base_mag * max(0.0, float(non_neutral_boost)))
        final_score = sign * boosted_mag

    if return_metadata:
        return final_score, soft_neutral_applied
    return final_score
|
| 398 |
|
| 399 |
|
| 400 |
def _extract_chat_message_content(data: dict[str, Any]) -> str:
|
|
|
|
| 702 |
Strategy:
|
| 703 |
- Primary direction: OpenRouter LLM label + confidence
|
| 704 |
- Intensity: FinBERT probabilities for every article
|
| 705 |
+
- Non-neutral boost: (0.7*llm_conf + 0.3*finbert_strength) * boost
|
| 706 |
+
- Soft neutral: NEUTRAL labels can emit small directional score from FinBERT polarity
|
| 707 |
- Chunk size: 12 articles for lower free-tier rate-limit pressure
|
| 708 |
- Run budget: cap LLM-scored articles per run, overflow uses FinBERT
|
| 709 |
|
|
|
|
| 728 |
total_chunks = (len(unscored) + chunk_size - 1) // chunk_size
|
| 729 |
llm_model = settings.resolved_scoring_model
|
| 730 |
llm_budget_remaining = max(0, settings.max_llm_articles_per_run)
|
| 731 |
+
non_neutral_boost = float(getattr(settings, "sentiment_non_neutral_boost", 1.35))
|
| 732 |
+
soft_neutral_polarity_threshold = float(
|
| 733 |
+
getattr(settings, "sentiment_soft_neutral_polarity_threshold", 0.12)
|
| 734 |
+
)
|
| 735 |
+
soft_neutral_max_mag = float(getattr(settings, "sentiment_soft_neutral_max_mag", 0.25))
|
| 736 |
+
soft_neutral_scale = float(getattr(settings, "sentiment_soft_neutral_scale", 0.8))
|
| 737 |
budget_exhausted_logged = False
|
| 738 |
logger.info("LLM scoring budget for this run: %s articles", llm_budget_remaining)
|
| 739 |
llm_success = 0
|
|
|
|
| 760 |
non_llm_candidates = chunk
|
| 761 |
if settings.openrouter_api_key and llm_budget_remaining <= 0 and not budget_exhausted_logged:
|
| 762 |
logger.info(
|
| 763 |
+
"LLM budget exhausted (%s articles). Remaining chunks use soft-neutral FinBERT fallback.",
|
| 764 |
settings.max_llm_articles_per_run,
|
| 765 |
)
|
| 766 |
budget_exhausted_logged = True
|
|
|
|
| 849 |
label = "NEUTRAL"
|
| 850 |
|
| 851 |
llm_confidence = float(llm_result.get("llm_confidence", 0.0))
|
| 852 |
+
finbert_polarity = float(finbert["prob_positive"]) - float(finbert["prob_negative"])
|
| 853 |
finbert_strength = float(
|
| 854 |
finbert.get(
|
| 855 |
"finbert_strength",
|
| 856 |
+
abs(finbert_polarity),
|
| 857 |
)
|
| 858 |
)
|
| 859 |
+
final_score, soft_neutral_applied = _compute_hybrid_score(
|
| 860 |
label=label,
|
| 861 |
llm_confidence=llm_confidence,
|
| 862 |
finbert_strength=finbert_strength,
|
| 863 |
+
finbert_polarity=finbert_polarity,
|
| 864 |
+
non_neutral_boost=non_neutral_boost,
|
| 865 |
+
soft_neutral_polarity_threshold=soft_neutral_polarity_threshold,
|
| 866 |
+
soft_neutral_max_mag=soft_neutral_max_mag,
|
| 867 |
+
soft_neutral_scale=soft_neutral_scale,
|
| 868 |
+
return_metadata=True,
|
| 869 |
)
|
| 870 |
|
| 871 |
reasoning_payload = _build_hybrid_reasoning_payload(
|
| 872 |
label=label,
|
| 873 |
llm_confidence=llm_confidence,
|
| 874 |
finbert_strength=finbert_strength,
|
| 875 |
+
finbert_polarity=finbert_polarity,
|
| 876 |
llm_reasoning=llm_result.get("llm_reasoning", ""),
|
| 877 |
llm_model=llm_result.get("llm_model", llm_model),
|
| 878 |
+
soft_neutral_applied=soft_neutral_applied,
|
| 879 |
)
|
| 880 |
|
| 881 |
sentiment = NewsSentiment(
|
|
|
|
| 1421 |
action="store_true",
|
| 1422 |
help="Only run XGBoost training"
|
| 1423 |
)
|
| 1424 |
+
parser.add_argument(
|
| 1425 |
+
"--refresh-sentiment",
|
| 1426 |
+
action="store_true",
|
| 1427 |
+
help="Run sentiment scoring + daily aggregation (no training)"
|
| 1428 |
+
)
|
| 1429 |
parser.add_argument(
|
| 1430 |
"--target-symbol",
|
| 1431 |
type=str,
|
|
|
|
| 1449 |
logging.getLogger().setLevel(logging.DEBUG)
|
| 1450 |
|
| 1451 |
# Determine what to run
|
| 1452 |
+
score = args.run_all or args.score_only or args.refresh_sentiment
|
| 1453 |
+
aggregate = args.run_all or args.aggregate_only or args.refresh_sentiment
|
| 1454 |
train = args.run_all or args.train_only
|
| 1455 |
|
| 1456 |
if not (score or aggregate or train):
|
app/inference.py
CHANGED
|
@@ -263,6 +263,74 @@ def get_sentiment_label(sentiment_index: float) -> str:
|
|
| 263 |
return "Neutral"
|
| 264 |
|
| 265 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
def build_features_for_prediction(
|
| 267 |
session: Session,
|
| 268 |
target_symbol: str,
|
|
@@ -431,19 +499,41 @@ def generate_analysis_report(
|
|
| 431 |
|
| 432 |
logger.info(f"Model prediction: raw_output={model_output:.6f}, target_type={target_type}")
|
| 433 |
|
| 434 |
-
# Compute
|
| 435 |
if target_type == "simple_return":
|
| 436 |
-
|
| 437 |
-
predicted_price = baseline_price * (1 + predicted_return)
|
| 438 |
elif target_type == "log_return":
|
| 439 |
import math
|
| 440 |
-
|
| 441 |
-
predicted_price = baseline_price * math.exp(model_output)
|
| 442 |
elif target_type == "price":
|
| 443 |
-
|
| 444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
|
| 446 |
-
# Validate prediction
|
| 447 |
prediction_invalid = False
|
| 448 |
if predicted_return < -1.0:
|
| 449 |
logger.error(f"Invalid prediction: return {predicted_return:.4f} < -100%")
|
|
@@ -460,9 +550,6 @@ def generate_analysis_report(
|
|
| 460 |
session, target_symbol, predicted_price
|
| 461 |
)
|
| 462 |
|
| 463 |
-
# Get data quality
|
| 464 |
-
data_quality = get_data_quality_stats(session, target_symbol)
|
| 465 |
-
|
| 466 |
# Build influencer descriptions
|
| 467 |
descriptions = get_feature_descriptions()
|
| 468 |
top_influencers = []
|
|
@@ -493,6 +580,10 @@ def generate_analysis_report(
|
|
| 493 |
"baseline_price": round(baseline_price, 4),
|
| 494 |
"baseline_price_date": baseline_price_date,
|
| 495 |
"predicted_return": round(predicted_return, 6),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
"predicted_return_pct": round(predicted_return * 100, 2),
|
| 497 |
"predicted_price": round(predicted_price, 4),
|
| 498 |
"target_type": target_type,
|
|
|
|
| 263 |
return "Neutral"
|
| 264 |
|
| 265 |
|
| 266 |
+
def _sign(value: float) -> int:
|
| 267 |
+
"""Return numeric sign (-1, 0, 1)."""
|
| 268 |
+
if value > 0:
|
| 269 |
+
return 1
|
| 270 |
+
if value < 0:
|
| 271 |
+
return -1
|
| 272 |
+
return 0
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def _clamp(value: float, lower: float, upper: float) -> float:
|
| 276 |
+
"""Clamp value to [lower, upper]."""
|
| 277 |
+
return max(lower, min(upper, value))
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def _apply_sentiment_adjustment(
    raw_predicted_return: float,
    sentiment_index: float,
    news_count_7d: int,
) -> tuple[float, float, bool, bool]:
    """
    Apply aggressive-but-capped sentiment multiplier to raw predicted return.

    The multiplier is ``1 + direction * tanh(|sentiment| / power_ref) * news_intensity``,
    clamped to the configured min/max. ``news_intensity`` is
    ``news_count_7d / news_ref`` limited to [0, 1]; ``direction`` is +1 when the
    raw return is zero or has the same sign as the sentiment, otherwise -1.

    Tiny-signal floor: when ``|raw return|`` is below
    ``inference_tiny_signal_threshold``, ``|sentiment|`` is at least ``power_ref``
    and the news count reaches ``news_floor``, the multiplier is bypassed and the
    return becomes ``sign(sentiment) * inference_tiny_signal_floor``. The
    multiplier is still computed and returned in that case.

    The result is finally clipped to ``+/- |inference_return_cap|``.

    A NaN ``sentiment_index`` is treated as 0.0 (no sentiment signal) so it
    cannot propagate through ``tanh`` into the multiplier.

    Returns:
        (adjusted_return, multiplier, adjustment_applied, capped)
    """
    settings = get_settings()
    news_ref = max(1.0, float(settings.inference_sentiment_news_ref))
    power_ref = max(1e-6, float(settings.inference_sentiment_power_ref))
    news_floor = max(1, int(round(news_ref * 0.4)))  # default: 12 when ref is 30

    raw_return = float(raw_predicted_return)
    sentiment = float(sentiment_index)
    if np.isnan(sentiment):
        # Missing sentiment must mean "no adjustment", not a NaN multiplier.
        sentiment = 0.0

    news_intensity = min(1.0, max(0.0, float(news_count_7d) / news_ref))
    sentiment_power = float(np.tanh(abs(sentiment) / power_ref))

    raw_sign = _sign(raw_return)
    sentiment_sign = _sign(sentiment)
    # Agreeing (or absent) raw signal is amplified; a disagreeing one is damped.
    direction = 1.0 if raw_sign == 0 or raw_sign == sentiment_sign else -1.0

    multiplier = 1.0 + (direction * sentiment_power * news_intensity)
    multiplier = _clamp(
        multiplier,
        float(settings.inference_sentiment_multiplier_min),
        float(settings.inference_sentiment_multiplier_max),
    )

    use_tiny_floor = (
        abs(raw_return) < float(settings.inference_tiny_signal_threshold)
        and abs(sentiment) >= power_ref
        and int(news_count_7d) >= news_floor
    )

    if use_tiny_floor:
        adjusted_return = float(sentiment_sign) * float(settings.inference_tiny_signal_floor)
    else:
        adjusted_return = raw_return * multiplier

    # Safety cap applies to both the multiplied and the floored return.
    cap = abs(float(settings.inference_return_cap))
    capped = False
    if adjusted_return > cap:
        adjusted_return = cap
        capped = True
    elif adjusted_return < -cap:
        adjusted_return = -cap
        capped = True

    adjustment_applied = use_tiny_floor or capped or abs(multiplier - 1.0) > 1e-9
    return adjusted_return, multiplier, adjustment_applied, capped
|
| 332 |
+
|
| 333 |
+
|
| 334 |
def build_features_for_prediction(
|
| 335 |
session: Session,
|
| 336 |
target_symbol: str,
|
|
|
|
| 499 |
|
| 500 |
logger.info(f"Model prediction: raw_output={model_output:.6f}, target_type={target_type}")
|
| 501 |
|
| 502 |
+
# Compute raw predicted return based on target_type
|
| 503 |
if target_type == "simple_return":
|
| 504 |
+
raw_predicted_return = model_output
|
|
|
|
| 505 |
elif target_type == "log_return":
|
| 506 |
import math
|
| 507 |
+
raw_predicted_return = math.exp(model_output) - 1
|
|
|
|
| 508 |
elif target_type == "price":
|
| 509 |
+
raw_predicted_return = (model_output / baseline_price) - 1 if baseline_price > 0 else 0
|
| 510 |
+
else:
|
| 511 |
+
raw_predicted_return = 0.0
|
| 512 |
+
|
| 513 |
+
# Data quality feeds sentiment multiplier intensity.
|
| 514 |
+
data_quality = get_data_quality_stats(session, target_symbol)
|
| 515 |
+
news_count_7d = int(data_quality.get("news_count_7d") or 0)
|
| 516 |
+
|
| 517 |
+
predicted_return, sentiment_multiplier, adjustment_applied, predicted_return_capped = (
|
| 518 |
+
_apply_sentiment_adjustment(
|
| 519 |
+
raw_predicted_return=float(raw_predicted_return),
|
| 520 |
+
sentiment_index=float(current_sentiment),
|
| 521 |
+
news_count_7d=news_count_7d,
|
| 522 |
+
)
|
| 523 |
+
)
|
| 524 |
+
logger.info(
|
| 525 |
+
"Sentiment adjustment: raw=%.6f adjusted=%.6f multiplier=%.4f applied=%s capped=%s news_count_7d=%s sentiment=%.4f",
|
| 526 |
+
raw_predicted_return,
|
| 527 |
+
predicted_return,
|
| 528 |
+
sentiment_multiplier,
|
| 529 |
+
adjustment_applied,
|
| 530 |
+
predicted_return_capped,
|
| 531 |
+
news_count_7d,
|
| 532 |
+
current_sentiment,
|
| 533 |
+
)
|
| 534 |
+
predicted_price = baseline_price * (1 + predicted_return)
|
| 535 |
|
| 536 |
+
# Validate prediction after sentiment adjustment/cap.
|
| 537 |
prediction_invalid = False
|
| 538 |
if predicted_return < -1.0:
|
| 539 |
logger.error(f"Invalid prediction: return {predicted_return:.4f} < -100%")
|
|
|
|
| 550 |
session, target_symbol, predicted_price
|
| 551 |
)
|
| 552 |
|
|
|
|
|
|
|
|
|
|
| 553 |
# Build influencer descriptions
|
| 554 |
descriptions = get_feature_descriptions()
|
| 555 |
top_influencers = []
|
|
|
|
| 580 |
"baseline_price": round(baseline_price, 4),
|
| 581 |
"baseline_price_date": baseline_price_date,
|
| 582 |
"predicted_return": round(predicted_return, 6),
|
| 583 |
+
"raw_predicted_return": round(raw_predicted_return, 6),
|
| 584 |
+
"sentiment_multiplier": round(sentiment_multiplier, 4),
|
| 585 |
+
"sentiment_adjustment_applied": bool(adjustment_applied),
|
| 586 |
+
"predicted_return_capped": bool(predicted_return_capped),
|
| 587 |
"predicted_return_pct": round(predicted_return * 100, 2),
|
| 588 |
"predicted_price": round(predicted_price, 4),
|
| 589 |
"target_type": target_type,
|
app/schemas.py
CHANGED
|
@@ -35,6 +35,18 @@ class AnalysisReport(BaseModel):
|
|
| 35 |
# Core prediction data (nullable for degraded modes)
|
| 36 |
current_price: Optional[float] = Field(0.0, description="Most recent closing price")
|
| 37 |
predicted_return: Optional[float] = Field(0.0, description="Predicted next-day return")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
predicted_price: Optional[float] = Field(0.0, description="Predicted next-day price")
|
| 39 |
confidence_lower: Optional[float] = Field(0.0, description="Lower bound of confidence interval")
|
| 40 |
confidence_upper: Optional[float] = Field(0.0, description="Upper bound of confidence interval")
|
|
|
|
| 35 |
# Core prediction data (nullable for degraded modes)
|
| 36 |
current_price: Optional[float] = Field(0.0, description="Most recent closing price")
|
| 37 |
predicted_return: Optional[float] = Field(0.0, description="Predicted next-day return")
|
| 38 |
+
raw_predicted_return: Optional[float] = Field(
|
| 39 |
+
None, description="Raw model output converted to return before sentiment adjustment"
|
| 40 |
+
)
|
| 41 |
+
sentiment_multiplier: Optional[float] = Field(
|
| 42 |
+
None, description="Sentiment-driven multiplier applied to raw predicted return"
|
| 43 |
+
)
|
| 44 |
+
sentiment_adjustment_applied: Optional[bool] = Field(
|
| 45 |
+
None, description="Whether sentiment adjustment layer altered predicted return"
|
| 46 |
+
)
|
| 47 |
+
predicted_return_capped: Optional[bool] = Field(
|
| 48 |
+
None, description="Whether final predicted return was clipped by safety cap"
|
| 49 |
+
)
|
| 50 |
predicted_price: Optional[float] = Field(0.0, description="Predicted next-day price")
|
| 51 |
confidence_lower: Optional[float] = Field(0.0, description="Lower bound of confidence interval")
|
| 52 |
confidence_upper: Optional[float] = Field(0.0, description="Upper bound of confidence interval")
|
app/settings.py
CHANGED
|
@@ -49,6 +49,10 @@ class Settings(BaseSettings):
|
|
| 49 |
# Sentiment aggregation
|
| 50 |
sentiment_tau_hours: float = 12.0
|
| 51 |
sentiment_missing_fill: float = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# API settings
|
| 54 |
analysis_ttl_minutes: int = 30
|
|
@@ -81,6 +85,15 @@ class Settings(BaseSettings):
|
|
| 81 |
|
| 82 |
# Twelve Data (Live Price)
|
| 83 |
twelvedata_api_key: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# LLM Sentiment Analysis
|
| 86 |
# Deprecated - kept for backward compatibility
|
|
|
|
| 49 |
# Sentiment aggregation
|
| 50 |
sentiment_tau_hours: float = 12.0
|
| 51 |
sentiment_missing_fill: float = 0.0
|
| 52 |
+
sentiment_non_neutral_boost: float = 1.35
|
| 53 |
+
sentiment_soft_neutral_polarity_threshold: float = 0.12
|
| 54 |
+
sentiment_soft_neutral_max_mag: float = 0.25
|
| 55 |
+
sentiment_soft_neutral_scale: float = 0.8
|
| 56 |
|
| 57 |
# API settings
|
| 58 |
analysis_ttl_minutes: int = 30
|
|
|
|
| 85 |
|
| 86 |
# Twelve Data (Live Price)
|
| 87 |
twelvedata_api_key: Optional[str] = None
|
| 88 |
+
|
| 89 |
+
# Inference sentiment adjustment (aggressive but capped)
|
| 90 |
+
inference_sentiment_multiplier_max: float = 2.0
|
| 91 |
+
inference_sentiment_multiplier_min: float = 0.5
|
| 92 |
+
inference_sentiment_news_ref: int = 30
|
| 93 |
+
inference_sentiment_power_ref: float = 0.20
|
| 94 |
+
inference_tiny_signal_threshold: float = 0.0015
|
| 95 |
+
inference_tiny_signal_floor: float = 0.0025
|
| 96 |
+
inference_return_cap: float = 0.02
|
| 97 |
|
| 98 |
# LLM Sentiment Analysis
|
| 99 |
# Deprecated - kept for backward compatibility
|