Update app.py

app.py CHANGED
@@ -96,60 +96,48 @@ LEGAL_KEYWORDS = ["article","law","contract","clause","jurisdiction","court",
 ACADEMIC_KEYWORDS = ["abstract","methodology","hypothesis","conclusion","references","doi","journal"]
 
 # ============================================================
-#
+# LOUGHRAN-MCDONALD EXTENDED LEXICON
 # ============================================================
 ECON_POSITIVE = [
-    # English
     "growth","recovery","surplus","improvement","stability","increase",
     "expansion","acceleration","resilience","upturn","robust","favorable",
     "strengthened","progress","rebound","optimistic","confidence","boom",
     "prosper","thrive","advance","gain","rise","positive","upward",
     "exceed","outperform","strong","healthy","dynamic","sustainable",
-    # French
     "croissance","reprise","amélioration","stabilité","excédent","hausse",
     "expansion","dynamique","favorable","progrès","rebond","solide",
-    # Arabic
    "تعافي","نمو","استقرار","فائض","تحسّن","ارتفاع","توسع","إيجابي",
    "تقدم","قوي","ازدهار","انتعاش","تحسين","قوة",
 ]
 ECON_NEGATIVE = [
-    # English
     "deficit","recession","inflation","decline","contraction","debt",
     "crisis","deterioration","slowdown","downturn","unemployment","pressure",
     "risk","vulnerability","shock","uncertainty","war","sanctions",
     "drought","collapse","default","volatile","instability","weak",
     "fragile","pessimistic","loss","shrink","fall","negative","downward",
     "slump","stagnation","turbulence","disruption","imbalance","burden",
-    # French
     "déficit","récession","crise","ralentissement","chômage","incertitude",
     "guerre","effondrement","instabilité","baisse","fragilité","pression",
-    # Arabic
    "عجز","تضخم","ركود","انكماش","أزمة","تدهور","بطالة","انخفاض",
    "ضغط","مخاطر","صدمة","عدم استقرار","هشاشة","ديون","عقوبات",
 ]
 
-#
+# ECON_TRIGGER — balanced: positive + negative + neutral indicators
 ECON_TRIGGER = [
-    # negative
     "deficit","risk","crisis","recession","shock","uncertainty",
     "slowdown","pressure","vulnerable","weak","deteriorat","downturn",
     "contraction","debt","unemployment","inflation","collapse","volatile",
     "instability","fragile","stagnation","disruption","sanctions","drought",
-    # positive
     "growth","recovery","improvement","surplus","stable","expansion",
     "resilience","rebound","strengthened","acceleration","robust",
     "favorable","progress","increase","upturn","confidence","boom",
-    # neutral indicators
     "gdp","forecast","outlook","trade","fiscal","monetary","exchange",
     "interest","budget","revenue","expenditure","policy","reform",
-    # Arabic
    "التضخم","الناتج","النمو","العجز","المخاطر","التوقعات","الميزانية",
-    # French
     "croissance","déficit","récession","prévision","taux","politique",
 ]
 
 def economic_lexicon_score(text: str) -> float:
-    """Loughran-McDonald Extended Lexicon → [-1, +1]"""
     text_lower = text.lower()
     pos = sum(1 for w in ECON_POSITIVE if w in text_lower)
     neg = sum(1 for w in ECON_NEGATIVE if w in text_lower)
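The hunk cuts off before `economic_lexicon_score` returns. A minimal sketch of the [-1, +1] lexicon stage it implements, assuming the conventional `(pos - neg) / (pos + neg)` normalization (the actual return line is not shown in this diff):

```python
def lexicon_score(text: str, pos_words: list, neg_words: list) -> float:
    # Count lexicon hits, then map the positive/negative balance
    # into [-1, +1]; this normalization is an assumption.
    t = text.lower()
    pos = sum(1 for w in pos_words if w in t)
    neg = sum(1 for w in neg_words if w in t)
    total = pos + neg
    return 0.0 if total == 0 else round((pos - neg) / total, 4)

print(lexicon_score("growth despite inflation risk",
                    ["growth"], ["inflation", "risk"]))  # -0.3333
```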
@@ -182,11 +170,10 @@ def detect_document_type(texts: list) -> dict:
 }
 
 # ============================================================
-#
+# ENSEMBLE: FinBERT (40%) + XLM-RoBERTa (30%) + Lexicon (30%)
 # ============================================================
 WEIGHTS = {"finbert": 0.40, "xlm": 0.30, "lexicon": 0.30}
 
-# — FinBERT (Financial Text Expert) —
 print("⏳ Loading FinBERT (ProsusAI)...")
 try:
     finbert_pipe = pipeline(
@@ -203,7 +190,6 @@ except Exception as e:
     finbert_pipe = None
     FINBERT_OK = False
 
-# — XLM-RoBERTa (Multilingual) —
 print("⏳ Loading XLM-RoBERTa...")
 try:
     xlm_pipe = pipeline(
@@ -226,7 +212,6 @@ def normalize_clf(raw):
     return raw if isinstance(raw, list) else [raw]
 
 def clf_finbert(text: str) -> float:
-    """FinBERT → [-1, +1] | labels: positive / negative / neutral"""
     if not FINBERT_OK or finbert_pipe is None:
         return 0.0
     try:
@@ -238,13 +223,11 @@ def clf_finbert(text: str) -> float:
         return 0.0
 
 def clf_xlm(text: str) -> float:
-    """XLM-RoBERTa → [-1, +1] | labels: LABEL_0/1/2 or positive/neutral/negative"""
     if not XLM_OK or xlm_pipe is None:
         return 0.0
     try:
         items = normalize_clf(xlm_pipe(text[:512]))
         d = {r["label"]: float(r["score"]) for r in items}
-        # XLM labels: LABEL_0=neg, LABEL_1=neu, LABEL_2=pos
         pos = d.get("LABEL_2", d.get("positive", d.get("Positive", 0.0)))
         neg = d.get("LABEL_0", d.get("negative", d.get("Negative", 0.0)))
         return round(pos - neg, 4)
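The `d.get` chain above covers both label schemes such checkpoints emit (`LABEL_0/1/2` vs. `positive`/`negative`). A toy demonstration with a hard-coded pipeline output, no model loaded:

```python
# Hypothetical classifier output in the LABEL_0/1/2 scheme
items = [{"label": "LABEL_2", "score": 0.71},   # positive
         {"label": "LABEL_1", "score": 0.20},   # neutral
         {"label": "LABEL_0", "score": 0.09}]   # negative
d = {r["label"]: float(r["score"]) for r in items}
pos = d.get("LABEL_2", d.get("positive", 0.0))
neg = d.get("LABEL_0", d.get("negative", 0.0))
print(round(pos - neg, 4))  # 0.62
```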
@@ -254,10 +237,8 @@ def clf_xlm(text: str) -> float:
 
 def sentiment_score_numeric(text: str) -> float:
     """
-
-    40% FinBERT
-    + 30% XLM-RoBERTa (multilingual: AR/FR/EN)
-    + 30% Loughran-McDonald Lexicon (economic terms)
+    Weighted Ensemble:
+    40% FinBERT + 30% XLM-RoBERTa + 30% Loughran-McDonald
     → [-1, +1]
     """
     fb = clf_finbert(text)
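The combination itself falls outside this hunk; a sketch of the weighted sum that the docstring and the `WEIGHTS` dict describe, with toy sub-scores standing in for real model calls:

```python
WEIGHTS = {"finbert": 0.40, "xlm": 0.30, "lexicon": 0.30}
fb, xlm, lex = 0.58, 0.31, -0.20   # each sub-score lies in [-1, +1]
score = (WEIGHTS["finbert"] * fb
         + WEIGHTS["xlm"] * xlm
         + WEIGHTS["lexicon"] * lex)
print(round(score, 4))  # 0.265
```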
@@ -271,7 +252,6 @@ def sentiment_score_numeric(text: str) -> float:
     )
 
 def run_sentiment(text: str):
-    """Ensemble sentiment → (label_str, confidence)"""
     score = sentiment_score_numeric(text)
     if score > 0.05:
         sent = "Positive 😊"
@@ -282,7 +262,6 @@ def run_sentiment(text: str):
     return sent, round(min(abs(score), 1.0), 4)
 
 def run_sentiment_detailed(text: str) -> str:
-    """Detailed breakdown of each model's contribution"""
     fb = clf_finbert(text)
     xlm = clf_xlm(text)
     lex = economic_lexicon_score(text)
@@ -477,7 +456,7 @@ def build_index(files):
     for fname, info in PER_FILE_INFO.items():
         n = sum(1 for m in KB_META if m["name"] == fname)
         yr = str(info.get("year","N/A"))
-        yrb = f"{yr} ✅" if yr not in ["None","N/A"
+        yrb = f"{yr} ✅" if yr not in ["None","N/A"] else "N/A ⚠️"
         badge = " 🟢" if info["is_economic"] else ""
         tbl += f"| `{fname}` | {yrb} | {info['type']}{badge} | {info['confidence']:.0%} | {n} |\n"
 
@@ -560,31 +539,21 @@ def rag_retrieve(query, k=5, top_n=3):
         return []
 
 # ============================================================
-#
+# SMART ECONOMIC CHUNK SAMPLER
 # ============================================================
 def get_economic_chunks(texts: list, max_chunks: int = 40) -> list:
-    """
-    Smart sampler:
-    1) filter economic chunks (ECON_TRIGGER)
-    2) sample from the report's start + middle + end
-    3) cap at max_chunks
-    """
     n = len(texts)
     econ = [t for t in texts if any(kw in t.lower() for kw in ECON_TRIGGER)]
-
     if len(econ) < 10:
         start = texts[:min(10, n)]
         mid = texts[n//2-5 : n//2+5] if n > 20 else []
         end = texts[-min(10, n):]
         econ = list(dict.fromkeys(start + mid + end))
-
-    # uniform sample across the whole report
     if len(econ) > max_chunks:
         step = max(1, len(econ) // max_chunks)
         sample = econ[::step][:max_chunks]
     else:
         sample = econ
-
     return sample
 
 # ============================================================
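The stride trick `econ[::step][:max_chunks]` keeps an evenly spaced subset, so long reports are sampled end to end instead of only from the front:

```python
chunks = [f"chunk-{i}" for i in range(100)]
step = max(1, len(chunks) // 40)   # -> 2
sample = chunks[::step][:40]       # evenly spaced, capped at 40
print(len(sample), sample[:3])     # 40 ['chunk-0', 'chunk-2', 'chunk-4']
```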
@@ -629,7 +598,6 @@ def smart_answer(question, history):
     rag_context = rag_context[:2000]
     has_good_rag = bool(results) and results[0]["sem"] >= 0.25
     answer_text = llm_groq(question, rag_context, history, lang)
-
     if has_good_rag:
         src = ", ".join(f"`{r['file']}` p.{r['page']}" for r in results)
         badge = f"\n\n📄 **{'المصدر' if lang=='ar' else 'Source'}:** {src}"
@@ -646,7 +614,6 @@
 def predict_with_rag(text):
     text = "" if text is None else str(text).strip()
     if not text: raise gr.Error("⚠️ Enter text first.")
-
     lang = detect_lang(text)
     qterms = [t for t in re.findall(r"\w+", text.lower()) if len(t)>2]
 
@@ -680,10 +647,9 @@ def predict_with_rag(text):
         flag = "🇸🇦" if h["lang"]=="ar" else "🇺🇸"
         md += f"- 🔑 **`{h['word']}`** → 📄 `{h['file']}` p.{h['page']} {flag}\n\n > {h['sentence']}\n\n"
 
-
-    detail = run_sentiment_detailed(text)
+    detail = run_sentiment_detailed(text)
     sent, conf = run_sentiment(text)
-    md
+    md += f"---\n{detail}\n\n"
 
     md += "---\n## 📍 Exact Location\n\n"
     seen2 = set()
@@ -741,7 +707,7 @@ def get_worldbank_data(country_code, indicator, start_year, end_year):
     return pd.DataFrame()
 
 # ============================================================
-#
+# YEARLY SENTIMENT INDEX
 # ============================================================
 def build_doc_sentiment_index():
     if not KB_TEXTS or not KB_META: return None, None
@@ -828,7 +794,7 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
         global_mean = float(df_yearly["sentiment"].mean())
         merged["sentiment"] = merged["sentiment"].fillna(global_mean)
         has_yearly = True
-        mode_msg = "✅ **Yearly Ensemble Sentiment** (FinBERT+XLM+Lexicon)"
+        mode_msg = "✅ **Yearly Ensemble Sentiment** (FinBERT 40%+XLM 30%+Lexicon 30%)"
     else:
         global_sent = (
             float(pd.to_numeric(df_files["sentiment"], errors="coerce").mean())
@@ -839,13 +805,14 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
         has_yearly = False
         mode_msg = "⚠️ **Global Sentiment** — rename files like `WEO_2020.pdf`"
 
-    # ── 4) ✅
+    # ── 4) ✅ FIXED Normalization — feature_range=(-0.3, 0.3) ──
+    # softens the sentiment effect and prevents over-prediction in Panel 1
     if merged["sentiment"].std() > 1e-6:
-        scaler = MinMaxScaler(feature_range=(-
+        scaler = MinMaxScaler(feature_range=(-0.3, 0.3))
         merged["sentiment"] = scaler.fit_transform(
             merged["sentiment"].values.reshape(-1, 1)
         ).flatten().round(4)
-        print(f"Sentiment normalized: {dict(zip(merged['year'], merged['sentiment']))}")
+        print(f"Sentiment normalized [-0.3,+0.3]: {dict(zip(merged['year'], merged['sentiment']))}")
 
     # ── 5) Train / Test ───────────────────────────────────────
     series = merged["value"].values.astype(float)
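What the fixed `feature_range` does: the yearly sentiment values are rescaled linearly so the most pessimistic year lands at -0.3 and the most optimistic at +0.3, damping the exogenous regressor before SARIMAX sees it. A standalone check:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

s = np.array([-0.9, -0.1, 0.2, 0.7]).reshape(-1, 1)  # toy yearly scores
scaled = MinMaxScaler(feature_range=(-0.3, 0.3)).fit_transform(s)
print(scaled.flatten().round(4))  # [-0.3  0.  0.1125  0.3]
```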
@@ -890,9 +857,12 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
     # Panel 1 — Forecast
     ax1 = axes[0]
     ax1.plot(years, series, "o-", color="#2196F3", label="Actual", lw=2, ms=5)
-    ax1.plot(test_years, pred_arima, "s--", color="#FF5722",
-
-    ax1.
+    ax1.plot(test_years, pred_arima, "s--", color="#FF5722",
+             label="ARIMA(1,1,1)", lw=2)
+    ax1.plot(test_years, pred_sarimax, "^-.", color="#4CAF50",
+             label="SARIMAX+Ensemble", lw=2)
+    ax1.axvline(x=years[split-1], color="gray", linestyle=":",
+                alpha=0.7, label="Train│Test")
     ax1.set_title(
         f"📈 {target_var} — {country_code} "
         f"({'Yearly' if has_yearly else 'Global'} Ensemble Sentiment)",
@@ -904,15 +874,18 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
     # Panel 2 — Ensemble Sentiment Timeline
     ax2 = axes[1]
     s_vals = merged["sentiment"].values
-    s_clrs = ["#4CAF50" if s>0.05 else "#FF5722" if s<-0.05 else "#FFC107"
+    s_clrs = ["#4CAF50" if s>0.05 else "#FF5722" if s<-0.05 else "#FFC107"
+              for s in s_vals]
     ax2.bar(years, s_vals, color=s_clrs, edgecolor="white", width=0.6)
     ax2.axhline(y=0, color="black", lw=0.8)
     ax2.set_title(
-        f"📊 Ensemble Sentiment Index
-        f"
+        f"📊 Ensemble Sentiment Index "
+        f"(FinBERT 40% + XLM 30% + Lexicon 30%)\n"
+        f"{'per-year' if has_yearly else 'global'} — normalized [-0.3, +0.3]",
         fontsize=10, fontweight="bold"
     )
-    ax2.set_xlabel("Year")
+    ax2.set_xlabel("Year")
+    ax2.set_ylabel("Sentiment Score (normalized)")
     ax2.grid(True, alpha=0.3, axis="y")
     ax2.legend(handles=[
         Patch(facecolor="#4CAF50", label="Optimistic (>0.05)"),
@@ -920,21 +893,25 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
         Patch(facecolor="#FF5722", label="Pessimistic (<-0.05)"),
     ], loc="upper right", fontsize=8)
 
-    # Panel 3 — RMSE
+    # Panel 3 — RMSE Comparison
    ax3 = axes[2]
     clrs = ["#FF5722" if rmse_a <= rmse_s else "#4CAF50",
             "#4CAF50" if rmse_s <= rmse_a else "#FF5722"]
     bars = ax3.bar(
         ["ARIMA(1,1,1)", "SARIMAX\n+Ensemble"],
-        [rmse_a, rmse_s],
+        [rmse_a, rmse_s],
+        color=clrs, width=0.4, edgecolor="white"
     )
     for bar, val in zip(bars, [rmse_a, rmse_s]):
         ax3.text(
-            bar.get_x()+bar.get_width()/2,
-
+            bar.get_x()+bar.get_width()/2,
+            bar.get_height()+0.01,
+            f"{val:.4f}",
+            ha="center", va="bottom", fontweight="bold", fontsize=11
         )
     ax3.set_title("RMSE Comparison (lower = better)", fontsize=11)
-    ax3.set_ylabel("RMSE")
+    ax3.set_ylabel("RMSE")
+    ax3.grid(True, alpha=0.3, axis="y")
 
     plt.tight_layout(pad=3.0)
     img_path = "/tmp/forecast_plot.png"
@@ -945,15 +922,18 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
     sent_table = ""
     if df_files is not None and len(df_files) > 0:
         sent_table = "\n---\n### 📄 Ensemble Sentiment per File\n\n"
-        sent_table += "| 📄 File | 📅 Year | 😊 Score | 📦 Chunks | Label |\n
+        sent_table += "| 📄 File | 📅 Year | 😊 Score | 📦 Chunks | Label |\n"
+        sent_table += "|---|---|---|---|---|\n"
         for _, row in df_files.iterrows():
-            yrb = f"{row['year']} ✅"
+            yrb = (f"{row['year']} ✅"
+                   if str(row['year']) not in ["N/A","None"] else "N/A ⚠️")
             sent_table += (
                 f"| `{row['file']}` | {yrb} "
                 f"| `{row['sentiment']:+.4f}` "
                 f"| {row['n_chunks']} | {row['label']} |\n"
             )
 
+    # ── 10) Result Text ────────────────────────────────────────
     arrow = "✅ Improved" if impr_rmse > 0 else "❌ No improvement"
     result_md = (
         f"## 📊 Forecast — Ensemble Sentiment\n\n"
@@ -966,9 +946,9 @@ def run_economic_forecast(country_code, target_var, start_year, end_year):
         f"| 🧪 Test | **{len(test_y)}** samples |\n\n"
         f"---\n### 🏆 Model Comparison\n\n"
         f"| Model | RMSE | MAE | MAPE |\n|---|---|---|---|\n"
-        f"| ARIMA(1,1,1)
-        f"| SARIMAX+Ensemble
-        f"| **Improvement**
+        f"| ARIMA(1,1,1) | `{rmse_a:.4f}` | `{mae_a:.4f}` | `{mape_a:.1f}%` |\n"
+        f"| SARIMAX+Ensemble | `{rmse_s:.4f}` | `{mae_s:.4f}` | `{mape_s:.1f}%` |\n"
+        f"| **Improvement** | **{impr_rmse:+.1f}%** | **{impr_mae:+.1f}%** | **{impr_mape:+.1f}%** |\n\n"
         f"**{arrow}** by adding Ensemble Sentiment Index.\n"
         f"{sent_table}"
     )
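The diff does not show how `impr_rmse` is computed; a plausible reconstruction, assuming the usual percentage improvement of SARIMAX over the ARIMA baseline (`impr_mae` and `impr_mape` analogous):

```python
rmse_a, rmse_s = 1.8421, 1.5237               # example error values
impr_rmse = (rmse_a - rmse_s) / rmse_a * 100  # assumed formula
print(f"{impr_rmse:+.1f}%")                   # +17.3%
```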
@@ -1009,9 +989,11 @@ def get_stats():
 def get_top_keywords():
     if not KB_TEXTS: return "_No files indexed yet._"
     all_words = re.findall(r"\b\w{4,}\b", " ".join(KB_TEXTS).lower())
-    stopwords = {
-
-
+    stopwords = {
+        "this","that","with","from","have","been","will","your",
+        "they","their","which","when","what","also","more","some",
+        "than","then","were","would","could","into","over","such"
+    }
     top = Counter(w for w in all_words if w not in stopwords).most_common(20)
     return "### 🔑 Top Keywords\n\n" + "\n".join(f"- **{w}**: {c}" for w,c in top)
 
@@ -1070,7 +1052,7 @@ with gr.Blocks(
         gr.Markdown(
             "**Upload PDF / DOCX / TXT / CSV**\n\n"
             "> 💡 Name files like `WEO_2020.pdf` — year in filename required!\n"
-            "> ✅ Year must show ✅ not ⚠️
+            "> ✅ Year must show ✅ not ⚠️"
         )
         files = gr.File(label="📂 Files",
                         file_types=[".pdf",".txt",".csv",".docx"],
@@ -1087,36 +1069,42 @@ with gr.Blocks(
         load_btn.click(load_saved_index, outputs=persist_status)
 
     with gr.Tab("🎭 2 · Sentiment + Search"):
-        inp = gr.Textbox(
-
-
+        inp = gr.Textbox(
+            lines=2,
+            placeholder="Type text… | اكتب نصاً… | Saisissez un texte…",
+            label="📝 Input (Ensemble: FinBERT 40% + XLM 30% + Lexicon 30%)"
+        )
         run_btn = gr.Button("🔍 Analyze", variant="primary")
         with gr.Row():
             out_sent = gr.Textbox(label="🎭 Ensemble Sentiment", interactive=False)
             out_conf = gr.Number(label="📊 Score", precision=4)
         out_full = gr.Markdown()
-        run_btn.click(predict_with_rag,
+        run_btn.click(predict_with_rag,
+                      inputs=inp,
+                      outputs=[out_sent, out_conf, out_full])
         gr.Markdown("---")
         with gr.Row():
             dl_btn = gr.Button("⬇️ Download Report", variant="secondary")
             rep_file = gr.File(label="report.md")
-        dl_btn.click(generate_report,
+        dl_btn.click(generate_report,
+                     inputs=[inp, out_sent, out_conf, out_full],
+                     outputs=rep_file)
 
     with gr.Tab("💬 3 · Smart Chatbot"):
         chatbot = gr.Chatbot(height=430, type="messages",
                              placeholder="Ask anything… / اسأل أي شيء…")
         msg = gr.Textbox(placeholder="Question…", label="💬")
         with gr.Row():
-            send_btn = gr.Button("Send ➤",
-            clear_btn = gr.Button("🗑️ Clear",
-            stats_btn = gr.Button("📊 Stats",
+            send_btn = gr.Button("Send ➤", variant="primary")
+            clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+            stats_btn = gr.Button("📊 Stats", variant="secondary")
         stats_box = gr.Markdown(visible=False)
         with gr.Row():
             export_btn = gr.Button("💾 Export Chat", variant="secondary")
             export_file = gr.File(label="chat_history.txt")
         msg.submit(chat_text, inputs=[msg,chatbot], outputs=[msg,chatbot])
         send_btn.click(chat_text, inputs=[msg,chatbot], outputs=[msg,chatbot])
-        clear_btn.click(lambda: ([],""), outputs=[chatbot,msg])
+        clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
         stats_btn.click(
             lambda: (get_stats(), gr.update(visible=True)),
             outputs=[stats_box, stats_box]
@@ -1129,7 +1117,8 @@ with gr.Blocks(
         transcript = gr.Textbox(label="📝 Transcript", interactive=False)
         audio_out = gr.Audio(label="🔊 Answer", type="filepath")
         voice_btn.click(
-            chat_voice,
+            chat_voice,
+            inputs=[audio_in, chatbot],
             outputs=[chatbot, audio_out, transcript]
         )
 
@@ -1150,7 +1139,7 @@ with gr.Blocks(
             "| 🏦 Sentiment 1 | **FinBERT** — ProsusAI (40%) |\n"
             "| 🌍 Sentiment 2 | **XLM-RoBERTa** — Cardiff NLP (30%) |\n"
             "| 📖 Sentiment 3 | **Loughran-McDonald Lexicon** (30%) |\n"
-            "| ⚡ Ensemble | Weighted 40/30/30 +
+            "| ⚡ Ensemble | Weighted 40/30/30 + MinMax[-0.3,+0.3] |\n"
             "| 🔍 Embeddings | paraphrase-multilingual-MiniLM-L12-v2 |\n"
             "| 📊 Reranker | cross-encoder/ms-marco-MiniLM-L-6-v2 |\n"
             "| 🤖 LLM | Llama-3.3-70B via Groq |\n"
@@ -1175,18 +1164,27 @@ with gr.Blocks(
             "2. **Build Index** → verify Year = ✅\n"
             "3. Select country + variable + year range\n"
             "4. **Run Forecast** → compare RMSE\n\n"
-            "> 🏦 FinBERT(40%) + 🌍 XLM(30%) + 📖 Lexicon(30%)
+            "> 🏦 FinBERT(40%) + 🌍 XLM(30%) + 📖 Lexicon(30%)\n"
+            "> Normalized to **[-0.3, +0.3]** for smooth forecasting"
         )
         with gr.Row():
-            country_input = gr.Textbox(
-
+            country_input = gr.Textbox(
+                value="DZ", label="🌍 Country Code",
+                placeholder="DZ / US / FR / MA / TN / EG"
+            )
             target_input = gr.Dropdown(
-                choices=[
-
+                choices=[
+                    "Inflation (CPI %)",
+                    "GDP Growth (%)",
+                    "Unemployment (%)",
+                    "Exchange Rate"
+                ],
+                value="Inflation (CPI %)",
+                label="🎯 Target Variable"
             )
         with gr.Row():
             start_year = gr.Slider(minimum=2000, maximum=2020,
-                                   value=
+                                   value=2000, step=1, label="📅 Start Year")
             end_year = gr.Slider(minimum=2010, maximum=2024,
                                  value=2023, step=1, label="📅 End Year")
             forecast_btn = gr.Button("📈 Run Forecast", variant="primary", size="lg")
@@ -1199,4 +1197,7 @@ with gr.Blocks(
         outputs=[forecast_result, forecast_plot]
     )
 
+# ============================================================
+# LAUNCH
+# ============================================================
 app.launch(server_name="0.0.0.0", server_port=7860, show_api=False)