Spaces:
Sleeping
Sleeping
FEAT : added recommendation route
Browse files- .gitignore +1 -1
- api/main.py +5 -9
- api/rag/figures/eval_bleu.pdf +0 -0
- api/rag/figures/eval_bleu.png +0 -0
- api/rag/figures/training_logs.json +0 -306
- api/rag/figures/training_loss.pdf +0 -0
- api/rag/figures/training_loss.png +0 -0
- api/rag/rag.ipynb +0 -0
- api/rag/translated_schemes_kn.json +0 -0
- api/rag/translator.ipynb +0 -0
- api/routes/rag_route.py +0 -16
- api/routes/recommend_route.py +54 -0
- api/services/rag_service.py +0 -93
- api/services/recommend_service.py +221 -0
- requirements.txt +1 -20
.gitignore
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
**/__pycache__/
|
| 3 |
.env
|
| 4 |
api/rag/translator-en-kn-merged/
|
|
|
|
| 1 |
+
chatur/
|
| 2 |
**/__pycache__/
|
| 3 |
.env
|
| 4 |
api/rag/translator-en-kn-merged/
|
api/main.py
CHANGED
|
@@ -6,9 +6,9 @@ import logging
|
|
| 6 |
from api.routes import endpoints
|
| 7 |
from api.services.scheme_service import load_all_schemes_into_cache, is_cache_loading, cached_all_schemes
|
| 8 |
|
| 9 |
-
|
|
|
|
| 10 |
from api.routes import central_endpoints
|
| 11 |
-
# MODIFIED IMPORT: Added _central_schemes_cache to get more stats
|
| 12 |
from api.services.central_services import (
|
| 13 |
load_central_schemes_into_cache,
|
| 14 |
get_central_cache_loading_status,
|
|
@@ -16,9 +16,9 @@ from api.services.central_services import (
|
|
| 16 |
_central_schemes_cache
|
| 17 |
)
|
| 18 |
|
| 19 |
-
|
| 20 |
from api.core.firebase_utils import db, initialize_firebase
|
| 21 |
-
|
| 22 |
from fastapi.middleware.cors import CORSMiddleware
|
| 23 |
|
| 24 |
# Configure logging
|
|
@@ -55,7 +55,7 @@ app.include_router(
|
|
| 55 |
prefix="/{lang}/central",
|
| 56 |
tags=["Central Schemes"]
|
| 57 |
)
|
| 58 |
-
app.include_router(
|
| 59 |
|
| 60 |
@app.get("/")
|
| 61 |
def root():
|
|
@@ -63,10 +63,6 @@ def root():
|
|
| 63 |
return {"message": "Welcome to Chathur API"}
|
| 64 |
|
| 65 |
# --- Cache Status and Refresh Endpoints ---
|
| 66 |
-
|
| 67 |
-
# REMOVED: Combined /cache_status endpoint
|
| 68 |
-
|
| 69 |
-
# NEW: Separate endpoint for state scheme cache status
|
| 70 |
@app.get("/state_cache_status")
|
| 71 |
def get_state_cache_status():
|
| 72 |
"""Returns the current status of the state scheme cache."""
|
|
|
|
| 6 |
from api.routes import endpoints
|
| 7 |
from api.services.scheme_service import load_all_schemes_into_cache, is_cache_loading, cached_all_schemes
|
| 8 |
|
| 9 |
+
from api.routes import recommend_route
|
| 10 |
+
|
| 11 |
from api.routes import central_endpoints
|
|
|
|
| 12 |
from api.services.central_services import (
|
| 13 |
load_central_schemes_into_cache,
|
| 14 |
get_central_cache_loading_status,
|
|
|
|
| 16 |
_central_schemes_cache
|
| 17 |
)
|
| 18 |
|
| 19 |
+
|
| 20 |
from api.core.firebase_utils import db, initialize_firebase
|
| 21 |
+
|
| 22 |
from fastapi.middleware.cors import CORSMiddleware
|
| 23 |
|
| 24 |
# Configure logging
|
|
|
|
| 55 |
prefix="/{lang}/central",
|
| 56 |
tags=["Central Schemes"]
|
| 57 |
)
|
| 58 |
+
app.include_router(recommend_route.router)
|
| 59 |
|
| 60 |
@app.get("/")
|
| 61 |
def root():
|
|
|
|
| 63 |
return {"message": "Welcome to Chathur API"}
|
| 64 |
|
| 65 |
# --- Cache Status and Refresh Endpoints ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
@app.get("/state_cache_status")
|
| 67 |
def get_state_cache_status():
|
| 68 |
"""Returns the current status of the state scheme cache."""
|
api/rag/figures/eval_bleu.pdf
DELETED
|
Binary file (12.5 kB)
|
|
|
api/rag/figures/eval_bleu.png
DELETED
|
Binary file (47.4 kB)
|
|
|
api/rag/figures/training_logs.json
DELETED
|
@@ -1,306 +0,0 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"loss": 8.1255,
|
| 4 |
-
"grad_norm": 5.886991024017334,
|
| 5 |
-
"learning_rate": 4.755e-05,
|
| 6 |
-
"epoch": 0.1,
|
| 7 |
-
"step": 50
|
| 8 |
-
},
|
| 9 |
-
{
|
| 10 |
-
"loss": 2.1223,
|
| 11 |
-
"grad_norm": 1.780342936515808,
|
| 12 |
-
"learning_rate": 4.5050000000000004e-05,
|
| 13 |
-
"epoch": 0.2,
|
| 14 |
-
"step": 100
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"loss": 1.4172,
|
| 18 |
-
"grad_norm": 1.2484183311462402,
|
| 19 |
-
"learning_rate": 4.2550000000000004e-05,
|
| 20 |
-
"epoch": 0.3,
|
| 21 |
-
"step": 150
|
| 22 |
-
},
|
| 23 |
-
{
|
| 24 |
-
"loss": 1.0609,
|
| 25 |
-
"grad_norm": 1.4256188869476318,
|
| 26 |
-
"learning_rate": 4.0050000000000004e-05,
|
| 27 |
-
"epoch": 0.4,
|
| 28 |
-
"step": 200
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"eval_loss": 0.8911033868789673,
|
| 32 |
-
"eval_score": 0.06293457344434858,
|
| 33 |
-
"eval_counts": [
|
| 34 |
-
163,
|
| 35 |
-
1,
|
| 36 |
-
0,
|
| 37 |
-
0
|
| 38 |
-
],
|
| 39 |
-
"eval_totals": [
|
| 40 |
-
3683,
|
| 41 |
-
3483,
|
| 42 |
-
3283,
|
| 43 |
-
3084
|
| 44 |
-
],
|
| 45 |
-
"eval_precisions": [
|
| 46 |
-
4.425739885962531,
|
| 47 |
-
0.02871088142405972,
|
| 48 |
-
0.015229972586049346,
|
| 49 |
-
0.008106355382619975
|
| 50 |
-
],
|
| 51 |
-
"eval_bp": 1.0,
|
| 52 |
-
"eval_sys_len": 3683,
|
| 53 |
-
"eval_ref_len": 1623,
|
| 54 |
-
"eval_bleu": 0.06293457344434858,
|
| 55 |
-
"eval_runtime": 109.0083,
|
| 56 |
-
"eval_samples_per_second": 1.835,
|
| 57 |
-
"eval_steps_per_second": 0.459,
|
| 58 |
-
"epoch": 0.4,
|
| 59 |
-
"step": 200
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"loss": 0.938,
|
| 63 |
-
"grad_norm": 0.9899176955223083,
|
| 64 |
-
"learning_rate": 3.7550000000000005e-05,
|
| 65 |
-
"epoch": 0.5,
|
| 66 |
-
"step": 250
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"loss": 0.8151,
|
| 70 |
-
"grad_norm": 0.8253363966941833,
|
| 71 |
-
"learning_rate": 3.505e-05,
|
| 72 |
-
"epoch": 0.6,
|
| 73 |
-
"step": 300
|
| 74 |
-
},
|
| 75 |
-
{
|
| 76 |
-
"loss": 0.8122,
|
| 77 |
-
"grad_norm": 1.7979626655578613,
|
| 78 |
-
"learning_rate": 3.2550000000000005e-05,
|
| 79 |
-
"epoch": 0.7,
|
| 80 |
-
"step": 350
|
| 81 |
-
},
|
| 82 |
-
{
|
| 83 |
-
"loss": 0.8516,
|
| 84 |
-
"grad_norm": 0.5633005499839783,
|
| 85 |
-
"learning_rate": 3.0050000000000002e-05,
|
| 86 |
-
"epoch": 0.8,
|
| 87 |
-
"step": 400
|
| 88 |
-
},
|
| 89 |
-
{
|
| 90 |
-
"eval_loss": 0.7273606657981873,
|
| 91 |
-
"eval_score": 0.45057594789546845,
|
| 92 |
-
"eval_counts": [
|
| 93 |
-
208,
|
| 94 |
-
5,
|
| 95 |
-
2,
|
| 96 |
-
0
|
| 97 |
-
],
|
| 98 |
-
"eval_totals": [
|
| 99 |
-
1368,
|
| 100 |
-
1168,
|
| 101 |
-
968,
|
| 102 |
-
774
|
| 103 |
-
],
|
| 104 |
-
"eval_precisions": [
|
| 105 |
-
15.2046783625731,
|
| 106 |
-
0.4280821917808219,
|
| 107 |
-
0.2066115702479339,
|
| 108 |
-
0.06459948320413436
|
| 109 |
-
],
|
| 110 |
-
"eval_bp": 0.8299386398864602,
|
| 111 |
-
"eval_sys_len": 1368,
|
| 112 |
-
"eval_ref_len": 1623,
|
| 113 |
-
"eval_bleu": 0.45057594789546845,
|
| 114 |
-
"eval_runtime": 77.3509,
|
| 115 |
-
"eval_samples_per_second": 2.586,
|
| 116 |
-
"eval_steps_per_second": 0.646,
|
| 117 |
-
"epoch": 0.8,
|
| 118 |
-
"step": 400
|
| 119 |
-
},
|
| 120 |
-
{
|
| 121 |
-
"loss": 0.9177,
|
| 122 |
-
"grad_norm": 0.6352578997612,
|
| 123 |
-
"learning_rate": 2.7550000000000002e-05,
|
| 124 |
-
"epoch": 0.9,
|
| 125 |
-
"step": 450
|
| 126 |
-
},
|
| 127 |
-
{
|
| 128 |
-
"loss": 0.7974,
|
| 129 |
-
"grad_norm": 0.8983929753303528,
|
| 130 |
-
"learning_rate": 2.5050000000000002e-05,
|
| 131 |
-
"epoch": 1.0,
|
| 132 |
-
"step": 500
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"loss": 0.7734,
|
| 136 |
-
"grad_norm": 0.6885063648223877,
|
| 137 |
-
"learning_rate": 2.2550000000000003e-05,
|
| 138 |
-
"epoch": 1.1,
|
| 139 |
-
"step": 550
|
| 140 |
-
},
|
| 141 |
-
{
|
| 142 |
-
"loss": 0.8068,
|
| 143 |
-
"grad_norm": 0.9066347479820251,
|
| 144 |
-
"learning_rate": 2.0050000000000003e-05,
|
| 145 |
-
"epoch": 1.2,
|
| 146 |
-
"step": 600
|
| 147 |
-
},
|
| 148 |
-
{
|
| 149 |
-
"eval_loss": 0.6409754157066345,
|
| 150 |
-
"eval_score": 2.2308463972371086,
|
| 151 |
-
"eval_counts": [
|
| 152 |
-
281,
|
| 153 |
-
33,
|
| 154 |
-
11,
|
| 155 |
-
6
|
| 156 |
-
],
|
| 157 |
-
"eval_totals": [
|
| 158 |
-
1269,
|
| 159 |
-
1069,
|
| 160 |
-
870,
|
| 161 |
-
686
|
| 162 |
-
],
|
| 163 |
-
"eval_precisions": [
|
| 164 |
-
22.14342001576044,
|
| 165 |
-
3.086997193638915,
|
| 166 |
-
1.264367816091954,
|
| 167 |
-
0.8746355685131195
|
| 168 |
-
],
|
| 169 |
-
"eval_bp": 0.7565703085029857,
|
| 170 |
-
"eval_sys_len": 1269,
|
| 171 |
-
"eval_ref_len": 1623,
|
| 172 |
-
"eval_bleu": 2.2308463972371086,
|
| 173 |
-
"eval_runtime": 53.7294,
|
| 174 |
-
"eval_samples_per_second": 3.722,
|
| 175 |
-
"eval_steps_per_second": 0.931,
|
| 176 |
-
"epoch": 1.2,
|
| 177 |
-
"step": 600
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"loss": 0.6715,
|
| 181 |
-
"grad_norm": 0.945395290851593,
|
| 182 |
-
"learning_rate": 1.755e-05,
|
| 183 |
-
"epoch": 1.3,
|
| 184 |
-
"step": 650
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"loss": 0.7764,
|
| 188 |
-
"grad_norm": 2.0758280754089355,
|
| 189 |
-
"learning_rate": 1.505e-05,
|
| 190 |
-
"epoch": 1.4,
|
| 191 |
-
"step": 700
|
| 192 |
-
},
|
| 193 |
-
{
|
| 194 |
-
"loss": 0.6834,
|
| 195 |
-
"grad_norm": 0.43225401639938354,
|
| 196 |
-
"learning_rate": 1.255e-05,
|
| 197 |
-
"epoch": 1.5,
|
| 198 |
-
"step": 750
|
| 199 |
-
},
|
| 200 |
-
{
|
| 201 |
-
"loss": 0.7715,
|
| 202 |
-
"grad_norm": 0.982354998588562,
|
| 203 |
-
"learning_rate": 1.005e-05,
|
| 204 |
-
"epoch": 1.6,
|
| 205 |
-
"step": 800
|
| 206 |
-
},
|
| 207 |
-
{
|
| 208 |
-
"eval_loss": 0.6118303537368774,
|
| 209 |
-
"eval_score": 2.2446563832557205,
|
| 210 |
-
"eval_counts": [
|
| 211 |
-
312,
|
| 212 |
-
37,
|
| 213 |
-
11,
|
| 214 |
-
5
|
| 215 |
-
],
|
| 216 |
-
"eval_totals": [
|
| 217 |
-
1298,
|
| 218 |
-
1098,
|
| 219 |
-
899,
|
| 220 |
-
717
|
| 221 |
-
],
|
| 222 |
-
"eval_precisions": [
|
| 223 |
-
24.03697996918336,
|
| 224 |
-
3.3697632058287796,
|
| 225 |
-
1.2235817575083425,
|
| 226 |
-
0.697350069735007
|
| 227 |
-
],
|
| 228 |
-
"eval_bp": 0.7785008405436009,
|
| 229 |
-
"eval_sys_len": 1298,
|
| 230 |
-
"eval_ref_len": 1623,
|
| 231 |
-
"eval_bleu": 2.2446563832557205,
|
| 232 |
-
"eval_runtime": 50.8519,
|
| 233 |
-
"eval_samples_per_second": 3.933,
|
| 234 |
-
"eval_steps_per_second": 0.983,
|
| 235 |
-
"epoch": 1.6,
|
| 236 |
-
"step": 800
|
| 237 |
-
},
|
| 238 |
-
{
|
| 239 |
-
"loss": 0.7415,
|
| 240 |
-
"grad_norm": 0.5001242160797119,
|
| 241 |
-
"learning_rate": 7.55e-06,
|
| 242 |
-
"epoch": 1.7,
|
| 243 |
-
"step": 850
|
| 244 |
-
},
|
| 245 |
-
{
|
| 246 |
-
"loss": 0.6018,
|
| 247 |
-
"grad_norm": 0.6771586537361145,
|
| 248 |
-
"learning_rate": 5.050000000000001e-06,
|
| 249 |
-
"epoch": 1.8,
|
| 250 |
-
"step": 900
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"loss": 0.6488,
|
| 254 |
-
"grad_norm": 0.7276270389556885,
|
| 255 |
-
"learning_rate": 2.55e-06,
|
| 256 |
-
"epoch": 1.9,
|
| 257 |
-
"step": 950
|
| 258 |
-
},
|
| 259 |
-
{
|
| 260 |
-
"loss": 0.6508,
|
| 261 |
-
"grad_norm": 0.5777331590652466,
|
| 262 |
-
"learning_rate": 5.0000000000000004e-08,
|
| 263 |
-
"epoch": 2.0,
|
| 264 |
-
"step": 1000
|
| 265 |
-
},
|
| 266 |
-
{
|
| 267 |
-
"eval_loss": 0.6058484315872192,
|
| 268 |
-
"eval_score": 2.256370766803717,
|
| 269 |
-
"eval_counts": [
|
| 270 |
-
319,
|
| 271 |
-
37,
|
| 272 |
-
11,
|
| 273 |
-
5
|
| 274 |
-
],
|
| 275 |
-
"eval_totals": [
|
| 276 |
-
1310,
|
| 277 |
-
1110,
|
| 278 |
-
911,
|
| 279 |
-
727
|
| 280 |
-
],
|
| 281 |
-
"eval_precisions": [
|
| 282 |
-
24.35114503816794,
|
| 283 |
-
3.3333333333333335,
|
| 284 |
-
1.2074643249176729,
|
| 285 |
-
0.687757909215956
|
| 286 |
-
],
|
| 287 |
-
"eval_bp": 0.7874689814366906,
|
| 288 |
-
"eval_sys_len": 1310,
|
| 289 |
-
"eval_ref_len": 1623,
|
| 290 |
-
"eval_bleu": 2.256370766803717,
|
| 291 |
-
"eval_runtime": 50.885,
|
| 292 |
-
"eval_samples_per_second": 3.93,
|
| 293 |
-
"eval_steps_per_second": 0.983,
|
| 294 |
-
"epoch": 2.0,
|
| 295 |
-
"step": 1000
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"train_runtime": 493.5783,
|
| 299 |
-
"train_samples_per_second": 8.104,
|
| 300 |
-
"train_steps_per_second": 2.026,
|
| 301 |
-
"total_flos": 136952414208000.0,
|
| 302 |
-
"train_loss": 1.2491823387145997,
|
| 303 |
-
"epoch": 2.0,
|
| 304 |
-
"step": 1000
|
| 305 |
-
}
|
| 306 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/rag/figures/training_loss.pdf
DELETED
|
Binary file (11.4 kB)
|
|
|
api/rag/figures/training_loss.png
DELETED
|
Binary file (43.2 kB)
|
|
|
api/rag/rag.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
api/rag/translated_schemes_kn.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
api/rag/translator.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
api/routes/rag_route.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from api.services.rag_service import get_answer_from_vectorstore
|
| 4 |
-
|
| 5 |
-
router = APIRouter()
|
| 6 |
-
|
| 7 |
-
class QueryInput(BaseModel):
|
| 8 |
-
question: str
|
| 9 |
-
|
| 10 |
-
@router.post("/rag/query")
|
| 11 |
-
async def rag_query(query: QueryInput):
|
| 12 |
-
try:
|
| 13 |
-
result = get_answer_from_vectorstore(query.question)
|
| 14 |
-
return result
|
| 15 |
-
except Exception as e:
|
| 16 |
-
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/routes/recommend_route.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Path, status
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
# Import the recommendation service
|
| 6 |
+
from api.services.recommend_service import get_recommendations
|
| 7 |
+
|
| 8 |
+
router = APIRouter()
|
| 9 |
+
|
| 10 |
+
# --- Pydantic Request Model ---
|
| 11 |
+
|
| 12 |
+
class RecommendationRequest(BaseModel):
    """
    Request body accepted by the recommendation endpoint.

    Carries a single field, ``tags``: the list of tag strings to match
    schemes against.
    """

    tags: List[str]
|
| 18 |
+
|
| 19 |
+
# --- API Endpoint ---
|
| 20 |
+
|
| 21 |
+
@router.post(
    "/{lang}/recommend",
    tags=["Recommendations"],
    summary="Get Hybrid Scheme Recommendations"
)
async def recommend_schemes(
    request: RecommendationRequest,
    lang: str = Path(..., title="Language Code", description="ISO 639-1 language code (e.g., 'en', 'hi')")
):
    """
    Get a list of recommended schemes from both State and Central governments
    based on a list of input tags.

    This endpoint uses a hybrid model that considers:
    1. **Tag Matching:** How well the user's tags match the scheme's tags.
    2. **Popularity:** The general popularity score of the scheme.
    """
    # NOTE: the docstring above is kept user-facing on purpose -- FastAPI
    # publishes a handler's docstring as the operation description.

    # This module defines no module-level logger, so the error handler below
    # obtains one locally instead of referencing an undefined `logger` name
    # (which would turn every service failure into a NameError).
    import logging

    # Reject an empty tag list up front: there is nothing to match against.
    if not request.tags:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="The 'tags' list cannot be empty."
        )

    try:
        # Delegate the actual ranking to the service layer.
        return get_recommendations(user_tags=request.tags, lang=lang)
    except Exception as e:
        # Top-level boundary: record the full traceback server-side, but
        # return only a generic message so internals are not leaked.
        logging.getLogger(__name__).exception("Recommendation endpoint failed: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="An error occurred while generating recommendations."
        ) from e
|
api/services/rag_service.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from dotenv import load_dotenv
|
| 3 |
-
from langchain_pinecone import PineconeVectorStore
|
| 4 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
-
from langchain_groq import ChatGroq
|
| 6 |
-
from langchain_core.messages import HumanMessage
|
| 7 |
-
from pinecone import Pinecone
|
| 8 |
-
|
| 9 |
-
# --- Load environment variables ---
|
| 10 |
-
load_dotenv()
|
| 11 |
-
|
| 12 |
-
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
| 13 |
-
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 14 |
-
|
| 15 |
-
if not PINECONE_API_KEY or not GROQ_API_KEY:
|
| 16 |
-
raise ValueError("❌ Missing PINECONE_API_KEY or GROQ_API_KEY")
|
| 17 |
-
|
| 18 |
-
# --- Configurations ---
|
| 19 |
-
PINECONE_INDEX_NAME = "scheme-index"
|
| 20 |
-
PINECONE_NAMESPACE = "schemes"
|
| 21 |
-
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
| 22 |
-
GROQ_MODEL_NAME = "llama-3.1-8b-instant"
|
| 23 |
-
|
| 24 |
-
# --- Initialize Services ---
|
| 25 |
-
print("🚀 Initializing embeddings and LLM...")
|
| 26 |
-
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
|
| 27 |
-
llm = ChatGroq(model_name=GROQ_MODEL_NAME)
|
| 28 |
-
|
| 29 |
-
print("🔗 Connecting to Pinecone...")
|
| 30 |
-
try:
|
| 31 |
-
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 32 |
-
indexes = pc.list_indexes()
|
| 33 |
-
print(f"✅ Pinecone reachable. Indexes: {indexes}")
|
| 34 |
-
except Exception as e:
|
| 35 |
-
print(f"❌ Pinecone connection failed: {e}")
|
| 36 |
-
|
| 37 |
-
# --- Vector Store ---
|
| 38 |
-
vectorstore = PineconeVectorStore.from_existing_index(
|
| 39 |
-
index_name=PINECONE_INDEX_NAME,
|
| 40 |
-
embedding=embeddings,
|
| 41 |
-
namespace=PINECONE_NAMESPACE
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
# --- Main RAG Function ---
|
| 45 |
-
def get_answer_from_vectorstore(question: str) -> dict:
|
| 46 |
-
print(f"🧠 Query received: {question}")
|
| 47 |
-
try:
|
| 48 |
-
docs_with_scores = vectorstore.similarity_search_with_score(question, k=5)
|
| 49 |
-
print(f"📄 Retrieved {len(docs_with_scores)} docs")
|
| 50 |
-
|
| 51 |
-
for doc, score in docs_with_scores:
|
| 52 |
-
print(f"→ Score: {score:.4f} | Snippet: {doc.page_content[:80]}")
|
| 53 |
-
|
| 54 |
-
threshold = 0.75
|
| 55 |
-
filtered_docs = [doc for doc, score in docs_with_scores if score < threshold]
|
| 56 |
-
print(f"✅ Filtered {len(filtered_docs)} docs below threshold {threshold}")
|
| 57 |
-
|
| 58 |
-
if not filtered_docs:
|
| 59 |
-
print("⚠️ No matching documents found.")
|
| 60 |
-
return {
|
| 61 |
-
"answer": "This question seems to be outside my knowledge of government schemes. Please ask about a specific scheme or benefit.",
|
| 62 |
-
"sources": []
|
| 63 |
-
}
|
| 64 |
-
|
| 65 |
-
context = "\n\n".join([doc.page_content for doc in filtered_docs])
|
| 66 |
-
prompt = f"""
|
| 67 |
-
You are a helpful assistant for rural users regarding Indian government schemes.
|
| 68 |
-
Answer the following question using only the context provided below.
|
| 69 |
-
If the answer cannot be found in the context, say:
|
| 70 |
-
"I'm sorry, I couldn't find information about that in my current knowledge base."
|
| 71 |
-
|
| 72 |
-
Context:
|
| 73 |
-
{context}
|
| 74 |
-
|
| 75 |
-
Question: {question}
|
| 76 |
-
|
| 77 |
-
Answer:
|
| 78 |
-
"""
|
| 79 |
-
|
| 80 |
-
answer_message = llm.invoke([HumanMessage(content=prompt)])
|
| 81 |
-
answer = answer_message.content.strip()
|
| 82 |
-
|
| 83 |
-
return {
|
| 84 |
-
"answer": answer,
|
| 85 |
-
"sources": [doc.metadata for doc in filtered_docs]
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
except Exception as e:
|
| 89 |
-
print(f"❌ Error in get_answer_from_vectorstore: {e}")
|
| 90 |
-
return {
|
| 91 |
-
"answer": f"An error occurred while fetching the answer: {str(e)}",
|
| 92 |
-
"sources": []
|
| 93 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/services/recommend_service.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
# MODIFIED IMPORTS: Import the modules themselves, not the variables
|
| 3 |
+
from api.services import scheme_service
|
| 4 |
+
from api.services import central_services
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
# --- NEW: Helper function for dynamic tag generation ---
|
| 9 |
+
def _generate_tags_from_scheme(scheme: dict, user_tags_set: set) -> list[str]:
|
| 10 |
+
"""
|
| 11 |
+
Searches a scheme's Title and Description for any of the user's tags.
|
| 12 |
+
Returns a list of tags that were found.
|
| 13 |
+
"""
|
| 14 |
+
# Combine Title and Description into a single searchable text
|
| 15 |
+
search_text = (
|
| 16 |
+
scheme.get("Title", "") + " " +
|
| 17 |
+
scheme.get("Description", "")
|
| 18 |
+
).lower()
|
| 19 |
+
|
| 20 |
+
if not search_text:
|
| 21 |
+
return []
|
| 22 |
+
|
| 23 |
+
found_tags = []
|
| 24 |
+
# Check each of the user's original tags
|
| 25 |
+
for tag in user_tags_set:
|
| 26 |
+
# Use ' in ' for simple substring matching
|
| 27 |
+
if tag in search_text:
|
| 28 |
+
found_tags.append(tag)
|
| 29 |
+
return found_tags
|
| 30 |
+
# --- END NEW HELPER ---
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# --- Hybrid Recommendation Logic ---
|
| 34 |
+
|
| 35 |
+
def _calculate_hybrid_score(scheme: dict, user_tags_set: set) -> float:
|
| 36 |
+
"""
|
| 37 |
+
Calculates a hybrid recommendation score for a single scheme.
|
| 38 |
+
|
| 39 |
+
ASSUMPTIONS:
|
| 40 |
+
- scheme (dict): A scheme object.
|
| 41 |
+
- 'tags' (list[str]): Assumes scheme has a 'tags' key with a list of strings.
|
| 42 |
+
- 'popularity' (float): Assumes scheme has a 'popularity' key with a float (0.0 to 1.0).
|
| 43 |
+
If not present, defaults to 0.5.
|
| 44 |
+
"""
|
| 45 |
+
# Define weights for each part of the hybrid model
|
| 46 |
+
WEIGHT_TAG_MATCH = 0.7 # 70% importance
|
| 47 |
+
WEIGHT_POPULARITY = 0.3 # 30% importance
|
| 48 |
+
|
| 49 |
+
# 1. Content-Based Score (Jaccard Similarity)
|
| 50 |
+
# Jaccard Similarity = (Intersection of tags) / (Union of tags)
|
| 51 |
+
|
| 52 |
+
# --- Assumption Handling ---
|
| 53 |
+
# Safely get tags, default to empty list if not present or wrong type
|
| 54 |
+
scheme_tags = scheme.get("tags", [])
|
| 55 |
+
if not isinstance(scheme_tags, list):
|
| 56 |
+
# FIX: Use 'Title' for logging, as 'id' may not exist
|
| 57 |
+
logger.warning(f"Scheme {scheme.get('Title', 'Unknown')} has invalid 'tags' format. Skipping.")
|
| 58 |
+
scheme_tags = []
|
| 59 |
+
|
| 60 |
+
scheme_tags_set = set(tag.lower() for tag in scheme_tags)
|
| 61 |
+
# --- End Assumption Handling ---
|
| 62 |
+
|
| 63 |
+
intersection = user_tags_set.intersection(scheme_tags_set)
|
| 64 |
+
union = user_tags_set.union(scheme_tags_set)
|
| 65 |
+
|
| 66 |
+
if not union:
|
| 67 |
+
tag_score = 0.0
|
| 68 |
+
else:
|
| 69 |
+
tag_score = len(intersection) / len(union)
|
| 70 |
+
|
| 71 |
+
# 2. Popularity-Based Score
|
| 72 |
+
# --- Assumption Handling ---
|
| 73 |
+
# Safely get popularity, default to 0.5 if not present or wrong type
|
| 74 |
+
popularity_score = scheme.get("popularity", 0.5)
|
| 75 |
+
if not isinstance(popularity_score, (int, float)):
|
| 76 |
+
# FIX: Use 'Title' for logging
|
| 77 |
+
logger.warning(f"Scheme {scheme.get('Title', 'Unknown')} has invalid 'popularity' format. Defaulting to 0.5.")
|
| 78 |
+
popularity_score = 0.5
|
| 79 |
+
# --- End Assumption Handling ---
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# 3. Final Hybrid Score
|
| 83 |
+
final_score = (WEIGHT_TAG_MATCH * tag_score) + (WEIGHT_POPULARITY * popularity_score)
|
| 84 |
+
|
| 85 |
+
return final_score
|
| 86 |
+
|
| 87 |
+
def get_recommendations(user_tags: list[str], lang: str) -> list[dict]:
    """
    Generate a ranked list of scheme recommendations from the state and
    central caches based on user tags.

    NOTE: This function currently ignores the 'lang' parameter and searches
    across ALL languages in the cache (central 'hi' entries are skipped).

    Args:
        user_tags: Tags supplied by the user; compared case-insensitively.
        lang: Requested language code. Only logged at present.

    Returns:
        One dict per scheme sharing at least one tag with the user, with the
        keys ``name``, ``description``, ``tags``, ``source``,
        ``source_name``, ``lang_found``, ``matched_tags`` and
        ``final_score``, sorted by ``final_score`` in descending order.
        Empty when the caches hold no schemes or nothing matches.
    """
    logger.info(f"Generating recommendations with tags={user_tags}. (NOTE: Ignoring lang='{lang}' and searching all languages)")

    # Read the caches through their modules at call time so that a cache
    # rebound after import is still seen here.
    state_cache = scheme_service.cached_all_schemes
    central_cache = central_services._central_schemes_cache
    # Treat a missing/unexpected cache object as empty instead of failing.
    if not isinstance(state_cache, dict):
        state_cache = {}
    if not isinstance(central_cache, dict):
        central_cache = {}

    user_tags_set = {tag.lower() for tag in user_tags if isinstance(tag, str)}

    logger.info(f"DIAGNOSTIC: State cache size: {len(state_cache)}")
    logger.info(f"DIAGNOSTIC: State cache keys: {list(state_cache.keys())}")
    logger.info(f"DIAGNOSTIC: Central cache size: {len(central_cache)}")
    logger.info(f"DIAGNOSTIC: Central cache keys: {list(central_cache.keys())}")

    all_schemes = []

    # 1. Aggregate state schemes (cache shape: {state_name: [scheme, ...]}).
    for state_name, state_schemes in state_cache.items():
        # Validate the type BEFORE calling len() on it, and skip only the
        # offending state rather than abandoning the whole aggregation.
        if not isinstance(state_schemes, list):
            logger.warning(f"DIAGNOSTIC: Expected list of schemes for state '{state_name}', but got {type(state_schemes)}. Skipping.")
            continue
        logger.info(f"DIAGNOSTIC: Processing state: {state_name}, found {len(state_schemes)} schemes.")

        # The state cache carries no language key, so one is assumed.
        # NOTE(review): "ka" is the ISO 639-1 code for Georgian; Kannada is
        # "kn". Left unchanged because clients may already rely on this
        # value -- confirm which code the rest of the project uses.
        lang_key = "unknown"
        if isinstance(state_name, str) and state_name.lower() == "karnataka":
            lang_key = "ka"  # HACK: based on user log

        all_schemes.extend(_annotate_schemes(
            state_schemes, user_tags_set,
            source="state", source_name=state_name, lang_found=lang_key,
        ))

    # 2. Aggregate central schemes
    #    (cache shape: {lang: {ministry_name: [scheme, ...]}}).
    for lang_key, central_lang_cache in central_cache.items():
        # Hindi entries are deliberately excluded (per user request).
        if lang_key == "hi":
            continue

        if not isinstance(central_lang_cache, dict):
            logger.warning(f"DIAGNOSTIC: Expected dict of ministries for lang '{lang_key}', but got {type(central_lang_cache)}. Skipping.")
            continue
        logger.info(f"DIAGNOSTIC: Processing central lang: {lang_key}, found ministries: {len(central_lang_cache)}")

        for ministry_name, ministry_schemes in central_lang_cache.items():
            if not isinstance(ministry_schemes, (list, tuple)):
                logger.warning(f"DIAGNOSTIC: Expected list of schemes for ministry '{ministry_name}', but got {type(ministry_schemes)}. Skipping.")
                continue
            all_schemes.extend(_annotate_schemes(
                ministry_schemes, user_tags_set,
                source="central", source_name=ministry_name, lang_found=lang_key,
            ))

    if not all_schemes:
        logger.warning("No schemes found in cache across ANY language. Caches might be empty.")
        return []

    # 3. Score every scheme that shares at least one tag with the user.
    recommendations = []
    for scheme in all_schemes:
        matched_tags = user_tags_set.intersection(tag.lower() for tag in scheme["tags"])
        if not matched_tags:
            continue
        score = _calculate_hybrid_score(scheme, user_tags_set)
        recommendations.append({
            "name": scheme.get("Title", "Unnamed Scheme"),
            "description": scheme.get("Description", ""),
            "tags": scheme["tags"],  # may be tags generated from the text
            "source": scheme["source"],  # 'state' or 'central'
            "source_name": scheme["source_name"],  # State or Ministry name
            "lang_found": scheme.get("lang_found", "unknown"),
            # Sorted so the response does not depend on set iteration order.
            "matched_tags": sorted(matched_tags),
            "final_score": round(score, 4),
        })

    # 4. Highest score first; sorted() is stable, so ties keep cache order.
    sorted_recommendations = sorted(recommendations, key=lambda x: x["final_score"], reverse=True)

    logger.info(f"Found {len(sorted_recommendations)} matching recommendations.")
    return sorted_recommendations


def _annotate_schemes(schemes, user_tags_set: set, *, source: str, source_name, lang_found) -> list[dict]:
    """Copy each scheme dict, normalise its 'tags' and stamp it with its origin."""
    annotated = []
    for scheme in schemes:
        if not isinstance(scheme, dict):
            logger.warning(f"DIAGNOSTIC: Expected dict for scheme under '{source_name}', but got {type(scheme)}. Skipping.")
            continue

        # Work on a shallow copy so the cached record is never mutated.
        scheme_copy = scheme.copy()

        # Keep only string tags; anything else cannot be matched.
        raw_tags = scheme_copy.get("tags")
        tags = [tag for tag in raw_tags if isinstance(tag, str)] if isinstance(raw_tags, list) else []

        # No usable tags: derive them from the Title/Description text.
        if not tags:
            try:
                tags = _generate_tags_from_scheme(scheme_copy, user_tags_set)
            except (TypeError, AttributeError) as e:
                # Best effort: a scheme with unusable text simply gets no tags.
                logger.error(f"Error generating tags for scheme under '{source_name}': {e}")
                tags = []

        scheme_copy["tags"] = tags
        scheme_copy["source"] = source
        scheme_copy["source_name"] = source_name
        scheme_copy["lang_found"] = lang_found
        annotated.append(scheme_copy)
    return annotated
|
| 221 |
+
|
requirements.txt
CHANGED
|
@@ -1,22 +1,3 @@
|
|
| 1 |
-
# SLIM requirements.txt
|
| 2 |
-
|
| 3 |
-
# Core web framework
|
| 4 |
fastapi
|
| 5 |
uvicorn[standard]
|
| 6 |
-
|
| 7 |
-
# Database & Cloud Services
|
| 8 |
-
firebase-admin
|
| 9 |
-
pinecone-client>=4.0.0
|
| 10 |
-
|
| 11 |
-
# LLM & AI Libraries
|
| 12 |
-
python-dotenv
|
| 13 |
-
groq
|
| 14 |
-
sentence-transformers
|
| 15 |
-
|
| 16 |
-
# LangChain - with minimum versions to fix import errors
|
| 17 |
-
langchain>=0.2.0
|
| 18 |
-
langchain-core>=0.2.0
|
| 19 |
-
langchain-community>=0.2.0
|
| 20 |
-
langchain-groq>=0.1.5
|
| 21 |
-
langchain-pinecone>=0.1.1
|
| 22 |
-
langchain-huggingface>=0.0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
+
firebase-admin
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|