VJnCode committed on
Commit
2e108ec
·
1 Parent(s): aaa9a08

FEAT : added recommendation route

Browse files
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- chathur/
2
  **/__pycache__/
3
  .env
4
  api/rag/translator-en-kn-merged/
 
1
+ chatur/
2
  **/__pycache__/
3
  .env
4
  api/rag/translator-en-kn-merged/
api/main.py CHANGED
@@ -6,9 +6,9 @@ import logging
6
  from api.routes import endpoints
7
  from api.services.scheme_service import load_all_schemes_into_cache, is_cache_loading, cached_all_schemes
8
 
9
- # Central scheme imports
 
10
  from api.routes import central_endpoints
11
- # MODIFIED IMPORT: Added _central_schemes_cache to get more stats
12
  from api.services.central_services import (
13
  load_central_schemes_into_cache,
14
  get_central_cache_loading_status,
@@ -16,9 +16,9 @@ from api.services.central_services import (
16
  _central_schemes_cache
17
  )
18
 
19
- # Other imports
20
  from api.core.firebase_utils import db, initialize_firebase
21
- from api.routes import rag_route
22
  from fastapi.middleware.cors import CORSMiddleware
23
 
24
  # Configure logging
@@ -55,7 +55,7 @@ app.include_router(
55
  prefix="/{lang}/central",
56
  tags=["Central Schemes"]
57
  )
58
- app.include_router(rag_route.router, prefix="/api", tags=["RAG Chatbot"])
59
 
60
  @app.get("/")
61
  def root():
@@ -63,10 +63,6 @@ def root():
63
  return {"message": "Welcome to Chathur API"}
64
 
65
  # --- Cache Status and Refresh Endpoints ---
66
-
67
- # REMOVED: Combined /cache_status endpoint
68
-
69
- # NEW: Separate endpoint for state scheme cache status
70
  @app.get("/state_cache_status")
71
  def get_state_cache_status():
72
  """Returns the current status of the state scheme cache."""
 
6
  from api.routes import endpoints
7
  from api.services.scheme_service import load_all_schemes_into_cache, is_cache_loading, cached_all_schemes
8
 
9
+ from api.routes import recommend_route
10
+
11
  from api.routes import central_endpoints
 
12
  from api.services.central_services import (
13
  load_central_schemes_into_cache,
14
  get_central_cache_loading_status,
 
16
  _central_schemes_cache
17
  )
18
 
19
+
20
  from api.core.firebase_utils import db, initialize_firebase
21
+
22
  from fastapi.middleware.cors import CORSMiddleware
23
 
24
  # Configure logging
 
55
  prefix="/{lang}/central",
56
  tags=["Central Schemes"]
57
  )
58
+ app.include_router(recommend_route.router)
59
 
60
  @app.get("/")
61
  def root():
 
63
  return {"message": "Welcome to Chathur API"}
64
 
65
  # --- Cache Status and Refresh Endpoints ---
 
 
 
 
66
  @app.get("/state_cache_status")
67
  def get_state_cache_status():
68
  """Returns the current status of the state scheme cache."""
api/rag/figures/eval_bleu.pdf DELETED
Binary file (12.5 kB)
 
api/rag/figures/eval_bleu.png DELETED
Binary file (47.4 kB)
 
api/rag/figures/training_logs.json DELETED
@@ -1,306 +0,0 @@
1
- [
2
- {
3
- "loss": 8.1255,
4
- "grad_norm": 5.886991024017334,
5
- "learning_rate": 4.755e-05,
6
- "epoch": 0.1,
7
- "step": 50
8
- },
9
- {
10
- "loss": 2.1223,
11
- "grad_norm": 1.780342936515808,
12
- "learning_rate": 4.5050000000000004e-05,
13
- "epoch": 0.2,
14
- "step": 100
15
- },
16
- {
17
- "loss": 1.4172,
18
- "grad_norm": 1.2484183311462402,
19
- "learning_rate": 4.2550000000000004e-05,
20
- "epoch": 0.3,
21
- "step": 150
22
- },
23
- {
24
- "loss": 1.0609,
25
- "grad_norm": 1.4256188869476318,
26
- "learning_rate": 4.0050000000000004e-05,
27
- "epoch": 0.4,
28
- "step": 200
29
- },
30
- {
31
- "eval_loss": 0.8911033868789673,
32
- "eval_score": 0.06293457344434858,
33
- "eval_counts": [
34
- 163,
35
- 1,
36
- 0,
37
- 0
38
- ],
39
- "eval_totals": [
40
- 3683,
41
- 3483,
42
- 3283,
43
- 3084
44
- ],
45
- "eval_precisions": [
46
- 4.425739885962531,
47
- 0.02871088142405972,
48
- 0.015229972586049346,
49
- 0.008106355382619975
50
- ],
51
- "eval_bp": 1.0,
52
- "eval_sys_len": 3683,
53
- "eval_ref_len": 1623,
54
- "eval_bleu": 0.06293457344434858,
55
- "eval_runtime": 109.0083,
56
- "eval_samples_per_second": 1.835,
57
- "eval_steps_per_second": 0.459,
58
- "epoch": 0.4,
59
- "step": 200
60
- },
61
- {
62
- "loss": 0.938,
63
- "grad_norm": 0.9899176955223083,
64
- "learning_rate": 3.7550000000000005e-05,
65
- "epoch": 0.5,
66
- "step": 250
67
- },
68
- {
69
- "loss": 0.8151,
70
- "grad_norm": 0.8253363966941833,
71
- "learning_rate": 3.505e-05,
72
- "epoch": 0.6,
73
- "step": 300
74
- },
75
- {
76
- "loss": 0.8122,
77
- "grad_norm": 1.7979626655578613,
78
- "learning_rate": 3.2550000000000005e-05,
79
- "epoch": 0.7,
80
- "step": 350
81
- },
82
- {
83
- "loss": 0.8516,
84
- "grad_norm": 0.5633005499839783,
85
- "learning_rate": 3.0050000000000002e-05,
86
- "epoch": 0.8,
87
- "step": 400
88
- },
89
- {
90
- "eval_loss": 0.7273606657981873,
91
- "eval_score": 0.45057594789546845,
92
- "eval_counts": [
93
- 208,
94
- 5,
95
- 2,
96
- 0
97
- ],
98
- "eval_totals": [
99
- 1368,
100
- 1168,
101
- 968,
102
- 774
103
- ],
104
- "eval_precisions": [
105
- 15.2046783625731,
106
- 0.4280821917808219,
107
- 0.2066115702479339,
108
- 0.06459948320413436
109
- ],
110
- "eval_bp": 0.8299386398864602,
111
- "eval_sys_len": 1368,
112
- "eval_ref_len": 1623,
113
- "eval_bleu": 0.45057594789546845,
114
- "eval_runtime": 77.3509,
115
- "eval_samples_per_second": 2.586,
116
- "eval_steps_per_second": 0.646,
117
- "epoch": 0.8,
118
- "step": 400
119
- },
120
- {
121
- "loss": 0.9177,
122
- "grad_norm": 0.6352578997612,
123
- "learning_rate": 2.7550000000000002e-05,
124
- "epoch": 0.9,
125
- "step": 450
126
- },
127
- {
128
- "loss": 0.7974,
129
- "grad_norm": 0.8983929753303528,
130
- "learning_rate": 2.5050000000000002e-05,
131
- "epoch": 1.0,
132
- "step": 500
133
- },
134
- {
135
- "loss": 0.7734,
136
- "grad_norm": 0.6885063648223877,
137
- "learning_rate": 2.2550000000000003e-05,
138
- "epoch": 1.1,
139
- "step": 550
140
- },
141
- {
142
- "loss": 0.8068,
143
- "grad_norm": 0.9066347479820251,
144
- "learning_rate": 2.0050000000000003e-05,
145
- "epoch": 1.2,
146
- "step": 600
147
- },
148
- {
149
- "eval_loss": 0.6409754157066345,
150
- "eval_score": 2.2308463972371086,
151
- "eval_counts": [
152
- 281,
153
- 33,
154
- 11,
155
- 6
156
- ],
157
- "eval_totals": [
158
- 1269,
159
- 1069,
160
- 870,
161
- 686
162
- ],
163
- "eval_precisions": [
164
- 22.14342001576044,
165
- 3.086997193638915,
166
- 1.264367816091954,
167
- 0.8746355685131195
168
- ],
169
- "eval_bp": 0.7565703085029857,
170
- "eval_sys_len": 1269,
171
- "eval_ref_len": 1623,
172
- "eval_bleu": 2.2308463972371086,
173
- "eval_runtime": 53.7294,
174
- "eval_samples_per_second": 3.722,
175
- "eval_steps_per_second": 0.931,
176
- "epoch": 1.2,
177
- "step": 600
178
- },
179
- {
180
- "loss": 0.6715,
181
- "grad_norm": 0.945395290851593,
182
- "learning_rate": 1.755e-05,
183
- "epoch": 1.3,
184
- "step": 650
185
- },
186
- {
187
- "loss": 0.7764,
188
- "grad_norm": 2.0758280754089355,
189
- "learning_rate": 1.505e-05,
190
- "epoch": 1.4,
191
- "step": 700
192
- },
193
- {
194
- "loss": 0.6834,
195
- "grad_norm": 0.43225401639938354,
196
- "learning_rate": 1.255e-05,
197
- "epoch": 1.5,
198
- "step": 750
199
- },
200
- {
201
- "loss": 0.7715,
202
- "grad_norm": 0.982354998588562,
203
- "learning_rate": 1.005e-05,
204
- "epoch": 1.6,
205
- "step": 800
206
- },
207
- {
208
- "eval_loss": 0.6118303537368774,
209
- "eval_score": 2.2446563832557205,
210
- "eval_counts": [
211
- 312,
212
- 37,
213
- 11,
214
- 5
215
- ],
216
- "eval_totals": [
217
- 1298,
218
- 1098,
219
- 899,
220
- 717
221
- ],
222
- "eval_precisions": [
223
- 24.03697996918336,
224
- 3.3697632058287796,
225
- 1.2235817575083425,
226
- 0.697350069735007
227
- ],
228
- "eval_bp": 0.7785008405436009,
229
- "eval_sys_len": 1298,
230
- "eval_ref_len": 1623,
231
- "eval_bleu": 2.2446563832557205,
232
- "eval_runtime": 50.8519,
233
- "eval_samples_per_second": 3.933,
234
- "eval_steps_per_second": 0.983,
235
- "epoch": 1.6,
236
- "step": 800
237
- },
238
- {
239
- "loss": 0.7415,
240
- "grad_norm": 0.5001242160797119,
241
- "learning_rate": 7.55e-06,
242
- "epoch": 1.7,
243
- "step": 850
244
- },
245
- {
246
- "loss": 0.6018,
247
- "grad_norm": 0.6771586537361145,
248
- "learning_rate": 5.050000000000001e-06,
249
- "epoch": 1.8,
250
- "step": 900
251
- },
252
- {
253
- "loss": 0.6488,
254
- "grad_norm": 0.7276270389556885,
255
- "learning_rate": 2.55e-06,
256
- "epoch": 1.9,
257
- "step": 950
258
- },
259
- {
260
- "loss": 0.6508,
261
- "grad_norm": 0.5777331590652466,
262
- "learning_rate": 5.0000000000000004e-08,
263
- "epoch": 2.0,
264
- "step": 1000
265
- },
266
- {
267
- "eval_loss": 0.6058484315872192,
268
- "eval_score": 2.256370766803717,
269
- "eval_counts": [
270
- 319,
271
- 37,
272
- 11,
273
- 5
274
- ],
275
- "eval_totals": [
276
- 1310,
277
- 1110,
278
- 911,
279
- 727
280
- ],
281
- "eval_precisions": [
282
- 24.35114503816794,
283
- 3.3333333333333335,
284
- 1.2074643249176729,
285
- 0.687757909215956
286
- ],
287
- "eval_bp": 0.7874689814366906,
288
- "eval_sys_len": 1310,
289
- "eval_ref_len": 1623,
290
- "eval_bleu": 2.256370766803717,
291
- "eval_runtime": 50.885,
292
- "eval_samples_per_second": 3.93,
293
- "eval_steps_per_second": 0.983,
294
- "epoch": 2.0,
295
- "step": 1000
296
- },
297
- {
298
- "train_runtime": 493.5783,
299
- "train_samples_per_second": 8.104,
300
- "train_steps_per_second": 2.026,
301
- "total_flos": 136952414208000.0,
302
- "train_loss": 1.2491823387145997,
303
- "epoch": 2.0,
304
- "step": 1000
305
- }
306
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/rag/figures/training_loss.pdf DELETED
Binary file (11.4 kB)
 
api/rag/figures/training_loss.png DELETED
Binary file (43.2 kB)
 
api/rag/rag.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
api/rag/translated_schemes_kn.json DELETED
The diff for this file is too large to render. See raw diff
 
api/rag/translator.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
api/routes/rag_route.py DELETED
@@ -1,16 +0,0 @@
1
- from fastapi import APIRouter, HTTPException
2
- from pydantic import BaseModel
3
- from api.services.rag_service import get_answer_from_vectorstore
4
-
5
- router = APIRouter()
6
-
7
- class QueryInput(BaseModel):
8
- question: str
9
-
10
- @router.post("/rag/query")
11
- async def rag_query(query: QueryInput):
12
- try:
13
- result = get_answer_from_vectorstore(query.question)
14
- return result
15
- except Exception as e:
16
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/routes/recommend_route.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging

from fastapi import APIRouter, HTTPException, Path, status
from pydantic import BaseModel
from typing import List

# Import the recommendation service
from api.services.recommend_service import get_recommendations

logger = logging.getLogger(__name__)

router = APIRouter()

# --- Pydantic Request Model ---

class RecommendationRequest(BaseModel):
    """
    Payload for the recommendation endpoint.
    Expects a list of tags.
    """
    tags: List[str]

# --- API Endpoint ---

@router.post(
    "/{lang}/recommend",
    tags=["Recommendations"],
    summary="Get Hybrid Scheme Recommendations"
)
async def recommend_schemes(
    request: RecommendationRequest,
    lang: str = Path(..., title="Language Code", description="ISO 639-1 language code (e.g., 'en', 'hi')")
):
    """
    Get a list of recommended schemes from both State and Central governments
    based on a list of input tags.

    This endpoint uses a hybrid model that considers:
    1. **Tag Matching:** How well the user's tags match the scheme's tags.
    2. **Popularity:** The general popularity score of the scheme.

    Raises:
        HTTPException 400: if the 'tags' list is empty.
        HTTPException 500: on unexpected failures in the service layer.
    """
    if not request.tags:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="The 'tags' list cannot be empty."
        )

    try:
        # Call the service layer to get recommendations
        return get_recommendations(user_tags=request.tags, lang=lang)
    except Exception as e:
        # BUG FIX: 'logger' was used without ever being defined in this module,
        # so any service failure raised a NameError instead of returning a 500.
        # A module-level logger is now configured above.
        logger.error("Recommendation endpoint failed: %s", e)
        # FIX: the detail string had a pointless f-prefix (no placeholders);
        # chain the original exception for easier debugging.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="An error occurred while generating recommendations."
        ) from e
api/services/rag_service.py DELETED
@@ -1,93 +0,0 @@
1
- import os
2
- from dotenv import load_dotenv
3
- from langchain_pinecone import PineconeVectorStore
4
- from langchain_huggingface import HuggingFaceEmbeddings
5
- from langchain_groq import ChatGroq
6
- from langchain_core.messages import HumanMessage
7
- from pinecone import Pinecone
8
-
9
- # --- Load environment variables ---
10
- load_dotenv()
11
-
12
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
13
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
14
-
15
- if not PINECONE_API_KEY or not GROQ_API_KEY:
16
- raise ValueError("❌ Missing PINECONE_API_KEY or GROQ_API_KEY")
17
-
18
- # --- Configurations ---
19
- PINECONE_INDEX_NAME = "scheme-index"
20
- PINECONE_NAMESPACE = "schemes"
21
- EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
22
- GROQ_MODEL_NAME = "llama-3.1-8b-instant"
23
-
24
- # --- Initialize Services ---
25
- print("🚀 Initializing embeddings and LLM...")
26
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
27
- llm = ChatGroq(model_name=GROQ_MODEL_NAME)
28
-
29
- print("🔗 Connecting to Pinecone...")
30
- try:
31
- pc = Pinecone(api_key=PINECONE_API_KEY)
32
- indexes = pc.list_indexes()
33
- print(f"✅ Pinecone reachable. Indexes: {indexes}")
34
- except Exception as e:
35
- print(f"❌ Pinecone connection failed: {e}")
36
-
37
- # --- Vector Store ---
38
- vectorstore = PineconeVectorStore.from_existing_index(
39
- index_name=PINECONE_INDEX_NAME,
40
- embedding=embeddings,
41
- namespace=PINECONE_NAMESPACE
42
- )
43
-
44
- # --- Main RAG Function ---
45
- def get_answer_from_vectorstore(question: str) -> dict:
46
- print(f"🧠 Query received: {question}")
47
- try:
48
- docs_with_scores = vectorstore.similarity_search_with_score(question, k=5)
49
- print(f"📄 Retrieved {len(docs_with_scores)} docs")
50
-
51
- for doc, score in docs_with_scores:
52
- print(f"→ Score: {score:.4f} | Snippet: {doc.page_content[:80]}")
53
-
54
- threshold = 0.75
55
- filtered_docs = [doc for doc, score in docs_with_scores if score < threshold]
56
- print(f"✅ Filtered {len(filtered_docs)} docs below threshold {threshold}")
57
-
58
- if not filtered_docs:
59
- print("⚠️ No matching documents found.")
60
- return {
61
- "answer": "This question seems to be outside my knowledge of government schemes. Please ask about a specific scheme or benefit.",
62
- "sources": []
63
- }
64
-
65
- context = "\n\n".join([doc.page_content for doc in filtered_docs])
66
- prompt = f"""
67
- You are a helpful assistant for rural users regarding Indian government schemes.
68
- Answer the following question using only the context provided below.
69
- If the answer cannot be found in the context, say:
70
- "I'm sorry, I couldn't find information about that in my current knowledge base."
71
-
72
- Context:
73
- {context}
74
-
75
- Question: {question}
76
-
77
- Answer:
78
- """
79
-
80
- answer_message = llm.invoke([HumanMessage(content=prompt)])
81
- answer = answer_message.content.strip()
82
-
83
- return {
84
- "answer": answer,
85
- "sources": [doc.metadata for doc in filtered_docs]
86
- }
87
-
88
- except Exception as e:
89
- print(f"❌ Error in get_answer_from_vectorstore: {e}")
90
- return {
91
- "answer": f"An error occurred while fetching the answer: {str(e)}",
92
- "sources": []
93
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/services/recommend_service.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ # MODIFIED IMPORTS: Import the modules themselves, not the variables
3
+ from api.services import scheme_service
4
+ from api.services import central_services
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ # --- NEW: Helper function for dynamic tag generation ---
9
+ def _generate_tags_from_scheme(scheme: dict, user_tags_set: set) -> list[str]:
10
+ """
11
+ Searches a scheme's Title and Description for any of the user's tags.
12
+ Returns a list of tags that were found.
13
+ """
14
+ # Combine Title and Description into a single searchable text
15
+ search_text = (
16
+ scheme.get("Title", "") + " " +
17
+ scheme.get("Description", "")
18
+ ).lower()
19
+
20
+ if not search_text:
21
+ return []
22
+
23
+ found_tags = []
24
+ # Check each of the user's original tags
25
+ for tag in user_tags_set:
26
+ # Use ' in ' for simple substring matching
27
+ if tag in search_text:
28
+ found_tags.append(tag)
29
+ return found_tags
30
+ # --- END NEW HELPER ---
31
+
32
+
33
+ # --- Hybrid Recommendation Logic ---
34
+
35
+ def _calculate_hybrid_score(scheme: dict, user_tags_set: set) -> float:
36
+ """
37
+ Calculates a hybrid recommendation score for a single scheme.
38
+
39
+ ASSUMPTIONS:
40
+ - scheme (dict): A scheme object.
41
+ - 'tags' (list[str]): Assumes scheme has a 'tags' key with a list of strings.
42
+ - 'popularity' (float): Assumes scheme has a 'popularity' key with a float (0.0 to 1.0).
43
+ If not present, defaults to 0.5.
44
+ """
45
+ # Define weights for each part of the hybrid model
46
+ WEIGHT_TAG_MATCH = 0.7 # 70% importance
47
+ WEIGHT_POPULARITY = 0.3 # 30% importance
48
+
49
+ # 1. Content-Based Score (Jaccard Similarity)
50
+ # Jaccard Similarity = (Intersection of tags) / (Union of tags)
51
+
52
+ # --- Assumption Handling ---
53
+ # Safely get tags, default to empty list if not present or wrong type
54
+ scheme_tags = scheme.get("tags", [])
55
+ if not isinstance(scheme_tags, list):
56
+ # FIX: Use 'Title' for logging, as 'id' may not exist
57
+ logger.warning(f"Scheme {scheme.get('Title', 'Unknown')} has invalid 'tags' format. Skipping.")
58
+ scheme_tags = []
59
+
60
+ scheme_tags_set = set(tag.lower() for tag in scheme_tags)
61
+ # --- End Assumption Handling ---
62
+
63
+ intersection = user_tags_set.intersection(scheme_tags_set)
64
+ union = user_tags_set.union(scheme_tags_set)
65
+
66
+ if not union:
67
+ tag_score = 0.0
68
+ else:
69
+ tag_score = len(intersection) / len(union)
70
+
71
+ # 2. Popularity-Based Score
72
+ # --- Assumption Handling ---
73
+ # Safely get popularity, default to 0.5 if not present or wrong type
74
+ popularity_score = scheme.get("popularity", 0.5)
75
+ if not isinstance(popularity_score, (int, float)):
76
+ # FIX: Use 'Title' for logging
77
+ logger.warning(f"Scheme {scheme.get('Title', 'Unknown')} has invalid 'popularity' format. Defaulting to 0.5.")
78
+ popularity_score = 0.5
79
+ # --- End Assumption Handling ---
80
+
81
+
82
+ # 3. Final Hybrid Score
83
+ final_score = (WEIGHT_TAG_MATCH * tag_score) + (WEIGHT_POPULARITY * popularity_score)
84
+
85
+ return final_score
86
+
87
def _prepare_candidate(scheme: dict, user_tags_set: set, source: str,
                       source_name: str, lang_found: str) -> dict:
    """Copy a cached scheme and stamp it with its origin metadata.

    If the scheme carries no 'tags', derive them dynamically from its
    Title/Description so tag matching still works on untagged data.
    """
    candidate = scheme.copy()
    if not candidate.get("tags"):
        candidate["tags"] = _generate_tags_from_scheme(candidate, user_tags_set)
    candidate["source"] = source              # 'state' or 'central'
    candidate["source_name"] = source_name    # state or ministry name
    candidate["lang_found"] = lang_found
    return candidate


def get_recommendations(user_tags: list[str], lang: str) -> list[dict]:
    """
    Generate a ranked list of scheme recommendations from both the state and
    central caches based on user tags.

    Args:
        user_tags: Tags describing the user's interests (matched case-insensitively).
        lang: ISO language code. NOTE: currently ignored — the search spans
            ALL languages present in the caches.

    Returns:
        Recommendation dicts sorted by 'final_score' (descending); an empty
        list when nothing matches or the caches are empty.
    """
    logger.info(f"Generating recommendations with tags={user_tags}. (NOTE: Ignoring lang='{lang}' and searching all languages)")

    # Read the cache variables at RUN-TIME *through* their modules so we see
    # the populated data, not a stale import-time snapshot.
    cached_all_schemes = scheme_service.cached_all_schemes
    _central_schemes_cache = central_services._central_schemes_cache

    all_schemes = []
    user_tags_set = set(tag.lower() for tag in user_tags)

    # Diagnostic logging: record what this function *sees* in the caches.
    logger.info(f"DIAGNOSTIC: State cache size: {len(cached_all_schemes)}")
    logger.info(f"DIAGNOSTIC: State cache keys: {list(cached_all_schemes.keys())}")
    logger.info(f"DIAGNOSTIC: Central cache size: {len(_central_schemes_cache)}")
    logger.info(f"DIAGNOSTIC: Central cache keys: {list(_central_schemes_cache.keys())}")

    # 1. Aggregate state schemes (the 'lang' parameter is ignored).
    try:
        # Cache shape: Dict[StateName, List[Scheme]].
        for state_name, state_schemes in cached_all_schemes.items():
            logger.info(f"DIAGNOSTIC: Processing state: {state_name}, found {len(state_schemes)} schemes.")

            # No definitive language key is stored per state; based on logs
            # ('Kannada schemes loaded') we assume Karnataka data is Kannada.
            lang_key = "unknown"
            if state_name.lower() == "karnataka":
                lang_key = "ka"  # HACK: based on user log

            if not isinstance(state_schemes, list):
                logger.warning(f"DIAGNOSTIC: Expected list of schemes for state '{state_name}', but got {type(state_schemes)}. Skipping.")
                continue

            for scheme in state_schemes:
                # Shared helper removes the copy/tag-generation duplication
                # that previously existed between this loop and the central one.
                all_schemes.append(
                    _prepare_candidate(scheme, user_tags_set, "state", state_name, lang_key)
                )
    except Exception as e:
        logger.error(f"Error processing state schemes cache: {e}")

    # 2. Aggregate central schemes (the 'lang' parameter is ignored).
    try:
        # Cache shape: Dict[LangKey, Dict[MinistryName, List[Scheme]]].
        for lang_key, central_lang_cache in _central_schemes_cache.items():
            # Per user request: skip Hindi entries entirely.
            if lang_key == "hi":
                continue

            logger.info(f"DIAGNOSTIC: Processing central lang: {lang_key}, found ministries: {len(central_lang_cache)}")

            if not isinstance(central_lang_cache, dict):
                logger.warning(f"DIAGNOSTIC: Expected dict of ministries for lang '{lang_key}', but got {type(central_lang_cache)}. Skipping.")
                continue

            for ministry_name, ministry_schemes in central_lang_cache.items():
                for scheme in ministry_schemes:
                    all_schemes.append(
                        _prepare_candidate(scheme, user_tags_set, "central", ministry_name, lang_key)
                    )
    except Exception as e:
        logger.error(f"Error processing central schemes cache: {e}")

    if not all_schemes:
        # FIX: dropped the pointless f-string prefix (message has no placeholders).
        logger.warning("No schemes found in cache across ANY language. Caches might be empty.")
        return []

    # 3. Score every aggregated scheme; keep only those with a tag overlap.
    recommendations = []
    for scheme in all_schemes:
        score = _calculate_hybrid_score(scheme, user_tags_set)

        # Dynamic tag generation above guarantees 'tags' exists when matchable.
        scheme_tags_set = set(tag.lower() for tag in scheme.get("tags", []))
        matched = user_tags_set.intersection(scheme_tags_set)
        if matched:
            recommendations.append({
                # 'Title'/'Description' match the cached scheme data shape.
                "name": scheme.get("Title", "Unnamed Scheme"),
                "description": scheme.get("Description", ""),
                "tags": scheme.get("tags", []),  # includes generated tags
                "source": scheme["source"],            # 'state' or 'central'
                "source_name": scheme["source_name"],  # state or ministry name
                "lang_found": scheme.get("lang_found", "unknown"),
                "matched_tags": list(matched),
                "final_score": round(score, 4),
            })

    # 4. Rank by final score, best first.
    sorted_recommendations = sorted(recommendations, key=lambda x: x["final_score"], reverse=True)

    logger.info(f"Found {len(sorted_recommendations)} matching recommendations.")
    return sorted_recommendations
requirements.txt CHANGED
@@ -1,22 +1,3 @@
1
- # SLIM requirements.txt
2
-
3
- # Core web framework
4
  fastapi
5
  uvicorn[standard]
6
-
7
- # Database & Cloud Services
8
- firebase-admin
9
- pinecone-client>=4.0.0
10
-
11
- # LLM & AI Libraries
12
- python-dotenv
13
- groq
14
- sentence-transformers
15
-
16
- # LangChain - with minimum versions to fix import errors
17
- langchain>=0.2.0
18
- langchain-core>=0.2.0
19
- langchain-community>=0.2.0
20
- langchain-groq>=0.1.5
21
- langchain-pinecone>=0.1.1
22
- langchain-huggingface>=0.0.3
 
 
 
 
1
  fastapi
2
  uvicorn[standard]
3
+ firebase-admin