import asyncio
import logging
import os
import json
from difflib import SequenceMatcher
from api.core.firebase_utils import get_firestore_db
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# --- Global Cache for Schemes ---
# Maps a lowercase state name to the list of scheme dicts for that state.
# load_all_schemes_into_cache() rebinds this name to a freshly built dict.
cached_all_schemes = {}
# Set to True by load_all_schemes_into_cache() when a load starts and back to
# False by the same function; exposed through get_cache_loading_status().
is_cache_loading = False
# File path for Kannada JSON (project_root/data/translated_schemes_kn.json)
# BASE_DIR is the parent of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
KAN_JSON_FILE = os.path.join(BASE_DIR, "data", "translated_schemes_kn.json")
def _collect_firestore_schemes(db, schemes_by_state):
    """Fill *schemes_by_state* with every state's schemes read from Firestore.

    Each document of the top-level "schemes" collection is treated as a state;
    its "schemes" sub-collection holds that state's scheme documents.  The dict
    is filled in place so states already read survive an error raised later in
    the stream.
    """
    for state_doc in db.collection("schemes").stream():
        # Store the state name lowercase so cache lookups are case-insensitive.
        state_name = state_doc.id.strip().lower()
        scheme_ref = db.collection("schemes").document(state_doc.id).collection("schemes")
        schemes_in_state = []
        for scheme_doc in scheme_ref.stream():
            data = scheme_doc.to_dict()
            data["id"] = scheme_doc.id
            schemes_in_state.append(data)
        schemes_by_state[state_name] = schemes_in_state


def _merge_kannada_schemes(schemes_by_state):
    """Merge the schemes stored in KAN_JSON_FILE into *schemes_by_state*.

    Every scheme read from the file is tagged with language "kn".  A missing
    file is logged as a warning and leaves the dict untouched.
    """
    if not os.path.exists(KAN_JSON_FILE):
        logger.warning(f"Kannada JSON file not found at {KAN_JSON_FILE}")
        return
    with open(KAN_JSON_FILE, "r", encoding="utf-8") as f:
        kn_data = json.load(f)
    for state, schemes in kn_data.items():
        state_key = state.strip().lower()
        for scheme in schemes:
            scheme["language"] = "kn"  # ensure Kannada tag
        schemes_by_state.setdefault(state_key, []).extend(schemes)
    logger.info("Kannada schemes loaded successfully from JSON.")


async def load_all_schemes_into_cache():
    """
    Fetches all schemes from Firestore, merges the Kannada JSON schemes, and
    publishes the result as the in-memory cache.

    This function should be called at application startup and/or periodically.
    If a load is already in progress the call returns immediately.  If the
    Firestore client is unavailable the existing cache is left untouched.
    Errors while reading Firestore or the Kannada file are logged (with
    traceback) and whatever was collected so far is still published.

    NOTE: the Firestore streams are iterated with plain ``for`` loops and this
    coroutine contains no ``await``, so it runs to completion without yielding
    to the event loop.
    """
    global cached_all_schemes, is_cache_loading
    if is_cache_loading:
        logger.info("Cache is already loading, skipping concurrent load request.")
        return
    is_cache_loading = True
    # A single try/finally owns the flag: it stays True for the whole load
    # (including the Kannada merge and the cache swap) and is always cleared,
    # even if get_firestore_db() itself raises.
    try:
        logger.info("Starting to load all schemes into cache from Firestore...")
        db = get_firestore_db()
        if not db:
            logger.error("Firestore DB client is not available. Cannot load schemes into cache.")
            return
        fresh_cache = {}
        try:
            _collect_firestore_schemes(db, fresh_cache)
        except Exception as e:
            logger.exception(f"Error loading schemes into cache: {e}")
        # --- Load Kannada Schemes from JSON ---
        try:
            _merge_kannada_schemes(fresh_cache)
        except Exception as e:
            logger.exception(f"Error loading Kannada JSON schemes: {e}")
        # --- Finalize cache ---
        # Rebind in one step so readers never observe a half-built cache.
        cached_all_schemes = fresh_cache
        logger.info(f"Cache ready. Total states: {len(cached_all_schemes)}")
    finally:
        is_cache_loading = False
# In scheme_service.py
def get_all_schemes(lang=None):
    """
    Return the cached schemes grouped by state.

    With no ``lang`` the cache dict itself is returned.  With ``lang`` a new
    dict is built that keeps, per state, only the schemes whose "language"
    equals ``lang`` (case-insensitive); a scheme without a "language" key
    counts as "en", and states left with no schemes are omitted.
    """
    if lang:
        by_state = {}
        for state_name, state_schemes in cached_all_schemes.items():
            matching = []
            for scheme in state_schemes:
                # Untagged schemes are treated as English.
                if scheme.get("language", "en").lower() == lang.lower():
                    matching.append(scheme)
            if matching:
                by_state[state_name] = matching
        logger.info(f"Filtering schemes for lang={lang}")
        return by_state
    # No language requested: hand back everything.
    return cached_all_schemes
def _scheme_search_text(scheme):
    """Return the lowercase text of a scheme's searchable fields, space-joined.

    A field may hold a string or a list/tuple of strings; any other value
    (and any non-string list item) is skipped so the join cannot fail.
    """
    parts = []
    for field in (
        "Title",
        "Description",
        "Tags",
        "Eligibility",
        "Benefits",
        "Details",
        "Documents Required",
    ):
        value = scheme.get(field)
        if isinstance(value, str):
            parts.append(value)
        elif isinstance(value, (list, tuple)):
            parts.extend(item for item in value if isinstance(item, str))
    return " ".join(parts).lower()


def _fuzzy_term_match(term, words, cutoff=0.7):
    """Return True when every word of *term* has a close match in *words*."""
    from difflib import get_close_matches

    tokens = term.split()
    return bool(tokens) and all(
        get_close_matches(token, words, n=1, cutoff=cutoff) for token in tokens
    )


def search_schemes_in_cache(query: str, lang: str = None):
    """
    Searches schemes across all states within the in-memory cache.

    The query is lower-cased and a simple singular form is added ("ies" -> "y",
    trailing "s" dropped).  A scheme matches when any term is a substring of
    its searchable text, or, to tolerate typos, when every word of a term is
    similar (difflib ratio >= 0.7) to some word of that text.  An empty query
    matches every scheme because the substring test is trivially true.

    When ``lang`` is given, schemes tagged with a different language are
    skipped; schemes with no language tag are always included.

    Returns a list of shallow copies of the matching schemes, each with an
    added "state" key holding the cache key of its state.
    """
    search_query = query.strip().lower()
    # Create variations of the query for simple stemming
    search_terms = [search_query]
    if search_query.endswith("ies"):
        search_terms.append(search_query[:-3] + "y")
    elif search_query.endswith("s"):
        search_terms.append(search_query[:-1])
    logger.info(f"Starting smart search for terms: {search_terms}...")

    wanted_lang = lang.lower() if lang else None
    matched = []
    for state_name, schemes in cached_all_schemes.items():
        for scheme in schemes:
            # Language filter: untagged schemes pass for any requested language.
            language = scheme.get("language") or ""
            if wanted_lang and language and str(language).lower() != wanted_lang:
                continue

            searchable_text = _scheme_search_text(scheme)
            hit = any(term in searchable_text for term in search_terms)
            if not hit:
                # Compare word-by-word: a ratio against the whole text can only
                # pass the threshold when the text is about as short as the query.
                words = list(
                    {w.strip(".,;:!?()[]{}\"'") for w in searchable_text.split()}
                )
                hit = any(_fuzzy_term_match(term, words) for term in search_terms)
            if hit:
                result = scheme.copy()
                result["state"] = state_name
                matched.append(result)
    logger.info(f"Search for '{query}' completed. Found {len(matched)} matches.")
    return matched
# In scheme_service.py
def get_schemes_by_state(state: str, lang: str = None):
    """
    Look up the cached schemes of one state.

    The state name is stripped and lower-cased before the lookup.  Returns
    None when the state is unknown or has no schemes.  Without ``lang`` the
    cached list itself is returned; with ``lang`` a new list of the schemes
    whose "language" equals ``lang`` (case-insensitive) is returned, where a
    scheme without a "language" key counts as "en".
    """
    state_schemes = cached_all_schemes.get(state.strip().lower())
    if not state_schemes:
        return None
    if not lang:
        return state_schemes
    selected = []
    for scheme in state_schemes:
        # Untagged schemes are treated as English.
        if scheme.get("language", "en").lower() == lang.lower():
            selected.append(scheme)
    return selected
def get_scheme_details_by_title(state: str, title: str, lang: str = None):
    """
    Find a single scheme of a state by its id or its title.

    ``state`` and ``title`` are stripped and lower-cased before comparison;
    ``title`` is matched against both the scheme's "id" and its "Title".
    When ``lang`` is given, a matching scheme is only returned if its
    "language" (defaulting to "en" when absent) equals ``lang``; otherwise
    the scan continues.  Returns the cached scheme dict, or None if the state
    is unknown/empty or nothing matches.
    """
    candidates = cached_all_schemes.get(state.strip().lower())
    if not candidates:
        return None
    wanted = title.strip().lower()
    for candidate in candidates:
        candidate_id = candidate.get("id", "").strip().lower()
        candidate_title = candidate.get("Title", "").strip().lower()
        if candidate_id != wanted and candidate_title != wanted:
            continue
        # Untagged schemes are treated as English.
        candidate_lang = candidate.get("language", "en").lower()
        if lang and candidate_lang != lang.lower():
            continue
        return candidate
    return None
def get_cache_loading_status():
    """Report the current value of the module-level cache-loading flag."""
    loading_now = is_cache_loading
    return loading_now
|