File size: 7,699 Bytes
f3ca15e
 
450d1d2
 
1c3cab3
 
f3ca15e
 
 
 
 
 
 
450d1d2
 
 
 
f3ca15e
 
 
 
 
 
 
 
 
 
 
 
 
1c3cab3
f3ca15e
 
 
 
 
 
 
1c3cab3
 
f3ca15e
 
1c3cab3
 
f3ca15e
 
 
 
 
1c3cab3
f3ca15e
 
 
 
450d1d2
 
f3ca15e
 
 
 
 
450d1d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3ca15e
450d1d2
 
 
 
 
 
 
1c3cab3
 
 
450d1d2
 
1c3cab3
 
450d1d2
1c3cab3
 
 
 
450d1d2
1c3cab3
 
450d1d2
1c3cab3
 
 
450d1d2
1c3cab3
 
450d1d2
1c3cab3
 
 
 
 
 
f3ca15e
 
 
 
1c3cab3
 
 
 
 
 
 
 
f3ca15e
 
 
1c3cab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3ca15e
 
 
1c3cab3
f3ca15e
 
 
 
450d1d2
1c3cab3
 
 
 
 
 
 
 
 
 
450d1d2
0737512
1c3cab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0737512
 
 
1c3cab3
 
 
 
 
f3ca15e
 
1c3cab3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import asyncio
import logging
import os
import json
from difflib import SequenceMatcher
from api.core.firebase_utils import get_firestore_db

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# --- Global Cache for Schemes ---
# Maps a lower-cased state name to the list of scheme dicts for that state.
# It is rebound as a whole by load_all_schemes_into_cache().
cached_all_schemes = {}
# Set to True at the start of load_all_schemes_into_cache() and checked there
# so that a second, concurrent load request is skipped.
is_cache_loading = False

# File path for Kannada JSON (project_root/data/translated_schemes_kn.json)
# BASE_DIR is the parent of the directory that contains this file
# (two dirname() calls on this file's absolute path).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
KAN_JSON_FILE = os.path.join(BASE_DIR, "data", "translated_schemes_kn.json")

async def load_all_schemes_into_cache():
    """
    Rebuild the in-memory scheme cache from Firestore plus the Kannada JSON file.

    Reads every document of the top-level "schemes" collection (one document
    per state) together with that document's "schemes" sub-collection, then
    merges in the entries of KAN_JSON_FILE, tagging each of them with
    ``language = "kn"``. The assembled dict (lower-cased state name -> list of
    scheme dicts) replaces the module-level ``cached_all_schemes``.

    Behaviour:
      * If a load is already in progress, the call logs and returns at once.
      * If no Firestore client is available, nothing is loaded (the Kannada
        file is not read either) and the existing cache is left untouched.
      * Errors raised while reading Firestore or the JSON file are logged
        with a traceback; whatever was collected so far is still published.
      * ``is_cache_loading`` is cleared in a ``finally`` that spans the whole
        load, so it stays True until the cache has been published and can
        never be left stuck at True by an unexpected exception.

    NOTE(review): the Firestore and file reads below use plain synchronous
    iteration inside this coroutine, so they appear to block the event loop
    while they run - confirm whether they should be moved to a worker thread.
    """
    global cached_all_schemes, is_cache_loading
    if is_cache_loading:
        logger.info("Cache is already loading, skipping concurrent load request.")
        return

    is_cache_loading = True
    try:
        logger.info("Starting to load all schemes into cache from Firestore...")
        temp_schemes_cache = {}
        db = get_firestore_db()

        if not db:
            logger.error("Firestore DB client is not available. Cannot load schemes into cache.")
            return

        # --- Load schemes from Firestore ---
        try:
            for state_doc in db.collection("schemes").stream():
                # Store the state name lower-cased so lookups are consistent.
                state_name = state_doc.id.strip().lower()
                scheme_ref = (
                    db.collection("schemes")
                    .document(state_doc.id)
                    .collection("schemes")
                )

                schemes_in_state = []
                for scheme_doc in scheme_ref.stream():
                    data = scheme_doc.to_dict() or {}
                    data["id"] = scheme_doc.id
                    schemes_in_state.append(data)

                # Merge rather than overwrite, in case two document ids
                # normalise to the same state key.
                temp_schemes_cache.setdefault(state_name, []).extend(schemes_in_state)
        except Exception as e:
            # Best effort: keep what was read so far and continue.
            logger.exception("Error loading schemes into cache: %s", e)

        # --- Load Kannada schemes from JSON ---
        try:
            if os.path.exists(KAN_JSON_FILE):
                with open(KAN_JSON_FILE, "r", encoding="utf-8") as f:
                    kn_data = json.load(f)

                for state, schemes in kn_data.items():
                    state_key = state.strip().lower()
                    for s in schemes:
                        s["language"] = "kn"  # ensure Kannada tag
                    temp_schemes_cache.setdefault(state_key, []).extend(schemes)

                logger.info("Kannada schemes loaded successfully from JSON.")
            else:
                logger.warning("Kannada JSON file not found at %s", KAN_JSON_FILE)
        except Exception as e:
            logger.exception("Error loading Kannada JSON schemes: %s", e)

        # --- Finalize cache ---
        cached_all_schemes = temp_schemes_cache
        logger.info("Cache ready. Total states: %s", len(cached_all_schemes))
    finally:
        # Covers every exit path: early return, success, unexpected exception.
        is_cache_loading = False


# In scheme_service.py
def get_all_schemes(lang=None):
    """
    Return schemes from the in-memory cache, optionally filtered by language.

    Args:
        lang: Language code to keep, compared case-insensitively. When falsy,
            the cache dict itself is returned (not a copy).

    Returns:
        A dict mapping state key -> list of scheme dicts. When ``lang`` is
        given, only states with at least one matching scheme are included.
        A scheme whose "language" value is missing, ``None`` or empty is
        treated as ``"en"``.
    """
    if not lang:
        # No language requested: hand back everything.
        return cached_all_schemes

    wanted = str(lang).lower()
    filtered_cache = {}
    for state, schemes in cached_all_schemes.items():
        # ``or "en"`` also covers an explicit None/empty value, which
        # ``.get("language", "en")`` would pass through and crash on.
        matching = [
            s for s in schemes
            if str(s.get("language") or "en").lower() == wanted
        ]
        if matching:
            filtered_cache[state] = matching
    logger.info(f"Filtering schemes for lang={lang}")
    return filtered_cache


def _search_strings(value):
    """Return the text fragments of one scheme field value as a list of str."""
    if isinstance(value, str):
        return [value]
    if isinstance(value, (list, tuple)):
        return [
            item if isinstance(item, str) else str(item)
            for item in value
            if item is not None
        ]
    # Anything else (None, dict, number, ...) contributes no searchable text.
    return []


def _ratio_exceeds(term, text, threshold):
    """Return True when SequenceMatcher(None, term, text).ratio() > threshold."""
    matcher = SequenceMatcher(None, term, text)
    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
    # so the expensive ratio() only runs when a match is still possible.
    return (
        matcher.real_quick_ratio() > threshold
        and matcher.quick_ratio() > threshold
        and matcher.ratio() > threshold
    )


def search_schemes_in_cache(query: str, lang: str = None):
    """
    Search the cached schemes of every state for ``query``.

    The query is stripped and lower-cased. One simple stemmed variant is also
    tried: a trailing "ies" becomes "y", otherwise a trailing "s" is dropped.
    A scheme matches when any search term is a substring of its combined text
    (Title, Description, Tags, Eligibility, Benefits, Details,
    Documents Required), or when the similarity ratio between a term and that
    whole text exceeds 0.7. An empty query therefore matches every scheme,
    because the empty string is a substring of any text.

    Args:
        query: Free-text search string.
        lang: Optional language code. A scheme is skipped only when it has a
            non-empty "language" value that differs from ``lang``
            (case-insensitive); schemes without a language are always kept.

    Returns:
        A list of shallow copies of the matching scheme dicts, each with an
        added "state" key holding the cache key of the state it came from.
    """
    search_query = query.strip().lower()

    # Create variations of the query for simple stemming
    search_terms = [search_query]
    if search_query.endswith('ies'):
        search_terms.append(search_query[:-3] + 'y')
    elif search_query.endswith('s'):
        search_terms.append(search_query[:-1])

    logger.info(f"Starting smart search for terms: {search_terms}...")

    wanted_lang = lang.lower() if lang else None
    text_fields = ("Title", "Description", "Tags")
    list_fields = ("Eligibility", "Benefits", "Details", "Documents Required")
    matched = []

    for state_name, schemes in cached_all_schemes.items():
        for scheme in schemes:
            # Language filter: untagged schemes pass for any language.
            language = scheme.get("language", "")
            if wanted_lang and language and str(language).lower() != wanted_lang:
                continue

            # Combine all searchable fields. Values are coerced to strings so
            # a list-valued "Tags" or a non-string list item cannot break
            # the join below.
            searchable_parts = []
            for field in text_fields:
                searchable_parts.extend(_search_strings(scheme.get(field, "")))
            for field in list_fields:
                searchable_parts.extend(_search_strings(scheme.get(field, [])))

            searchable_text = " ".join(searchable_parts).lower()

            # Substring hit first (cheap), then fuzzy match for typos.
            if any(term in searchable_text for term in search_terms) or any(
                _ratio_exceeds(term, searchable_text, 0.7) for term in search_terms
            ):
                result = scheme.copy()
                result["state"] = state_name
                matched.append(result)
                # No break: several schemes of one state may match.

    logger.info(f"Search for '{query}' completed. Found {len(matched)} matches.")
    return matched

# In scheme_service.py
def get_schemes_by_state(state: str, lang: str = None):
    """
    Return the cached schemes of one state, optionally filtered by language.

    Args:
        state: State name; stripped and lower-cased before the cache lookup.
        lang: Optional language code, compared case-insensitively.

    Returns:
        ``None`` when the state is not cached or has no schemes. Otherwise
        the cached list itself when ``lang`` is falsy, or a new list with
        only the schemes in that language (possibly empty). A scheme whose
        "language" value is missing, ``None`` or empty is treated as ``"en"``.
    """
    state_key = state.strip().lower()
    schemes = cached_all_schemes.get(state_key)
    if not schemes:
        return None

    if not lang:
        return schemes

    wanted = lang.lower()
    # ``or "en"`` also covers an explicit None/empty value, which
    # ``.get("language", "en")`` would pass through and crash on.
    return [
        s for s in schemes
        if str(s.get("language") or "en").lower() == wanted
    ]

def get_scheme_details_by_title(state: str, title: str, lang: str = None):
    """
    Return one cached scheme of a state, looked up by its id or its title.

    Args:
        state: State name; stripped and lower-cased before the cache lookup.
        title: Value compared (stripped, case-insensitive) against each
            scheme's "id" and "Title".
        lang: Optional language code. When given, only a scheme in that
            language is returned; a scheme whose "language" value is missing,
            ``None`` or empty is treated as ``"en"``.

    Returns:
        The first matching scheme dict (the cached object, not a copy), or
        ``None`` when the state is unknown, ``title`` is blank, or nothing
        matches.
    """
    state_key = state.strip().lower()
    schemes_for_state = cached_all_schemes.get(state_key)

    if not schemes_for_state:
        return None

    url_title_clean = title.strip().lower()
    if not url_title_clean:
        # A blank title would otherwise equal the "" default used for a
        # missing id/Title and return an unrelated scheme.
        return None

    wanted_lang = lang.lower() if lang else None

    for scheme in schemes_for_state:
        # ``or ""`` guards against keys that are present but None.
        db_id_clean = str(scheme.get("id") or "").strip().lower()
        db_title_clean = str(scheme.get("Title") or "").strip().lower()

        if url_title_clean not in (db_id_clean, db_title_clean):
            continue

        scheme_lang = str(scheme.get("language") or "en").lower()
        if wanted_lang is None or scheme_lang == wanted_lang:
            return scheme

    return None


def get_cache_loading_status():
    """Report the current value of the module-level ``is_cache_loading`` flag."""
    currently_loading = is_cache_loading
    return currently_loading