Spaces:

Neha-Rudraraju
/

Financial-Planning

Sleeping

App Files Community

Nyha15 committed on Apr 27, 2025

Commit

5c95ea1

1 Parent(s): 5d862db

Refactored

Browse files

Files changed (1) hide show

app.py +55 -93

app.py CHANGED Viewed

@@ -53,17 +53,14 @@ def get_workflow_log() -> str:
 # =======================================
 class TaxRegulationDatabase:
-    """Database of tax regulations for international students"""
     def __init__(self):
-        self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
         self.tax_regulations: Dict[str, List[str]] = {}
         self.tax_treaties: Dict[str, List[str]] = {}
         self.lock = threading.Lock()
     def preload_common_countries(self):
-        countries = ["India", "China", "South Korea", "Brazil", "Saudi Arabia",
-                     "Canada", "Mexico", "Taiwan", "Japan", "Vietnam"]
         log_workflow("Preloading tax regulations for common countries")
         for country in countries:
             threading.Thread(target=self._load_all, args=(country,), daemon=True).start()
@@ -75,11 +72,10 @@ class TaxRegulationDatabase:
     @lru_cache(maxsize=32)
     def _get_tax_regulations(self, country: str) -> List[str]:
         log_workflow(f"Loading tax regulations for {country}")
-        prompt = (f"Provide 5 specific, factual statements about tax regulations that directly affect international students "
-                  f"from {country} studying in the US. Include form numbers, thresholds, deadlines.")
         try:
             resp = self.llm.invoke(prompt)
-            regs = [line.strip() for line in resp.content.split('\n') if line.strip()]
             with self.lock:
                 self.tax_regulations[country] = regs
             return regs
@@ -89,12 +85,11 @@ class TaxRegulationDatabase:
     @lru_cache(maxsize=32)
     def _get_tax_treaty(self, country: str) -> List[str]:
-        log_workflow(f"Loading tax treaty info for {country}")
-        prompt = (f"Provide 5 specific statements about the US-{country} tax treaty relevant to students, "
-                  f"including article numbers and exemption limits.")
         try:
             resp = self.llm.invoke(prompt)
-            treaty = [line.strip() for line in resp.content.split('\n') if line.strip()]
             with self.lock:
                 self.tax_treaties[country] = treaty
             return treaty
@@ -113,43 +108,42 @@ class TaxRegulationDatabase:
 # =======================================
 class InternationalStudentDataCollector:
-    """Collects financial data for international students"""
     def __init__(self):
-        self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
         self.cache: Dict[str, List[str]] = {}
         self.tax_db = TaxRegulationDatabase()
     def preload_common(self):
         log_workflow("Preloading data for common countries")
         self.tax_db.preload_common_countries()
-        for country in ["India", "China"]:
             for fn in [self.get_banking_data, self.get_credit_data]:
-                threading.Thread(target=fn, args=(country,), daemon=True).start()
     def _cached(self, key: str, prompt: str) -> List[str]:
         log_workflow(f"Collecting data for {key}")
         if key in self.cache:
-            log_workflow("Using cached data")
             return self.cache[key]
         try:
             resp = self.llm.invoke(prompt)
-            facts = [line.strip() for line in resp.content.split('\n') if line.strip()]
-            self.cache[key] = facts
-            return facts
         except Exception as e:
             log_workflow(f"Error collecting {key}", str(e))
             return [f"Error: {e}"]
     def get_banking_data(self, country: str) -> List[str]:
-        prompt = (f"5 facts about banking for {country} students in the US: banks, fees, docs.")
-        return self._cached(f"banking_{country}", prompt)
     def get_credit_data(self, country: str) -> List[str]:
-        prompt = (f"5 facts about credit building for {country} students: cards, steps, pitfalls.")
-        return self._cached(f"credit_{country}", prompt)
-    # Additional domain methods omitted for brevity
 # =======================================
 # RAG Knowledge Base
@@ -169,7 +163,6 @@ class KnowledgeBase:
         with self.lock:
             if country in self.vstores:
                 return
-        # Retrieve raw texts
         if self.domain == "banking":
             texts = self.collector.get_banking_data(country)
         elif self.domain == "credit":
@@ -179,68 +172,48 @@ class KnowledgeBase:
             texts = ti.get("regulations", []) + ti.get("treaty", [])
         else:
             texts = []
         if not texts:
-            log_workflow(f"No texts available for domain '{self.domain}' and country '{country}'")
             with self.lock:
                 self.vstores[country] = None
                 self.retrievers[country] = None
             return
-        # Split texts into chunks
-                # Split texts into chunks
         splits = self.splitter.split_text("\n\n".join(texts))
         if not splits:
-            log_workflow(f"No splits generated for domain '{self.domain}' and country '{country}'")
             with self.lock:
                 self.vstores[country] = None
                 self.retrievers[country] = None
             return
-        # Build vector store
         store = Chroma.from_texts(splits, self.embeddings, collection_name=f"{self.domain}_{country}")
-        retr = store.as_retriever(search_kwargs={"k": 3})
         with self.lock:
             self.vstores[country] = store
             self.retrievers[country] = retr
-        log_workflow(f"Vector store ready for domain '{self.domain}' and country '{country}'")
     def retrieve(self, query: str, country: str) -> List[str]:
-        log_workflow(f"Retrieving domain '{self.domain}' for {country}")
         self._init_country(country)
         retr = self.retrievers.get(country)
         if not retr:
-            # Fallback to direct collector methods
-            log_workflow(f"Falling back to direct retrieval for domain '{self.domain}' and country '{country}'")
-            if self.domain == "banking":
-                return self.collector.get_banking_data(country)
-            if self.domain == "credit":
-                return self.collector.get_credit_data(country)
             if self.domain == "tax":
-                info = self.collector.tax_db.get_tax_information(country)
-                return info.get("regulations", []) + info.get("treaty", [])
             return []
-        # Perform similarity search
         docs = retr.get_relevant_documents(query)
-        results = [d.page_content for d in docs]
-        log_workflow(f"Retrieved {len(results)} docs for domain '{self.domain}' and country '{country}'")
-        return results
-# Pre-initialize KnowledgeBase for common domains and countries
-COMMON_COUNTRIES = ["India", "China", "South Korea", "Brazil", "Saudi Arabia", "Canada", "Mexico", "Taiwan", "Japan", "Vietnam"]
-DOMAINS = ["banking", "credit", "tax"]
-# Initialize and preload vector stores at startup
-def preload_kbs():
-    for domain in DOMAINS:
-        kb = KnowledgeBase(domain)
-        for country in COMMON_COUNTRIES:
-            # Launch in background to avoid blocking
-            threading.Thread(target=kb._init_country, args=(country,), daemon=True).start()
-# Trigger preload
-preload_kbs()
 # =======================================
 # Specialist Agents
@@ -256,17 +229,16 @@ class SpecialistAgent:
         log_workflow(f"{self.name} analyzing")
         refs = self.kb.retrieve(query, country)
         context = "\n".join(f"- {r}" for r in refs)
-        prompt = f"As {self.name} for {country}, references:\n{context}\nQuestion: {query}\nProvide detailed advice."
         resp = self.llm.invoke(prompt)
         log_workflow(f"{self.name} done")
         return resp.content
 # Instantiate specialists
-BankingAdvisor = lambda llm=None: SpecialistAgent("Banking Advisor", "banking")
-CreditBuilder = lambda llm=None: SpecialistAgent("Credit Builder", "credit")
-LegalFinanceAdvisor = lambda llm=None: SpecialistAgent("Legal Advisor", "legal")
-TaxSpecialist = lambda llm=None: SpecialistAgent("Tax Specialist", "tax")
-# Additional specialists omitted
 # =======================================
 # Coordinator Agent
@@ -278,38 +250,28 @@ class CoordinatorAgent:
         self.specialists = {
             "banking": BankingAdvisor(),
             "credit": CreditBuilder(),
-            "legal": LegalFinanceAdvisor(),
             "tax": TaxSpecialist()
         }
-    def run(self, query: str, profile: Dict[str, Any]) -> str:
         clear_workflow_log()
         country = profile.get("home_country","unknown")
-        # 1. collect
-        advice = {d: self.specialists[d].run(query, country) for d in self.specialists}
-        # 2. plan (omitted)
-        # 3. synthesis
-       # 4. Synthesize and pretty–print
         lines = ["# Your Personalized Financial Advice\n"]
-        # Add each specialist’s section
-        for domain, text in specialist_advice.items():
             lines.append(f"## {domain.capitalize()}\n")
-            # Indent each paragraph for readability
             for para in text.strip().split("\n\n"):
-                lines.append("    " + para.strip().replace("\n", "\n    "))
-            lines.append("")  # blank line
-        # Append Multi-Path Plans as a JSON code block
-        lines.append("## Multi-Path Financial Plans\n")
-        lines.append("```json")
-        lines.append(json.dumps(plans, indent=2))
         lines.append("```")
         formatted = "\n".join(lines)
         log_workflow("Synthesis complete")
-        # 5. Return formatted advice + workflow log
         return formatted + "\n\n---\n" + get_workflow_log()

 # =======================================
 class TaxRegulationDatabase:
     def __init__(self):
+        self.llm = ChatOpenAI(temperature=0.1)
         self.tax_regulations: Dict[str, List[str]] = {}
         self.tax_treaties: Dict[str, List[str]] = {}
         self.lock = threading.Lock()
     def preload_common_countries(self):
+        countries = ["India", "China", "South Korea", "Brazil", "Canada", "Mexico", "Taiwan", "Japan", "Vietnam"]
         log_workflow("Preloading tax regulations for common countries")
         for country in countries:
             threading.Thread(target=self._load_all, args=(country,), daemon=True).start()
     @lru_cache(maxsize=32)
     def _get_tax_regulations(self, country: str) -> List[str]:
         log_workflow(f"Loading tax regulations for {country}")
+        prompt = f"Provide 5 factual statements about tax regs for {country} students in the US, incl. forms, thresholds."
         try:
             resp = self.llm.invoke(prompt)
+            regs = [line.strip() for line in resp.content.split("\n") if line.strip()]
             with self.lock:
                 self.tax_regulations[country] = regs
             return regs
     @lru_cache(maxsize=32)
     def _get_tax_treaty(self, country: str) -> List[str]:
+        log_workflow(f"Loading tax treaty for {country}")
+        prompt = f"Provide 5 statements about US-{country} tax treaty for students, incl. articles, exemptions."
         try:
             resp = self.llm.invoke(prompt)
+            treaty = [line.strip() for line in resp.content.split("\n") if line.strip()]
             with self.lock:
                 self.tax_treaties[country] = treaty
             return treaty
 # =======================================
 class InternationalStudentDataCollector:
     def __init__(self):
+        self.llm = ChatOpenAI(temperature=0.1)
         self.cache: Dict[str, List[str]] = {}
         self.tax_db = TaxRegulationDatabase()
     def preload_common(self):
         log_workflow("Preloading data for common countries")
         self.tax_db.preload_common_countries()
+        for c in ["India", "China"]:
             for fn in [self.get_banking_data, self.get_credit_data]:
+                threading.Thread(target=fn, args=(c,), daemon=True).start()
     def _cached(self, key: str, prompt: str) -> List[str]:
         log_workflow(f"Collecting data for {key}")
         if key in self.cache:
             return self.cache[key]
         try:
             resp = self.llm.invoke(prompt)
+            items = [line.strip() for line in resp.content.split("\n") if line.strip()]
+            self.cache[key] = items
+            return items
         except Exception as e:
             log_workflow(f"Error collecting {key}", str(e))
             return [f"Error: {e}"]
     def get_banking_data(self, country: str) -> List[str]:
+        return self._cached(
+            f"banking_{country}",
+            f"5 facts on banking for {country} students in the US, incl. banks, fees, docs."
+        )
     def get_credit_data(self, country: str) -> List[str]:
+        return self._cached(
+            f"credit_{country}",
+            f"5 facts on credit building for {country} students: cards, history, pitfalls."
+        )
 # =======================================
 # RAG Knowledge Base
         with self.lock:
             if country in self.vstores:
                 return
         if self.domain == "banking":
             texts = self.collector.get_banking_data(country)
         elif self.domain == "credit":
             texts = ti.get("regulations", []) + ti.get("treaty", [])
         else:
             texts = []
         if not texts:
+            log_workflow(f"No texts for {self.domain}/{country}")
             with self.lock:
                 self.vstores[country] = None
                 self.retrievers[country] = None
             return
         splits = self.splitter.split_text("\n\n".join(texts))
         if not splits:
+            log_workflow(f"No splits for {self.domain}/{country}")
             with self.lock:
                 self.vstores[country] = None
                 self.retrievers[country] = None
             return
         store = Chroma.from_texts(splits, self.embeddings, collection_name=f"{self.domain}_{country}")
+        retr = store.as_retriever(search_kwargs={"k":3})
         with self.lock:
             self.vstores[country] = store
             self.retrievers[country] = retr
+        log_workflow(f"Vector store ready for {self.domain}/{country}")
     def retrieve(self, query: str, country: str) -> List[str]:
+        log_workflow(f"Retrieving {self.domain} for {country}")
         self._init_country(country)
         retr = self.retrievers.get(country)
         if not retr:
+            log_workflow(f"Fallback direct for {self.domain}/{country}")
+            if self.domain == "banking": return self.collector.get_banking_data(country)
+            if self.domain == "credit": return self.collector.get_credit_data(country)
             if self.domain == "tax":
+                ti = self.collector.tax_db.get_tax_information(country)
+                return ti.get("regulations",[]) + ti.get("treaty",[])
             return []
         docs = retr.get_relevant_documents(query)
+        return [d.page_content for d in docs]
+# Preload KBs
+COMMON_COUNTRIES = ["India","China"]
+DOMAINS = ["banking","credit","tax"]
+for dom in DOMAINS:
+    kb = KnowledgeBase(dom)
+    for c in COMMON_COUNTRIES:
+        threading.Thread(target=kb._init_country, args=(c,), daemon=True).start()
 # =======================================
 # Specialist Agents
         log_workflow(f"{self.name} analyzing")
         refs = self.kb.retrieve(query, country)
         context = "\n".join(f"- {r}" for r in refs)
+        prompt = f"As {self.name} for {country}, context:\n{context}\nQuestion: {query}\nProvide detailed advice."
         resp = self.llm.invoke(prompt)
         log_workflow(f"{self.name} done")
         return resp.content
 # Instantiate specialists
+BankingAdvisor = lambda: SpecialistAgent("Banking Advisor","banking")
+CreditBuilder = lambda: SpecialistAgent("Credit Builder","credit")
+TaxSpecialist = lambda: SpecialistAgent("Tax Specialist","tax")
+# Add more as needed
 # =======================================
 # Coordinator Agent
         self.specialists = {
             "banking": BankingAdvisor(),
             "credit": CreditBuilder(),
             "tax": TaxSpecialist()
         }
+    def run(self, query: str, profile: Dict[str,Any]) -> str:
         clear_workflow_log()
         country = profile.get("home_country","unknown")
+        # 1. Gather specialist advice
+        advice_map = {d:agent.run(query,country) for d,agent in self.specialists.items()}
+        # 2. Multi-path plans placeholder
+        plans = {"conservative":"...","balanced":"...","growth":"..."}
+        # 3. Synthesis & formatting
         lines = ["# Your Personalized Financial Advice\n"]
+        for domain, text in advice_map.items():
             lines.append(f"## {domain.capitalize()}\n")
             for para in text.strip().split("\n\n"):
+                lines.append("    "+para.replace("\n","\n    "))
+            lines.append("")
+        lines.append("## Multi-Path Financial Plans\n```json")
+        lines.append(json.dumps(plans,indent=2))
         lines.append("```")
         formatted = "\n".join(lines)
         log_workflow("Synthesis complete")
         return formatted + "\n\n---\n" + get_workflow_log()