Spaces:

Neha-Rudraraju
/

Financial-Planning

Runtime error

App Files Files Community

Nyha15 committed on Apr 27, 2025

Commit

2938edb

1 Parent(s): c704d60

Refactored

Browse files

Files changed (1) hide show

app.py +187 -464

app.py CHANGED Viewed

@@ -9,7 +9,9 @@ import os
 import sys
 import time
 import json
 from typing import List, Dict, Any, Optional
 try:
     # Import required libraries
@@ -70,6 +72,114 @@ def clear_workflow_log():
     global WORKFLOW_LOG
     WORKFLOW_LOG = []
 # Data collector for international students
 class InternationalStudentDataCollector:
     """Collects financial data for international students from different countries"""
@@ -78,6 +188,24 @@ class InternationalStudentDataCollector:
         """Initialize the data collector with a model for generating data"""
         self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
         self.cache = {}
     def _get_data_with_caching(self, prompt_key, prompt):
         """Get data with caching to avoid repeated API calls"""
@@ -223,6 +351,10 @@ class InternationalStudentDataCollector:
         return self._get_data_with_caching(prompt_key, legal_prompt)
 # =======================================
 # Knowledge Base (RAG Implementation)
@@ -234,19 +366,23 @@ class KnowledgeBase:
     def __init__(self, domain: str):
         """Initialize the knowledge base for a specific domain"""
         self.domain = domain
-        self.vector_store = None
-        self.retriever = None
         self.data_collector = InternationalStudentDataCollector()
         self.embeddings = OpenAIEmbeddings()
     def _initialize_for_country(self, country: str):
         """Initialize the vector store for a specific country"""
         domain_key = f"{self.domain}_{country.lower()}"
-        log_workflow(f"Initializing knowledge base", {"domain": self.domain, "country": country})
-        if self.vector_store is not None:
-            log_workflow("Using existing vector store")
-            return
         # Get country-specific data from the data collector
         if self.domain == "banking":
@@ -263,6 +399,9 @@ class KnowledgeBase:
             domain_texts = self.data_collector.get_career_data(country)
         elif self.domain == "legal":
             domain_texts = self.data_collector.get_legal_data(country)
         else:
             domain_texts = [f"General information for {self.domain} domain for {country} international students."]
@@ -274,17 +413,22 @@ class KnowledgeBase:
         # Create vector store with embeddings
         try:
-            self.vector_store = Chroma.from_texts(
                 splits,
                 self.embeddings,
                 collection_name=domain_key
             )
             # Create retriever for similarity search
-            self.retriever = self.vector_store.as_retriever(
                 search_type="similarity",
                 search_kwargs={"k": 3}
             )
             log_workflow("Vector store created successfully")
         except Exception as e:
             log_workflow("Error creating vector store", str(e))
@@ -298,14 +442,19 @@ class KnowledgeBase:
             # Initialize the vector store if needed
             self._initialize_for_country(country)
-            if self.retriever:
-                # Use the retriever to find similar content
-                documents = self.retriever.get_relevant_documents(query)
-                results = [doc.page_content for doc in documents]
-                log_workflow(f"Retrieved {len(results)} relevant documents")
-                return results
-            else:
-                raise ValueError("Retriever not initialized properly")
         except Exception as e:
             log_workflow("Error in RAG retrieval, falling back to direct retrieval", str(e))
             # Fallback to direct retrieval if vector storage fails
@@ -323,6 +472,9 @@ class KnowledgeBase:
                 return self.data_collector.get_career_data(country)
             elif self.domain == "legal":
                 return self.data_collector.get_legal_data(country)
             else:
                 return [f"Information about {self.domain} for {country} international students."]
@@ -425,6 +577,12 @@ class LegalFinanceAdvisor(SpecialistAgent):
         super().__init__(name="Legal Finance Advisor", domain="legal", llm=llm)
 # =======================================
 # Coordinator Agent (Central Agent)
 # =======================================
@@ -444,6 +602,7 @@ class CoordinatorAgent:
         self.loan_advisor = StudentLoanAdvisor(self.llm)
         self.career_planner = CareerFinancePlanner(self.llm)
         self.legal_advisor = LegalFinanceAdvisor(self.llm)
         # Map domains to specialists
         self.specialists = {
@@ -453,7 +612,8 @@ class CoordinatorAgent:
             "currency": self.currency_specialist,
             "loans": self.loan_advisor,
             "career": self.career_planner,
-            "legal": self.legal_advisor
         }
     def _identify_relevant_specialists(self, query: str) -> List[str]:
@@ -472,6 +632,7 @@ class CoordinatorAgent:
         - loans (Student Loan Advisor: educational loans, repayment strategies)
         - career (Career Finance Planner: internships, CPT/OPT, job preparation)
         - legal (Legal Finance Advisor: visa regulations, tax implications)
         Return a comma-separated list of ONLY the relevant domain codes (e.g., "banking,credit").
         """
@@ -485,8 +646,12 @@ class CoordinatorAgent:
             if "budget" not in valid_domains and ("stipend" in query.lower() or "expense" in query.lower()):
                 valid_domains.append("budget")
-            # Add legal domain if query mentions tax or visa
-            if "legal" not in valid_domains and ("tax" in query.lower() or "visa" in query.lower()):
                 valid_domains.append("legal")
             # Add career domain if query mentions internship, CPT, or OPT
@@ -499,6 +664,8 @@ class CoordinatorAgent:
             log_workflow("Error identifying specialists", str(e))
             # Default to essential domains if there's an error
             default_domains = ["banking", "budget"]
             if "credit" in query.lower():
                 default_domains.append("credit")
             return default_domains
@@ -560,448 +727,4 @@ class CoordinatorAgent:
             "winner": winner[0],
             "votes": voting_results,
             "specialist_votes": specialist_votes
-        }
-    def _generate_plans(self, financial_goal: str, constraints: str, country: str) -> Dict[str, str]:
-        """Implement Multi-path Plan Generator pattern"""
-        log_workflow("Multi-path Plan Generator: Creating financial plans",
-                    {"goal": financial_goal[:50], "country": country})
-        # Create prompts for different risk approaches
-        planning_prompt_template = f"""
-        As a financial advisor for international students from {country}, create a {{approach}} financial strategy for:
-        GOAL: {financial_goal}
-        CONSTRAINTS: {constraints}
-        Your {{approach}} strategy should include:
-        1. Detailed step-by-step actions with timeline
-        2. Specific financial products/services with exact names and costs
-        3. Precise breakdown of benefits and risks
-        4. Expected outcomes with realistic numbers
-        5. Mitigation strategies for potential challenges
-        Format with clear headings, bullet points, and numbered steps.
-        Include specific bank names, service providers, dollar amounts, and time frames.
-        """
-        plans = {}
-        try:
-            # Create conservative plan using Budget Manager
-            log_workflow("Generating conservative plan")
-            conservative_prompt = planning_prompt_template.format(approach="CONSERVATIVE (lowest risk)")
-            plans["conservative"] = self.budget_manager.run(conservative_prompt, country)
-            # Create balanced plan using Banking Advisor
-            log_workflow("Generating balanced plan")
-            balanced_prompt = planning_prompt_template.format(approach="BALANCED (moderate risk/reward)")
-            plans["balanced"] = self.banking_advisor.run(balanced_prompt, country)
-            # Create growth plan using Credit Builder
-            log_workflow("Generating growth plan")
-            growth_prompt = planning_prompt_template.format(approach="GROWTH-ORIENTED (higher potential returns)")
-            plans["growth"] = self.credit_builder.run(growth_prompt, country)
-            log_workflow("All plans generated successfully")
-            return plans
-        except Exception as e:
-            log_workflow("Error generating financial plans", str(e))
-            return {
-                "conservative": f"Error generating conservative plan: {str(e)}",
-                "balanced": f"Error generating balanced plan: {str(e)}",
-                "growth": f"Error generating growth plan: {str(e)}"
-            }
-    def _reflect_on_recommendation(self, recommendation: str, student_profile: Dict[str, Any]) -> str:
-        """Implement Self-reflection pattern"""
-        country = student_profile.get("home_country", "unknown")
-        visa_type = student_profile.get("visa_type", "unknown")
-        log_workflow("Self-reflection: Reviewing recommendations",
-                    {"country": country, "visa_type": visa_type})
-        reflection_prompt = f"""
-        As a Legal Financial Advisor for international students, evaluate this financial recommendation:
-        STUDENT PROFILE:
-        Home Country: {country}
-        Visa Type: {visa_type}
-        University: {student_profile.get('university', 'unknown')}
-        Funding: {student_profile.get('funding', 'unknown')}
-        Additional Info: {student_profile.get('additional_info', 'none')}
-        RECOMMENDATION:
-        {recommendation}
-        Please reflect on:
-        1. Does this recommendation fully comply with {visa_type} visa restrictions?
-        2. Is the advice properly tailored to {country} students' unique circumstances?
-        3. Are there any assumptions that might not apply to international students?
-        4. Could any part of this advice create legal/immigration issues?
-        5. Is the recommendation practical given typical international student constraints?
-        6. Does it address all aspects of the original query completely?
-        Provide a detailed assessment with specific recommendations for improvement.
-        """
-        try:
-            log_workflow("Generating legal reflection")
-            reflection = self.legal_advisor.run(reflection_prompt, country)
-            log_workflow("Reflection complete")
-            return reflection
-        except Exception as e:
-            log_workflow("Error during self-reflection", str(e))
-            return f"Unable to complete self-reflection due to an error: {str(e)}"
-    def run(self, query: str, student_profile: Dict[str, Any]) -> str:
-        """Orchestrate the specialist agents to create a comprehensive response"""
-        log_workflow("COORDINATOR: Processing new query", {"query": query[:100]})
-        country = student_profile.get("home_country", "unknown")
-        # 1. Analyze the query to identify which specialists to consult
-        relevant_domains = self._identify_relevant_specialists(query)
-        # 2. Collect advice from relevant specialists
-        specialist_advice = {}
-        for domain in relevant_domains:
-            if domain in self.specialists:
-                specialist = self.specialists[domain]
-                advice = specialist.run(query, country)
-                specialist_advice[domain] = advice
-        # 3. Generate multi-path financial plans for the query
-        constraints = f"""
-        Home Country: {country}
-        Visa Type: {student_profile.get('visa_type', 'F-1')}
-        University: {student_profile.get('university', 'unknown')}
-        Funding: {student_profile.get('funding', 'unknown')}
-        Additional Info: {student_profile.get('additional_info', 'none')}
-        """
-        plans = self._generate_plans(query, constraints, country)
-        # 4. Synthesize the collected advice and plans into a coherent response
-        log_workflow("Synthesizing comprehensive response")
-        # Create the specialist advice text separately
-        specialist_advice_text = "\n".join([f"--- {domain.upper()} SPECIALIST ---\n{advice[:1000]}\n" for domain, advice in specialist_advice.items()])
-        synthesis_prompt = f"""
-        As the coordinator for an International Student Finance Portal, synthesize specialist advice and financial plans into a comprehensive response.
-        STUDENT:
-        - Home Country: {country}
-        - Visa Type: {student_profile.get('visa_type', 'F-1')}
-        - University: {student_profile.get('university', 'unknown')}
-        - Funding: {student_profile.get('funding', 'unknown')}
-        - Additional Info: {student_profile.get('additional_info', 'none')}
-        QUERY:
-        {query}
-        SPECIALIST ADVICE:
-        {specialist_advice_text}
-        FINANCIAL APPROACHES:
-        --- CONSERVATIVE APPROACH ---
-        {plans.get('conservative', 'No conservative plan available.')[:1000]}
-        --- BALANCED APPROACH ---
-        {plans.get('balanced', 'No balanced plan available.')[:1000]}
-        --- GROWTH-ORIENTED APPROACH ---
-        {plans.get('growth', 'No growth-oriented plan available.')[:1000]}
-        Create a detailed response with:
-        1. PART 1: Direct answers to each specific aspect of the query - banking, credit, stipend management, etc.
-        2. PART 2: Multiple financial approaches (conservative, balanced, growth-oriented)
-        Each section must be extremely detailed with:
-        - Specific bank/service names
-        - Exact documentation requirements
-        - Step-by-step processes
-        - Precise dollar amounts
-        - Concrete timelines
-        Format with clear headings, bullet points, and numbered steps.
-        """
-        try:
-            # Generate the synthesized response
-            log_workflow("Generating final synthesized response")
-            synthesis_response = self.llm.invoke(synthesis_prompt)
-            # 5. Self-reflection (check for international student appropriateness)
-            log_workflow("Performing self-reflection")
-            reflection = self._reflect_on_recommendation(synthesis_response.content, student_profile)
-            # 6. Final response with reflection incorporated
-            log_workflow("Creating final response with reflection incorporated")
-            final_prompt = f"""
-            Revise this financial advice based on legal reflection:
-            ORIGINAL ADVICE:
-            {synthesis_response.content}
-            LEGAL REFLECTION:
-            {reflection}
-            Create a final version that:
-            1. Incorporates all legal considerations
-            2. Maintains the comprehensive nature of the original advice
-            3. Addresses EVERY aspect of the original query specifically and in detail:
-               - Bank account setup (specific banks, fees, documents)
-               - Credit building (specific cards, exact steps)
-               - Money transfers (exact services, fees, processes)
-               - Stipend management (precise budget breakdown)
-               - Tax implications (specific treaty benefits, forms)
-               - CPT/internship planning (exact timeline, requirements)
-            4. Includes all three financial approaches (conservative, balanced, growth)
-            Format with clear headings, bullet points, and numbered steps.
-            """
-            log_workflow("Generating final response")
-            final_response = self.llm.invoke(final_prompt)
-            log_workflow("Response generation complete")
-            # Return both the response and the workflow log
-            return final_response.content
-        except Exception as e:
-            log_workflow("Error in coordinator synthesis", str(e))
-            # Fallback response if synthesis fails
-            fallback = "## Financial Advice Summary\n\n"
-            for domain, advice in specialist_advice.items():
-                domain_name = domain.replace("_", " ").title()
-                fallback += f"### {domain_name} Advice\n{advice[:500]}...\n\n"
-            fallback += "\n## Multiple Financial Approaches\n\n"
-            for approach, plan in plans.items():
-                approach_name = approach.replace("_", " ").title()
-                fallback += f"### {approach_name} Approach\n{plan[:500]}...\n\n"
-            return fallback
-# =======================================
-# Main Portal Interface
-# =======================================
-class FinancePortal:
-    """Main interface for the International Student Finance Portal"""
-    def __init__(self):
-        """Initialize the finance portal with a coordinator agent"""
-        self.coordinator = CoordinatorAgent()
-        self.student_profiles = {}
-    def register_student(self, student_id: str, profile: Dict[str, Any]):
-        """Register a new student profile"""
-        self.student_profiles[student_id] = profile
-    def get_student_profile(self, student_id: str) -> Optional[Dict[str, Any]]:
-        """Get a student's profile"""
-        return self.student_profiles.get(student_id)
-    def handle_query(self, student_id: str, query: str) -> str:
-        """Process a student query"""
-        profile = self.get_student_profile(student_id)
-        if not profile:
-            return "Please provide your profile information first."
-        if not query or query.strip() == "":
-            return "Please enter a specific financial question."
-        log_workflow(f"Processing query for student {student_id}", {"query": query[:50]})
-        # Clear workflow log for new query
-        clear_workflow_log()
-        try:
-            # Process the query with the coordinator
-            response = self.coordinator.run(query, profile)
-            # Get the workflow log
-            workflow_log = get_workflow_log()
-            # Combine the response and workflow log
-            full_response = f"{response}\n\n---\n\n{workflow_log}"
-            return full_response
-        except Exception as e:
-            log_workflow(f"Error handling query", str(e))
-            # Return the error with the workflow log
-            workflow_log = get_workflow_log()
-            return f"I encountered an error while processing your request: {str(e)}\n\n---\n\n{workflow_log}"
-def create_interface():
-    """Create the Gradio interface for the finance portal.
-    Builds a two-column Blocks layout (profile and question inputs on the
-    left, the advice panel on the right), wires the submit and reset buttons,
-    and returns the Blocks object without launching it.
-    """
-    # One portal instance is shared by every callback of this interface
-    portal = FinancePortal()
-    def handle_query(query, country, visa_type, university, funding, additional_info):
-        """Validate the form, register the profile and return the portal's answer"""
-        if not query or query.strip() == "":
-            return "Please enter a financial question."
-        if not country:
-            return "Please select your home country."
-        if not visa_type:
-            return "Please select your visa type."
-        # Create a composite student profile
-        # Every submission re-registers under the same fixed id, overwriting
-        # the previous profile
-        student_id = "current_user"
-        profile = {
-            "home_country": country,
-            "visa_type": visa_type,
-            "university": university,
-            "funding": funding,
-            "additional_info": additional_info
-        }
-        portal.register_student(student_id, profile)
-        return portal.handle_query(student_id, query)
-    # Create Gradio interface
-    with gr.Blocks(title="International Student Finance Portal") as demo:
-        gr.Markdown("# International Student Finance Portal")
-        gr.Markdown("Get personalized financial advice tailored for international graduate students with visible workflow.")
-        with gr.Row():
-            # Left column: student profile fields and the question box
-            with gr.Column(scale=2):
-                country = gr.Dropdown(
-                    label="Home Country",
-                    choices=["", "India", "China", "Brazil", "Other"],
-                    value=""
-                )
-                visa_type = gr.Dropdown(
-                    label="Visa Type",
-                    choices=["", "F-1", "J-1", "M-1", "Other"],
-                    value=""
-                )
-                university = gr.Textbox(
-                    label="University",
-                    placeholder="e.g., Stanford University"
-                )
-                funding = gr.Dropdown(
-                    label="Primary Funding Source",
-                    choices=["", "Self/Family", "Scholarship", "TA/RA Position", "Education Loan", "Other"],
-                    value=""
-                )
-                additional_info = gr.Textbox(
-                    label="Additional Information (Optional)",
-                    placeholder="Program, expected duration, family situation, etc."
-                )
-                # Predefined query templates
-                query_templates = gr.Dropdown(
-                    label="Common Questions (Select or type your own below)",
-                    choices=[
-                        "",
-                        "How do I open a bank account as an international student?",
-                        "What's the best way to build credit in the US?",
-                        "How should I manage my TA/RA stipend?",
-                        "What are my options for sending/receiving money from home?",
-                        "How do CPT/OPT affect my financial situation?",
-                        "What student loan options are available to me?",
-                        "How should I budget for living expenses in the US?",
-                        "I just arrived in the US from India on an F-1 visa to start my PhD program at MIT with a teaching assistantship. I need advice on opening a bank account with minimal fees, building credit from scratch since I have no US history, sending money between India and the US at the best rates, managing my $2,500 monthly TA stipend while saving for emergencies, and understanding tax implications under the US-India tax treaty. Also, how should I financially prepare for a potential CPT internship next summer?"
-                    ],
-                    value=""
-                )
-                query = gr.Textbox(
-                    label="Your Financial Question",
-                    placeholder="Type your financial question here...",
-                    lines=4
-                )
-                # Update query box when template is selected
-                # (the empty template writes an empty string into the box)
-                query_templates.change(
-                    fn=lambda x: x if x else "",
-                    inputs=query_templates,
-                    outputs=query
-                )
-                submit_btn = gr.Button("Get Financial Advice", variant="primary")
-                clear_btn = gr.Button("Reset")
-            # Right column: the advice panel
-            with gr.Column(scale=3):
-                # The advice is rendered by a Markdown component inside a group
-                with gr.Group():
-                    gr.Markdown("### Your Personalized Financial Advice")
-                    response = gr.Markdown()
-                    # Add a loading message while waiting for response
-                    # First of two click handlers on submit_btn: unqueued, it
-                    # only writes a placeholder into the response panel
-                    submit_btn.click(
-                        fn=lambda: "## Processing Your Query\n\nConsulting specialist advisors and generating multiple financial approaches...\n\nPlease wait a moment as this may take up to a minute.",
-                        inputs=None,
-                        outputs=response,
-                        queue=False
-                    )
-        # Handle main query submission
-        # Second click handler on submit_btn: queued, it runs the actual query
-        # and writes the result into the same response panel
-        submit_btn.click(
-            fn=handle_query,
-            inputs=[query, country, visa_type, university, funding, additional_info],
-            outputs=response,
-            queue=True
-        )
-        # Handle reset button
-        # Returns seven empty strings, one per component listed in outputs.
-        # NOTE(review): query_templates is not among the outputs, so the
-        # template dropdown keeps its selection after a reset - confirm intended
-        clear_btn.click(
-            fn=lambda: (
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                ""
-            ),
-            inputs=None,
-            outputs=[query, country, visa_type, university, funding, additional_info, response]
-        )
-        # Feature explanation section
-        with gr.Accordion("How This System Works", open=False):
-            gr.Markdown("""
-            ### Financial Advisory Features
-            This portal uses advanced AI with multiple agent design patterns to provide personalized financial guidance:
-            1. **Retrieval Augmented Generation (RAG)**: Uses vector embeddings to retrieve country-specific financial knowledge
-            2. **Role-based Cooperation**: Specialized agents collaborate based on their domain expertise
-               - Banking Advisor: Account setup, transfers, banking documentation
-               - Credit Builder: Credit cards, credit history building, credit scores
-               - Budget Manager: Expense tracking, savings goals, stipend management
-               - Currency Exchange Specialist: International transfers, exchange rates
-               - Student Loan Advisor: Loan options, repayment strategies
-               - Career Finance Planner: CPT/OPT financial planning, internships
-               - Legal Finance Advisor: Visa compliance, tax treaties, reporting requirements
-            3. **Voting-based Cooperation**: Specialists vote on recommendations when multiple options exist
-            4. **Self-reflection**: Legal/visa compliance check on all financial advice
-            5. **Multi-path Plan Generator**: Different financial strategies based on risk tolerance
-            The workflow log at the bottom of each response shows you exactly which components ran and in what order.
-            """)
-    return demo
-# If this is the main script being run
-if __name__ == "__main__":
-    print("Starting International Student Finance Portal with Visible Workflow...")
-    print("This implementation tests all components and shows the workflow in real-time.")
-    interface = create_interface()
-    interface.launch()

 import sys
 import time
 import json
+import threading
 from typing import List, Dict, Any, Optional
+from functools import lru_cache
 try:
     # Import required libraries
     global WORKFLOW_LOG
     WORKFLOW_LOG = []
+# =======================================
+# Tax Regulation Database
+# =======================================
+class TaxRegulationDatabase:
+    """Database of tax regulations for international students"""
+    def __init__(self):
+        """Initialize the tax regulation database"""
+        self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
+        self.tax_regulations = {}
+        self.tax_treaties = {}
+        self.lock = threading.Lock()
+    def preload_common_countries(self):
+        """Preload tax regulations for common countries"""
+        common_countries = ["India", "China", "South Korea", "Brazil", "Saudi Arabia",
+                           "Canada", "Mexico", "Taiwan", "Japan", "Vietnam"]
+        log_workflow("Preloading tax regulations for common countries")
+        for country in common_countries:
+            # Start loading in background threads to avoid blocking startup
+            thread = threading.Thread(target=self._load_country_tax_info, args=(country,))
+            thread.daemon = True
+            thread.start()
+    def _load_country_tax_info(self, country):
+        """Load tax information for a specific country"""
+        self._get_tax_regulations(country)
+        self._get_tax_treaty(country)
+    @lru_cache(maxsize=32)
+    def _get_tax_regulations(self, country):
+        """Get tax regulations for a specific country"""
+        if country in self.tax_regulations:
+            return self.tax_regulations[country]
+        log_workflow(f"Loading tax regulations for {country}")
+        prompt = f"""
+        Provide 5 specific, factual statements about tax regulations that directly affect international students from {country} studying in the United States.
+        Focus on:
+        1. FICA tax exemption status for F-1/J-1 students from {country}
+        2. Federal income tax filing requirements for {country} students
+        3. State tax considerations specifically relevant to {country} students
+        4. Any special tax forms required for {country} citizens (beyond standard 1040NR, 8843, etc.)
+        5. Tax implications for various types of income (scholarships, stipends, OPT income, passive income)
+        Format as a list of factual, specific statements, one per line.
+        Include exact form numbers, specific dollar thresholds, and deadlines where applicable.
+        """
+        try:
+            response = self.llm.invoke(prompt)
+            regulations = [line.strip() for line in response.content.split('\n') if line.strip()]
+            with self.lock:
+                self.tax_regulations[country] = regulations
+            log_workflow(f"Loaded {len(regulations)} tax regulations for {country}")
+            return regulations
+        except Exception as e:
+            log_workflow(f"Error loading tax regulations for {country}", str(e))
+            return [f"Error retrieving tax regulations for {country}: {str(e)}"]
+    @lru_cache(maxsize=32)
+    def _get_tax_treaty(self, country):
+        """Get tax treaty information for a specific country"""
+        if country in self.tax_treaties:
+            return self.tax_treaties[country]
+        log_workflow(f"Loading tax treaty information for {country}")
+        prompt = f"""
+        Provide 5 specific, factual statements about the tax treaty between the United States and {country} that are especially relevant to students.
+        Focus on:
+        1. Specific treaty articles that apply to students/scholars
+        2. Income exemption limits with exact dollar amounts and time limits
+        3. Special provisions for research assistants or teaching assistants from {country}
+        4. Documentation required to claim treaty benefits as a {country} student
+        5. Step-by-step process for claiming treaty benefits on tax returns
+        Format as a list of factual, specific statements, one per line.
+        Include exact article numbers, specific dollar thresholds, and time periods where applicable.
+        If there is no tax treaty with {country}, state this fact and provide alternative information relevant to {country} nationals.
+        """
+        try:
+            response = self.llm.invoke(prompt)
+            treaty_info = [line.strip() for line in response.content.split('\n') if line.strip()]
+            with self.lock:
+                self.tax_treaties[country] = treaty_info
+            log_workflow(f"Loaded {len(treaty_info)} tax treaty facts for {country}")
+            return treaty_info
+        except Exception as e:
+            log_workflow(f"Error loading tax treaty for {country}", str(e))
+            return [f"Error retrieving tax treaty information for {country}: {str(e)}"]
+    def get_tax_information(self, country):
+        """Get comprehensive tax information for a specific country"""
+        regulations = self._get_tax_regulations(country)
+        treaty = self._get_tax_treaty(country)
+        return {
+            "regulations": regulations,
+            "treaty": treaty
+        }
 # Data collector for international students
 class InternationalStudentDataCollector:
     """Collects financial data for international students from different countries"""
         """Initialize the data collector with a model for generating data"""
         self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
         self.cache = {}
+        self.tax_database = TaxRegulationDatabase()
+    def preload_common_countries(self):
+        """Preload data for common source countries"""
         # Fire-and-forget warm-up: this method returns nothing and never joins
         # the threads it starts.
+        log_workflow("Preloading data for common source countries")
+        # Start tax database preloading
+        self.tax_database.preload_common_countries()
+        # Common countries to preload
+        common_countries = ["India", "China"]
+        # Preload basic information for common domains
+        for country in common_countries:
+            for domain_func in [self.get_banking_data, self.get_credit_data]:
                 # One thread per (country, getter) pair: four threads for the
                 # two countries and two getters listed above.
                 # NOTE(review): Thread discards the getter's return value, so
                 # the benefit presumably comes from the getters filling
                 # self.cache - confirm in _get_data_with_caching.
+                thread = threading.Thread(target=domain_func, args=(country,))
                 # Daemon threads do not keep the interpreter alive, so an
                 # unfinished preload is abandoned when the program exits.
+                thread.daemon = True
+                thread.start()
     def _get_data_with_caching(self, prompt_key, prompt):
         """Get data with caching to avoid repeated API calls"""
         return self._get_data_with_caching(prompt_key, legal_prompt)
+    def get_tax_data(self, country):
+        """Get comprehensive tax information for international students from specific country"""
         # Pure delegation to the TaxRegulationDatabase created in __init__; the
         # result is passed through unchanged. Callers in this file index it
         # with the "regulations" and "treaty" keys.
+        return self.tax_database.get_tax_information(country)
 # =======================================
 # Knowledge Base (RAG Implementation)
     def __init__(self, domain: str):
         """Initialize the knowledge base for a specific domain"""
         # domain selects which data-collector call feeds the store and is the
         # prefix of the collection name built in _initialize_for_country.
         self.domain = domain
         # Both dictionaries are filled by _initialize_for_country and are keyed
         # by the lower-cased country name.
+        self.vector_stores = {}  # Dictionary to store vector stores by country
+        self.retrievers = {}  # Dictionary to store retrievers by country
         # Each knowledge base constructs its own collector and embeddings
         # object rather than receiving shared ones.
         self.data_collector = InternationalStudentDataCollector()
         self.embeddings = OpenAIEmbeddings()
         # Held around reads and writes of the two dictionaries above.
+        self.lock = threading.Lock()
     def _initialize_for_country(self, country: str):
         """Initialize the vector store for a specific country"""
         # The collection name combines domain and lower-cased country; the
         # lookup dictionaries are keyed by the lower-cased country alone.
         domain_key = f"{self.domain}_{country.lower()}"
+        # Check if already initialized
+        with self.lock:
+            if country.lower() in self.vector_stores:
+                log_workflow("Using existing vector store")
+                return
         # NOTE(review): the lock is released here and only re-acquired to store
         # the finished objects, so two threads initializing the same country
         # can both pass the check above and both build a store; the later
         # assignment overwrites the earlier one.
+        log_workflow(f"Initializing knowledge base", {"domain": self.domain, "country": country})
         # Get country-specific data from the data collector
         # NOTE(review): in this view the "banking" test is followed directly by
         # get_career_data; lines between them (other domain branches) appear to
         # be omitted - confirm against the full file.
         if self.domain == "banking":
             domain_texts = self.data_collector.get_career_data(country)
         elif self.domain == "legal":
             domain_texts = self.data_collector.get_legal_data(country)
+        elif self.domain == "tax":
             # Tax data arrives as a dict; its two values are concatenated into
             # one sequence of texts.
+            tax_info = self.data_collector.get_tax_data(country)
+            domain_texts = tax_info["regulations"] + tax_info["treaty"]
         else:
             # Unknown domains get a single placeholder sentence.
             domain_texts = [f"General information for {self.domain} domain for {country} international students."]
         # Create vector store with embeddings
         # NOTE(review): `splits` is not assigned in the visible lines;
         # presumably it is derived from domain_texts in lines omitted from this
         # view - confirm.
         try:
+            vector_store = Chroma.from_texts(
                 splits,
                 self.embeddings,
                 collection_name=domain_key
             )
             # Create retriever for similarity search
+            retriever = vector_store.as_retriever(
                 search_type="similarity",
                 search_kwargs={"k": 3}
             )
             # Publish both objects together under the same key.
+            with self.lock:
+                self.vector_stores[country.lower()] = vector_store
+                self.retrievers[country.lower()] = retriever
             log_workflow("Vector store created successfully")
         except Exception as e:
             # In the visible lines the failure is only logged, not re-raised,
             # and nothing is stored for this country.
             log_workflow("Error creating vector store", str(e))
             # Initialize the vector store if needed
             self._initialize_for_country(country)
+            # Check if retriever exists for this country
+            country_key = country.lower()
+            with self.lock:
+                if country_key in self.retrievers:
+                    retriever = self.retrievers[country_key]
+                else:
+                    raise ValueError(f"Retriever not initialized for {country}")
+            # Use the retriever to find similar content
+            documents = retriever.get_relevant_documents(query)
+            results = [doc.page_content for doc in documents]
+            log_workflow(f"Retrieved {len(results)} relevant documents")
+            return results
         except Exception as e:
             log_workflow("Error in RAG retrieval, falling back to direct retrieval", str(e))
             # Fallback to direct retrieval if vector storage fails
                 return self.data_collector.get_career_data(country)
             elif self.domain == "legal":
                 return self.data_collector.get_legal_data(country)
+            elif self.domain == "tax":
+                tax_info = self.data_collector.get_tax_data(country)
+                return tax_info["regulations"] + tax_info["treaty"]
             else:
                 return [f"Information about {self.domain} for {country} international students."]
         super().__init__(name="Legal Finance Advisor", domain="legal", llm=llm)
+class TaxSpecialist(SpecialistAgent):
+    """Specialist agent for tax advice"""
     # Adds no behaviour of its own: it only fixes the name and domain passed
     # to SpecialistAgent. The "tax" domain string matches the "tax" branch in
     # the knowledge-base code and the "tax" key in the coordinator's
     # specialists map.
+    def __init__(self, llm=None):
         # llm is forwarded as-is; how a None value is handled is decided by
         # SpecialistAgent, which is not visible here.
+        super().__init__(name="Tax Specialist", domain="tax", llm=llm)
 # =======================================
 # Coordinator Agent (Central Agent)
 # =======================================
         self.loan_advisor = StudentLoanAdvisor(self.llm)
         self.career_planner = CareerFinancePlanner(self.llm)
         self.legal_advisor = LegalFinanceAdvisor(self.llm)
+        self.tax_specialist = TaxSpecialist(self.llm)
         # Map domains to specialists
         self.specialists = {
             "currency": self.currency_specialist,
             "loans": self.loan_advisor,
             "career": self.career_planner,
+            "legal": self.legal_advisor,
+            "tax": self.tax_specialist
         }
     def _identify_relevant_specialists(self, query: str) -> List[str]:
         - loans (Student Loan Advisor: educational loans, repayment strategies)
         - career (Career Finance Planner: internships, CPT/OPT, job preparation)
         - legal (Legal Finance Advisor: visa regulations, tax implications)
+        - tax (Tax Specialist: income taxes, tax treaties, FBAR, tax forms)
         Return a comma-separated list of ONLY the relevant domain codes (e.g., "banking,credit").
         """
             if "budget" not in valid_domains and ("stipend" in query.lower() or "expense" in query.lower()):
                 valid_domains.append("budget")
+            # Add tax domain if query mentions tax
+            if "tax" not in valid_domains and "tax" in query.lower():
+                valid_domains.append("tax")
+            # Add legal domain if query mentions visa
+            if "legal" not in valid_domains and "visa" in query.lower():
                 valid_domains.append("legal")
             # Add career domain if query mentions internship, CPT, or OPT
             log_workflow("Error identifying specialists", str(e))
             # Default to essential domains if there's an error
             default_domains = ["banking", "budget"]
+            if "tax" in query.lower():
+                default_domains.append("tax")
             if "credit" in query.lower():
                 default_domains.append("credit")
             return default_domains
             "winner": winner[0],
             "votes": voting_results,
             "specialist_votes": specialist_votes
+        }