Nyha15 committed on
Commit
2938edb
·
1 Parent(s): c704d60

Refactored

Browse files
Files changed (1) hide show
  1. app.py +187 -464
app.py CHANGED
@@ -9,7 +9,9 @@ import os
9
  import sys
10
  import time
11
  import json
 
12
  from typing import List, Dict, Any, Optional
 
13
 
14
  try:
15
  # Import required libraries
@@ -70,6 +72,114 @@ def clear_workflow_log():
70
  global WORKFLOW_LOG
71
  WORKFLOW_LOG = []
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # Data collector for international students
74
  class InternationalStudentDataCollector:
75
  """Collects financial data for international students from different countries"""
@@ -78,6 +188,24 @@ class InternationalStudentDataCollector:
78
  """Initialize the data collector with a model for generating data"""
79
  self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
80
  self.cache = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def _get_data_with_caching(self, prompt_key, prompt):
83
  """Get data with caching to avoid repeated API calls"""
@@ -223,6 +351,10 @@ class InternationalStudentDataCollector:
223
 
224
  return self._get_data_with_caching(prompt_key, legal_prompt)
225
 
 
 
 
 
226
 
227
  # =======================================
228
  # Knowledge Base (RAG Implementation)
@@ -234,19 +366,23 @@ class KnowledgeBase:
234
  def __init__(self, domain: str):
235
  """Initialize the knowledge base for a specific domain"""
236
  self.domain = domain
237
- self.vector_store = None
238
- self.retriever = None
239
  self.data_collector = InternationalStudentDataCollector()
240
  self.embeddings = OpenAIEmbeddings()
 
241
 
242
  def _initialize_for_country(self, country: str):
243
  """Initialize the vector store for a specific country"""
244
  domain_key = f"{self.domain}_{country.lower()}"
245
- log_workflow(f"Initializing knowledge base", {"domain": self.domain, "country": country})
246
 
247
- if self.vector_store is not None:
248
- log_workflow("Using existing vector store")
249
- return
 
 
 
 
250
 
251
  # Get country-specific data from the data collector
252
  if self.domain == "banking":
@@ -263,6 +399,9 @@ class KnowledgeBase:
263
  domain_texts = self.data_collector.get_career_data(country)
264
  elif self.domain == "legal":
265
  domain_texts = self.data_collector.get_legal_data(country)
 
 
 
266
  else:
267
  domain_texts = [f"General information for {self.domain} domain for {country} international students."]
268
 
@@ -274,17 +413,22 @@ class KnowledgeBase:
274
 
275
  # Create vector store with embeddings
276
  try:
277
- self.vector_store = Chroma.from_texts(
278
  splits,
279
  self.embeddings,
280
  collection_name=domain_key
281
  )
282
 
283
  # Create retriever for similarity search
284
- self.retriever = self.vector_store.as_retriever(
285
  search_type="similarity",
286
  search_kwargs={"k": 3}
287
  )
 
 
 
 
 
288
  log_workflow("Vector store created successfully")
289
  except Exception as e:
290
  log_workflow("Error creating vector store", str(e))
@@ -298,14 +442,19 @@ class KnowledgeBase:
298
  # Initialize the vector store if needed
299
  self._initialize_for_country(country)
300
 
301
- if self.retriever:
302
- # Use the retriever to find similar content
303
- documents = self.retriever.get_relevant_documents(query)
304
- results = [doc.page_content for doc in documents]
305
- log_workflow(f"Retrieved {len(results)} relevant documents")
306
- return results
307
- else:
308
- raise ValueError("Retriever not initialized properly")
 
 
 
 
 
309
  except Exception as e:
310
  log_workflow("Error in RAG retrieval, falling back to direct retrieval", str(e))
311
  # Fallback to direct retrieval if vector storage fails
@@ -323,6 +472,9 @@ class KnowledgeBase:
323
  return self.data_collector.get_career_data(country)
324
  elif self.domain == "legal":
325
  return self.data_collector.get_legal_data(country)
 
 
 
326
  else:
327
  return [f"Information about {self.domain} for {country} international students."]
328
 
@@ -425,6 +577,12 @@ class LegalFinanceAdvisor(SpecialistAgent):
425
  super().__init__(name="Legal Finance Advisor", domain="legal", llm=llm)
426
 
427
 
 
 
 
 
 
 
428
  # =======================================
429
  # Coordinator Agent (Central Agent)
430
  # =======================================
@@ -444,6 +602,7 @@ class CoordinatorAgent:
444
  self.loan_advisor = StudentLoanAdvisor(self.llm)
445
  self.career_planner = CareerFinancePlanner(self.llm)
446
  self.legal_advisor = LegalFinanceAdvisor(self.llm)
 
447
 
448
  # Map domains to specialists
449
  self.specialists = {
@@ -453,7 +612,8 @@ class CoordinatorAgent:
453
  "currency": self.currency_specialist,
454
  "loans": self.loan_advisor,
455
  "career": self.career_planner,
456
- "legal": self.legal_advisor
 
457
  }
458
 
459
  def _identify_relevant_specialists(self, query: str) -> List[str]:
@@ -472,6 +632,7 @@ class CoordinatorAgent:
472
  - loans (Student Loan Advisor: educational loans, repayment strategies)
473
  - career (Career Finance Planner: internships, CPT/OPT, job preparation)
474
  - legal (Legal Finance Advisor: visa regulations, tax implications)
 
475
 
476
  Return a comma-separated list of ONLY the relevant domain codes (e.g., "banking,credit").
477
  """
@@ -485,8 +646,12 @@ class CoordinatorAgent:
485
  if "budget" not in valid_domains and ("stipend" in query.lower() or "expense" in query.lower()):
486
  valid_domains.append("budget")
487
 
488
- # Add legal domain if query mentions tax or visa
489
- if "legal" not in valid_domains and ("tax" in query.lower() or "visa" in query.lower()):
 
 
 
 
490
  valid_domains.append("legal")
491
 
492
  # Add career domain if query mentions internship, CPT, or OPT
@@ -499,6 +664,8 @@ class CoordinatorAgent:
499
  log_workflow("Error identifying specialists", str(e))
500
  # Default to essential domains if there's an error
501
  default_domains = ["banking", "budget"]
 
 
502
  if "credit" in query.lower():
503
  default_domains.append("credit")
504
  return default_domains
@@ -560,448 +727,4 @@ class CoordinatorAgent:
560
  "winner": winner[0],
561
  "votes": voting_results,
562
  "specialist_votes": specialist_votes
563
- }
564
-
565
- def _generate_plans(self, financial_goal: str, constraints: str, country: str) -> Dict[str, str]:
566
- """Implement Multi-path Plan Generator pattern"""
567
- log_workflow("Multi-path Plan Generator: Creating financial plans",
568
- {"goal": financial_goal[:50], "country": country})
569
-
570
- # Create prompts for different risk approaches
571
- planning_prompt_template = f"""
572
- As a financial advisor for international students from {country}, create a {{approach}} financial strategy for:
573
-
574
- GOAL: {financial_goal}
575
-
576
- CONSTRAINTS: {constraints}
577
-
578
- Your {{approach}} strategy should include:
579
- 1. Detailed step-by-step actions with timeline
580
- 2. Specific financial products/services with exact names and costs
581
- 3. Precise breakdown of benefits and risks
582
- 4. Expected outcomes with realistic numbers
583
- 5. Mitigation strategies for potential challenges
584
-
585
- Format with clear headings, bullet points, and numbered steps.
586
- Include specific bank names, service providers, dollar amounts, and time frames.
587
- """
588
-
589
- plans = {}
590
-
591
- try:
592
- # Create conservative plan using Budget Manager
593
- log_workflow("Generating conservative plan")
594
- conservative_prompt = planning_prompt_template.format(approach="CONSERVATIVE (lowest risk)")
595
- plans["conservative"] = self.budget_manager.run(conservative_prompt, country)
596
-
597
- # Create balanced plan using Banking Advisor
598
- log_workflow("Generating balanced plan")
599
- balanced_prompt = planning_prompt_template.format(approach="BALANCED (moderate risk/reward)")
600
- plans["balanced"] = self.banking_advisor.run(balanced_prompt, country)
601
-
602
- # Create growth plan using Credit Builder
603
- log_workflow("Generating growth plan")
604
- growth_prompt = planning_prompt_template.format(approach="GROWTH-ORIENTED (higher potential returns)")
605
- plans["growth"] = self.credit_builder.run(growth_prompt, country)
606
-
607
- log_workflow("All plans generated successfully")
608
- return plans
609
- except Exception as e:
610
- log_workflow("Error generating financial plans", str(e))
611
- return {
612
- "conservative": f"Error generating conservative plan: {str(e)}",
613
- "balanced": f"Error generating balanced plan: {str(e)}",
614
- "growth": f"Error generating growth plan: {str(e)}"
615
- }
616
-
617
- def _reflect_on_recommendation(self, recommendation: str, student_profile: Dict[str, Any]) -> str:
618
- """Implement Self-reflection pattern"""
619
- country = student_profile.get("home_country", "unknown")
620
- visa_type = student_profile.get("visa_type", "unknown")
621
-
622
- log_workflow("Self-reflection: Reviewing recommendations",
623
- {"country": country, "visa_type": visa_type})
624
-
625
- reflection_prompt = f"""
626
- As a Legal Financial Advisor for international students, evaluate this financial recommendation:
627
-
628
- STUDENT PROFILE:
629
- Home Country: {country}
630
- Visa Type: {visa_type}
631
- University: {student_profile.get('university', 'unknown')}
632
- Funding: {student_profile.get('funding', 'unknown')}
633
- Additional Info: {student_profile.get('additional_info', 'none')}
634
-
635
- RECOMMENDATION:
636
- {recommendation}
637
-
638
- Please reflect on:
639
- 1. Does this recommendation fully comply with {visa_type} visa restrictions?
640
- 2. Is the advice properly tailored to {country} students' unique circumstances?
641
- 3. Are there any assumptions that might not apply to international students?
642
- 4. Could any part of this advice create legal/immigration issues?
643
- 5. Is the recommendation practical given typical international student constraints?
644
- 6. Does it address all aspects of the original query completely?
645
-
646
- Provide a detailed assessment with specific recommendations for improvement.
647
- """
648
-
649
- try:
650
- log_workflow("Generating legal reflection")
651
- reflection = self.legal_advisor.run(reflection_prompt, country)
652
- log_workflow("Reflection complete")
653
- return reflection
654
- except Exception as e:
655
- log_workflow("Error during self-reflection", str(e))
656
- return f"Unable to complete self-reflection due to an error: {str(e)}"
657
-
658
- def run(self, query: str, student_profile: Dict[str, Any]) -> str:
659
- """Orchestrate the specialist agents to create a comprehensive response"""
660
- log_workflow("COORDINATOR: Processing new query", {"query": query[:100]})
661
-
662
- country = student_profile.get("home_country", "unknown")
663
-
664
- # 1. Analyze the query to identify which specialists to consult
665
- relevant_domains = self._identify_relevant_specialists(query)
666
-
667
- # 2. Collect advice from relevant specialists
668
- specialist_advice = {}
669
- for domain in relevant_domains:
670
- if domain in self.specialists:
671
- specialist = self.specialists[domain]
672
- advice = specialist.run(query, country)
673
- specialist_advice[domain] = advice
674
-
675
- # 3. Generate multi-path financial plans for the query
676
- constraints = f"""
677
- Home Country: {country}
678
- Visa Type: {student_profile.get('visa_type', 'F-1')}
679
- University: {student_profile.get('university', 'unknown')}
680
- Funding: {student_profile.get('funding', 'unknown')}
681
- Additional Info: {student_profile.get('additional_info', 'none')}
682
- """
683
-
684
- plans = self._generate_plans(query, constraints, country)
685
-
686
- # 4. Synthesize the collected advice and plans into a coherent response
687
- log_workflow("Synthesizing comprehensive response")
688
-
689
- # Create the specialist advice text separately
690
- specialist_advice_text = "\n".join([f"--- {domain.upper()} SPECIALIST ---\n{advice[:1000]}\n" for domain, advice in specialist_advice.items()])
691
-
692
- synthesis_prompt = f"""
693
- As the coordinator for an International Student Finance Portal, synthesize specialist advice and financial plans into a comprehensive response.
694
-
695
- STUDENT:
696
- - Home Country: {country}
697
- - Visa Type: {student_profile.get('visa_type', 'F-1')}
698
- - University: {student_profile.get('university', 'unknown')}
699
- - Funding: {student_profile.get('funding', 'unknown')}
700
- - Additional Info: {student_profile.get('additional_info', 'none')}
701
-
702
- QUERY:
703
- {query}
704
-
705
- SPECIALIST ADVICE:
706
- {specialist_advice_text}
707
-
708
- FINANCIAL APPROACHES:
709
- --- CONSERVATIVE APPROACH ---
710
- {plans.get('conservative', 'No conservative plan available.')[:1000]}
711
-
712
- --- BALANCED APPROACH ---
713
- {plans.get('balanced', 'No balanced plan available.')[:1000]}
714
-
715
- --- GROWTH-ORIENTED APPROACH ---
716
- {plans.get('growth', 'No growth-oriented plan available.')[:1000]}
717
-
718
- Create a detailed response with:
719
- 1. PART 1: Direct answers to each specific aspect of the query - banking, credit, stipend management, etc.
720
- 2. PART 2: Multiple financial approaches (conservative, balanced, growth-oriented)
721
-
722
- Each section must be extremely detailed with:
723
- - Specific bank/service names
724
- - Exact documentation requirements
725
- - Step-by-step processes
726
- - Precise dollar amounts
727
- - Concrete timelines
728
-
729
- Format with clear headings, bullet points, and numbered steps.
730
- """
731
-
732
- try:
733
- # Generate the synthesized response
734
- log_workflow("Generating final synthesized response")
735
- synthesis_response = self.llm.invoke(synthesis_prompt)
736
-
737
- # 5. Self-reflection (check for international student appropriateness)
738
- log_workflow("Performing self-reflection")
739
- reflection = self._reflect_on_recommendation(synthesis_response.content, student_profile)
740
-
741
- # 6. Final response with reflection incorporated
742
- log_workflow("Creating final response with reflection incorporated")
743
- final_prompt = f"""
744
- Revise this financial advice based on legal reflection:
745
-
746
- ORIGINAL ADVICE:
747
- {synthesis_response.content}
748
-
749
- LEGAL REFLECTION:
750
- {reflection}
751
-
752
- Create a final version that:
753
- 1. Incorporates all legal considerations
754
- 2. Maintains the comprehensive nature of the original advice
755
- 3. Addresses EVERY aspect of the original query specifically and in detail:
756
- - Bank account setup (specific banks, fees, documents)
757
- - Credit building (specific cards, exact steps)
758
- - Money transfers (exact services, fees, processes)
759
- - Stipend management (precise budget breakdown)
760
- - Tax implications (specific treaty benefits, forms)
761
- - CPT/internship planning (exact timeline, requirements)
762
- 4. Includes all three financial approaches (conservative, balanced, growth)
763
-
764
- Format with clear headings, bullet points, and numbered steps.
765
- """
766
-
767
- log_workflow("Generating final response")
768
- final_response = self.llm.invoke(final_prompt)
769
- log_workflow("Response generation complete")
770
-
771
- # Return both the response and the workflow log
772
- return final_response.content
773
- except Exception as e:
774
- log_workflow("Error in coordinator synthesis", str(e))
775
-
776
- # Fallback response if synthesis fails
777
- fallback = "## Financial Advice Summary\n\n"
778
- for domain, advice in specialist_advice.items():
779
- domain_name = domain.replace("_", " ").title()
780
- fallback += f"### {domain_name} Advice\n{advice[:500]}...\n\n"
781
-
782
- fallback += "\n## Multiple Financial Approaches\n\n"
783
- for approach, plan in plans.items():
784
- approach_name = approach.replace("_", " ").title()
785
- fallback += f"### {approach_name} Approach\n{plan[:500]}...\n\n"
786
-
787
- return fallback
788
-
789
-
790
- # =======================================
791
- # Main Portal Interface
792
- # =======================================
793
-
794
- class FinancePortal:
795
- """Main interface for the International Student Finance Portal"""
796
-
797
- def __init__(self):
798
- """Initialize the finance portal with a coordinator agent"""
799
- self.coordinator = CoordinatorAgent()
800
- self.student_profiles = {}
801
-
802
- def register_student(self, student_id: str, profile: Dict[str, Any]):
803
- """Register a new student profile"""
804
- self.student_profiles[student_id] = profile
805
-
806
- def get_student_profile(self, student_id: str) -> Optional[Dict[str, Any]]:
807
- """Get a student's profile"""
808
- return self.student_profiles.get(student_id)
809
-
810
- def handle_query(self, student_id: str, query: str) -> str:
811
- """Process a student query"""
812
- profile = self.get_student_profile(student_id)
813
-
814
- if not profile:
815
- return "Please provide your profile information first."
816
-
817
- if not query or query.strip() == "":
818
- return "Please enter a specific financial question."
819
-
820
- log_workflow(f"Processing query for student {student_id}", {"query": query[:50]})
821
-
822
- # Clear workflow log for new query
823
- clear_workflow_log()
824
-
825
- try:
826
- # Process the query with the coordinator
827
- response = self.coordinator.run(query, profile)
828
-
829
- # Get the workflow log
830
- workflow_log = get_workflow_log()
831
-
832
- # Combine the response and workflow log
833
- full_response = f"{response}\n\n---\n\n{workflow_log}"
834
-
835
- return full_response
836
- except Exception as e:
837
- log_workflow(f"Error handling query", str(e))
838
-
839
- # Return the error with the workflow log
840
- workflow_log = get_workflow_log()
841
- return f"I encountered an error while processing your request: {str(e)}\n\n---\n\n{workflow_log}"
842
-
843
-
844
- def create_interface():
845
- """Create the Gradio interface for the finance portal"""
846
- portal = FinancePortal()
847
-
848
- def handle_query(query, country, visa_type, university, funding, additional_info):
849
- """Handler for query submission"""
850
- if not query or query.strip() == "":
851
- return "Please enter a financial question."
852
-
853
- if not country:
854
- return "Please select your home country."
855
-
856
- if not visa_type:
857
- return "Please select your visa type."
858
-
859
- # Create a composite student profile
860
- student_id = "current_user"
861
- profile = {
862
- "home_country": country,
863
- "visa_type": visa_type,
864
- "university": university,
865
- "funding": funding,
866
- "additional_info": additional_info
867
- }
868
-
869
- portal.register_student(student_id, profile)
870
- return portal.handle_query(student_id, query)
871
-
872
- # Create Gradio interface
873
- with gr.Blocks(title="International Student Finance Portal") as demo:
874
- gr.Markdown("# International Student Finance Portal")
875
- gr.Markdown("Get personalized financial advice tailored for international graduate students with visible workflow.")
876
-
877
- with gr.Row():
878
- with gr.Column(scale=2):
879
- country = gr.Dropdown(
880
- label="Home Country",
881
- choices=["", "India", "China", "Brazil", "Other"],
882
- value=""
883
- )
884
- visa_type = gr.Dropdown(
885
- label="Visa Type",
886
- choices=["", "F-1", "J-1", "M-1", "Other"],
887
- value=""
888
- )
889
- university = gr.Textbox(
890
- label="University",
891
- placeholder="e.g., Stanford University"
892
- )
893
- funding = gr.Dropdown(
894
- label="Primary Funding Source",
895
- choices=["", "Self/Family", "Scholarship", "TA/RA Position", "Education Loan", "Other"],
896
- value=""
897
- )
898
- additional_info = gr.Textbox(
899
- label="Additional Information (Optional)",
900
- placeholder="Program, expected duration, family situation, etc."
901
- )
902
-
903
- # Predefined query templates
904
- query_templates = gr.Dropdown(
905
- label="Common Questions (Select or type your own below)",
906
- choices=[
907
- "",
908
- "How do I open a bank account as an international student?",
909
- "What's the best way to build credit in the US?",
910
- "How should I manage my TA/RA stipend?",
911
- "What are my options for sending/receiving money from home?",
912
- "How do CPT/OPT affect my financial situation?",
913
- "What student loan options are available to me?",
914
- "How should I budget for living expenses in the US?",
915
- "I just arrived in the US from India on an F-1 visa to start my PhD program at MIT with a teaching assistantship. I need advice on opening a bank account with minimal fees, building credit from scratch since I have no US history, sending money between India and the US at the best rates, managing my $2,500 monthly TA stipend while saving for emergencies, and understanding tax implications under the US-India tax treaty. Also, how should I financially prepare for a potential CPT internship next summer?"
916
- ],
917
- value=""
918
- )
919
-
920
- query = gr.Textbox(
921
- label="Your Financial Question",
922
- placeholder="Type your financial question here...",
923
- lines=4
924
- )
925
-
926
- # Update query box when template is selected
927
- query_templates.change(
928
- fn=lambda x: x if x else "",
929
- inputs=query_templates,
930
- outputs=query
931
- )
932
-
933
- submit_btn = gr.Button("Get Financial Advice", variant="primary")
934
- clear_btn = gr.Button("Reset")
935
-
936
- with gr.Column(scale=3):
937
- # Use a textbox with markdown enabled
938
- with gr.Group():
939
- gr.Markdown("### Your Personalized Financial Advice")
940
- response = gr.Markdown()
941
-
942
- # Add a loading message while waiting for response
943
- submit_btn.click(
944
- fn=lambda: "## Processing Your Query\n\nConsulting specialist advisors and generating multiple financial approaches...\n\nPlease wait a moment as this may take up to a minute.",
945
- inputs=None,
946
- outputs=response,
947
- queue=False
948
- )
949
-
950
- # Handle main query submission
951
- submit_btn.click(
952
- fn=handle_query,
953
- inputs=[query, country, visa_type, university, funding, additional_info],
954
- outputs=response,
955
- queue=True
956
- )
957
-
958
- # Handle reset button
959
- clear_btn.click(
960
- fn=lambda: (
961
- "",
962
- "",
963
- "",
964
- "",
965
- "",
966
- "",
967
- ""
968
- ),
969
- inputs=None,
970
- outputs=[query, country, visa_type, university, funding, additional_info, response]
971
- )
972
-
973
- # Feature explanation section
974
- with gr.Accordion("How This System Works", open=False):
975
- gr.Markdown("""
976
- ### Financial Advisory Features
977
-
978
- This portal uses advanced AI with multiple agent design patterns to provide personalized financial guidance:
979
-
980
- 1. **Retrieval Augmented Generation (RAG)**: Uses vector embeddings to retrieve country-specific financial knowledge
981
-
982
- 2. **Role-based Cooperation**: Specialized agents collaborate based on their domain expertise
983
- - Banking Advisor: Account setup, transfers, banking documentation
984
- - Credit Builder: Credit cards, credit history building, credit scores
985
- - Budget Manager: Expense tracking, savings goals, stipend management
986
- - Currency Exchange Specialist: International transfers, exchange rates
987
- - Student Loan Advisor: Loan options, repayment strategies
988
- - Career Finance Planner: CPT/OPT financial planning, internships
989
- - Legal Finance Advisor: Visa compliance, tax treaties, reporting requirements
990
-
991
- 3. **Voting-based Cooperation**: Specialists vote on recommendations when multiple options exist
992
-
993
- 4. **Self-reflection**: Legal/visa compliance check on all financial advice
994
-
995
- 5. **Multi-path Plan Generator**: Different financial strategies based on risk tolerance
996
-
997
- The workflow log at the bottom of each response shows you exactly which components ran and in what order.
998
- """)
999
-
1000
- return demo
1001
-
1002
- # If this is the main script being run
1003
- if __name__ == "__main__":
1004
- print("Starting International Student Finance Portal with Visible Workflow...")
1005
- print("This implementation tests all components and shows the workflow in real-time.")
1006
- interface = create_interface()
1007
- interface.launch()
 
9
  import sys
10
  import time
11
  import json
12
+ import threading
13
  from typing import List, Dict, Any, Optional
14
+ from functools import lru_cache
15
 
16
  try:
17
  # Import required libraries
 
72
  global WORKFLOW_LOG
73
  WORKFLOW_LOG = []
74
 
75
+ # =======================================
76
+ # Tax Regulation Database
77
+ # =======================================
78
+
79
class TaxRegulationDatabase:
    """Database of tax regulations for international students.

    Tax regulation and tax treaty statements are generated per country by the
    LLM and kept in two per-instance dictionaries keyed by country name.
    Only successful responses are stored, so a failed lookup is retried on the
    next request instead of being remembered. ``self.lock`` guards every read
    and write of the two dictionaries because ``preload_common_countries``
    fills them from background threads.
    """

    def __init__(self):
        """Initialize the tax regulation database"""
        self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
        self.tax_regulations = {}  # country -> list of regulation statements
        self.tax_treaties = {}  # country -> list of treaty statements
        self.lock = threading.Lock()

    def preload_common_countries(self):
        """Preload tax regulations for common countries.

        Starts one daemon thread per country and returns immediately without
        waiting for any of them to finish.
        """
        common_countries = ["India", "China", "South Korea", "Brazil", "Saudi Arabia",
                            "Canada", "Mexico", "Taiwan", "Japan", "Vietnam"]

        log_workflow("Preloading tax regulations for common countries")
        for country in common_countries:
            # Start loading in background threads to avoid blocking startup
            threading.Thread(
                target=self._load_country_tax_info,
                args=(country,),
                daemon=True,
            ).start()

    def _load_country_tax_info(self, country):
        """Load tax information for a specific country"""
        self._get_tax_regulations(country)
        self._get_tax_treaty(country)

    @staticmethod
    def _non_empty_lines(text):
        """Return the stripped, non-empty lines of ``text``."""
        return [line.strip() for line in text.split('\n') if line.strip()]

    def _get_tax_regulations(self, country):
        """Get tax regulations for a specific country.

        Args:
            country: Country name; used as the cache key and inserted into
                the prompt.

        Returns:
            A list of statement strings. If the LLM call fails, a one-element
            list holding the error message is returned and nothing is cached,
            so the next call tries again.
        """
        with self.lock:
            cached = self.tax_regulations.get(country)
        if cached is not None:
            return cached

        log_workflow(f"Loading tax regulations for {country}")
        prompt = f"""
        Provide 5 specific, factual statements about tax regulations that directly affect international students from {country} studying in the United States.
        Focus on:
        1. FICA tax exemption status for F-1/J-1 students from {country}
        2. Federal income tax filing requirements for {country} students
        3. State tax considerations specifically relevant to {country} students
        4. Any special tax forms required for {country} citizens (beyond standard 1040NR, 8843, etc.)
        5. Tax implications for various types of income (scholarships, stipends, OPT income, passive income)

        Format as a list of factual, specific statements, one per line.
        Include exact form numbers, specific dollar thresholds, and deadlines where applicable.
        """

        try:
            # The lock is not held during the LLM call; two threads asking for
            # the same uncached country may both fetch, and the last write wins.
            response = self.llm.invoke(prompt)
            regulations = self._non_empty_lines(response.content)

            with self.lock:
                self.tax_regulations[country] = regulations

            log_workflow(f"Loaded {len(regulations)} tax regulations for {country}")
            return regulations
        except Exception as e:
            log_workflow(f"Error loading tax regulations for {country}", str(e))
            return [f"Error retrieving tax regulations for {country}: {str(e)}"]

    def _get_tax_treaty(self, country):
        """Get tax treaty information for a specific country.

        Args:
            country: Country name; used as the cache key and inserted into
                the prompt.

        Returns:
            A list of statement strings. If the LLM call fails, a one-element
            list holding the error message is returned and nothing is cached,
            so the next call tries again.
        """
        with self.lock:
            cached = self.tax_treaties.get(country)
        if cached is not None:
            return cached

        log_workflow(f"Loading tax treaty information for {country}")
        prompt = f"""
        Provide 5 specific, factual statements about the tax treaty between the United States and {country} that are especially relevant to students.
        Focus on:
        1. Specific treaty articles that apply to students/scholars
        2. Income exemption limits with exact dollar amounts and time limits
        3. Special provisions for research assistants or teaching assistants from {country}
        4. Documentation required to claim treaty benefits as a {country} student
        5. Step-by-step process for claiming treaty benefits on tax returns

        Format as a list of factual, specific statements, one per line.
        Include exact article numbers, specific dollar thresholds, and time periods where applicable.
        If there is no tax treaty with {country}, state this fact and provide alternative information relevant to {country} nationals.
        """

        try:
            # Same locking policy as _get_tax_regulations: fetch unlocked,
            # store under the lock.
            response = self.llm.invoke(prompt)
            treaty_info = self._non_empty_lines(response.content)

            with self.lock:
                self.tax_treaties[country] = treaty_info

            log_workflow(f"Loaded {len(treaty_info)} tax treaty facts for {country}")
            return treaty_info
        except Exception as e:
            log_workflow(f"Error loading tax treaty for {country}", str(e))
            return [f"Error retrieving tax treaty information for {country}: {str(e)}"]

    def get_tax_information(self, country):
        """Get comprehensive tax information for a specific country.

        Returns:
            A dict with two keys: ``"regulations"`` and ``"treaty"``, each
            holding the list returned by the corresponding lookup.
        """
        regulations = self._get_tax_regulations(country)
        treaty = self._get_tax_treaty(country)

        return {
            "regulations": regulations,
            "treaty": treaty
        }
182
+
183
  # Data collector for international students
184
  class InternationalStudentDataCollector:
185
  """Collects financial data for international students from different countries"""
 
188
  """Initialize the data collector with a model for generating data"""
189
  self.llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")
190
  self.cache = {}
191
+ self.tax_database = TaxRegulationDatabase()
192
+
193
+ def preload_common_countries(self):
194
+ """Preload data for common source countries"""
195
+ log_workflow("Preloading data for common source countries")
196
+
197
+ # Start tax database preloading
198
+ self.tax_database.preload_common_countries()
199
+
200
+ # Common countries to preload
201
+ common_countries = ["India", "China"]
202
+
203
+ # Preload basic information for common domains
204
+ for country in common_countries:
205
+ for domain_func in [self.get_banking_data, self.get_credit_data]:
206
+ thread = threading.Thread(target=domain_func, args=(country,))
207
+ thread.daemon = True
208
+ thread.start()
209
 
210
  def _get_data_with_caching(self, prompt_key, prompt):
211
  """Get data with caching to avoid repeated API calls"""
 
351
 
352
  return self._get_data_with_caching(prompt_key, legal_prompt)
353
 
354
def get_tax_data(self, country):
    """Return the tax database's information for students from ``country``.

    Delegates to ``self.tax_database.get_tax_information`` and passes its
    result through unchanged.
    """
    tax_information = self.tax_database.get_tax_information(country)
    return tax_information
357
+
358
 
359
  # =======================================
360
  # Knowledge Base (RAG Implementation)
 
366
  def __init__(self, domain: str):
367
  """Initialize the knowledge base for a specific domain"""
368
  self.domain = domain
369
+ self.vector_stores = {} # Dictionary to store vector stores by country
370
+ self.retrievers = {} # Dictionary to store retrievers by country
371
  self.data_collector = InternationalStudentDataCollector()
372
  self.embeddings = OpenAIEmbeddings()
373
+ self.lock = threading.Lock()
374
 
375
  def _initialize_for_country(self, country: str):
376
  """Initialize the vector store for a specific country"""
377
  domain_key = f"{self.domain}_{country.lower()}"
 
378
 
379
+ # Check if already initialized
380
+ with self.lock:
381
+ if country.lower() in self.vector_stores:
382
+ log_workflow("Using existing vector store")
383
+ return
384
+
385
+ log_workflow(f"Initializing knowledge base", {"domain": self.domain, "country": country})
386
 
387
  # Get country-specific data from the data collector
388
  if self.domain == "banking":
 
399
  domain_texts = self.data_collector.get_career_data(country)
400
  elif self.domain == "legal":
401
  domain_texts = self.data_collector.get_legal_data(country)
402
+ elif self.domain == "tax":
403
+ tax_info = self.data_collector.get_tax_data(country)
404
+ domain_texts = tax_info["regulations"] + tax_info["treaty"]
405
  else:
406
  domain_texts = [f"General information for {self.domain} domain for {country} international students."]
407
 
 
413
 
414
  # Create vector store with embeddings
415
  try:
416
+ vector_store = Chroma.from_texts(
417
  splits,
418
  self.embeddings,
419
  collection_name=domain_key
420
  )
421
 
422
  # Create retriever for similarity search
423
+ retriever = vector_store.as_retriever(
424
  search_type="similarity",
425
  search_kwargs={"k": 3}
426
  )
427
+
428
+ with self.lock:
429
+ self.vector_stores[country.lower()] = vector_store
430
+ self.retrievers[country.lower()] = retriever
431
+
432
  log_workflow("Vector store created successfully")
433
  except Exception as e:
434
  log_workflow("Error creating vector store", str(e))
 
442
  # Initialize the vector store if needed
443
  self._initialize_for_country(country)
444
 
445
+ # Check if retriever exists for this country
446
+ country_key = country.lower()
447
+ with self.lock:
448
+ if country_key in self.retrievers:
449
+ retriever = self.retrievers[country_key]
450
+ else:
451
+ raise ValueError(f"Retriever not initialized for {country}")
452
+
453
+ # Use the retriever to find similar content
454
+ documents = retriever.get_relevant_documents(query)
455
+ results = [doc.page_content for doc in documents]
456
+ log_workflow(f"Retrieved {len(results)} relevant documents")
457
+ return results
458
  except Exception as e:
459
  log_workflow("Error in RAG retrieval, falling back to direct retrieval", str(e))
460
  # Fallback to direct retrieval if vector storage fails
 
472
  return self.data_collector.get_career_data(country)
473
  elif self.domain == "legal":
474
  return self.data_collector.get_legal_data(country)
475
+ elif self.domain == "tax":
476
+ tax_info = self.data_collector.get_tax_data(country)
477
+ return tax_info["regulations"] + tax_info["treaty"]
478
  else:
479
  return [f"Information about {self.domain} for {country} international students."]
480
 
 
577
  super().__init__(name="Legal Finance Advisor", domain="legal", llm=llm)
578
 
579
 
580
class TaxSpecialist(SpecialistAgent):
    """Specialist agent dedicated to the ``"tax"`` domain."""

    def __init__(self, llm=None):
        # The display name and domain are fixed; only the LLM is supplied
        # by the caller.
        super().__init__(domain="tax", name="Tax Specialist", llm=llm)
584
+
585
+
586
  # =======================================
587
  # Coordinator Agent (Central Agent)
588
  # =======================================
 
602
  self.loan_advisor = StudentLoanAdvisor(self.llm)
603
  self.career_planner = CareerFinancePlanner(self.llm)
604
  self.legal_advisor = LegalFinanceAdvisor(self.llm)
605
+ self.tax_specialist = TaxSpecialist(self.llm)
606
 
607
  # Map domains to specialists
608
  self.specialists = {
 
612
  "currency": self.currency_specialist,
613
  "loans": self.loan_advisor,
614
  "career": self.career_planner,
615
+ "legal": self.legal_advisor,
616
+ "tax": self.tax_specialist
617
  }
618
 
619
  def _identify_relevant_specialists(self, query: str) -> List[str]:
 
632
  - loans (Student Loan Advisor: educational loans, repayment strategies)
633
  - career (Career Finance Planner: internships, CPT/OPT, job preparation)
634
  - legal (Legal Finance Advisor: visa regulations, tax implications)
635
+ - tax (Tax Specialist: income taxes, tax treaties, FBAR, tax forms)
636
 
637
  Return a comma-separated list of ONLY the relevant domain codes (e.g., "banking,credit").
638
  """
 
646
  if "budget" not in valid_domains and ("stipend" in query.lower() or "expense" in query.lower()):
647
  valid_domains.append("budget")
648
 
649
+ # Add tax domain if query mentions tax
650
+ if "tax" not in valid_domains and "tax" in query.lower():
651
+ valid_domains.append("tax")
652
+
653
+ # Add legal domain if query mentions visa
654
+ if "legal" not in valid_domains and "visa" in query.lower():
655
  valid_domains.append("legal")
656
 
657
  # Add career domain if query mentions internship, CPT, or OPT
 
664
  log_workflow("Error identifying specialists", str(e))
665
  # Default to essential domains if there's an error
666
  default_domains = ["banking", "budget"]
667
+ if "tax" in query.lower():
668
+ default_domains.append("tax")
669
  if "credit" in query.lower():
670
  default_domains.append("credit")
671
  return default_domains
 
727
  "winner": winner[0],
728
  "votes": voting_results,
729
  "specialist_votes": specialist_votes
730
+ }