mishrabp commited on
Commit
226b286
·
verified ·
1 Parent(s): bc55b00

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +36 -0
  2. README.md +68 -10
  3. common/__init__.py +0 -0
  4. common/aagents/__init__.py +0 -0
  5. common/aagents/core/__init__.py +4 -0
  6. common/aagents/core/model.py +36 -0
  7. common/aagents/google_agent.py +123 -0
  8. common/aagents/healthcare_agent.py +79 -0
  9. common/aagents/news_agent.py +80 -0
  10. common/aagents/search_agent.py +39 -0
  11. common/aagents/weather_agent.py +44 -0
  12. common/aagents/web_agent.py +27 -0
  13. common/aagents/web_research_agent.py +58 -0
  14. common/aagents/yf_agent.py +68 -0
  15. common/mcp/README.md +139 -0
  16. common/mcp/__init__.py +0 -0
  17. common/mcp/mcp_server.py +171 -0
  18. common/mcp/tools/__init__.py +0 -0
  19. common/mcp/tools/google_tools.py +141 -0
  20. common/mcp/tools/news_tools.py +206 -0
  21. common/mcp/tools/rag_tool.py +93 -0
  22. common/mcp/tools/search_tools.py +142 -0
  23. common/mcp/tools/time_tools.py +35 -0
  24. common/mcp/tools/weather_tools.py +237 -0
  25. common/mcp/tools/yf_tools.py +274 -0
  26. common/rag/rag.py +94 -0
  27. common/utility/__init__.py +0 -0
  28. common/utility/autogen_model_factory.py +88 -0
  29. common/utility/bkp/embedding_factory.py +49 -0
  30. common/utility/bkp/llm_factory.py +130 -0
  31. common/utility/bkp/llm_factory2.py +75 -0
  32. common/utility/langchain_model_factory.py +70 -0
  33. common/utility/logger.py +22 -0
  34. common/utility/model_factory_notused.py +302 -0
  35. common/utility/openai_model_factory.py +179 -0
  36. pyproject.toml +184 -0
  37. run.py +246 -0
  38. src/interview-assistant/Dockerfile +36 -0
  39. src/interview-assistant/README.md +68 -0
  40. src/interview-assistant/aagents/__init__.py +0 -0
  41. src/interview-assistant/aagents/candidate_profiler.py +54 -0
  42. src/interview-assistant/aagents/evaluator.py +36 -0
  43. src/interview-assistant/aagents/interview_designer.py +44 -0
  44. src/interview-assistant/aagents/job_analyst.py +34 -0
  45. src/interview-assistant/aagents/job_analyst_reviewer.py +17 -0
  46. src/interview-assistant/aagents/team_lead.py +21 -0
  47. src/interview-assistant/app.py +219 -0
  48. src/interview-assistant/debug_profiler.py +67 -0
  49. src/interview-assistant/teams/__init__.py +0 -0
  50. src/interview-assistant/teams/team.py +75 -0
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the interview-assistant Hugging Face Space.
# Builds on slim Python 3.12 and serves the app on port 7860 (the HF Spaces default).
FROM python:3.12-slim

# PYTHONUNBUFFERED: stream logs immediately (important for Space log viewers).
# PYTHONPATH: make the app package importable without installation.
ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive \
    PYTHONPATH=/app/src/interview-assistant:$PYTHONPATH

WORKDIR /app

# System deps
# git/build-essential: needed for source-built wheels; curl: fetches the uv installer.
RUN apt-get update && apt-get install -y \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata
# Copied before source so dependency layers cache across code-only changes.
COPY pyproject.toml .
COPY uv.lock .

# Copy application code
COPY common/ ./common/
COPY src/interview-assistant/ ./src/interview-assistant/

# Install dependencies using uv, then export and install with pip to system
# We use --no-dev to exclude dev dependencies if any
# NOTE(review): `uv sync` resolves into a project .venv while `uv pip install
# --system` targets the system interpreter that CMD uses — confirm the sync
# step is actually needed, or that CMD should run inside the venv instead.
RUN uv sync --frozen --no-dev && \
    uv pip install -e . --system

# Copy entry point
COPY run.py .

EXPOSE 7860

# Matches `app_port: 7860` in the Space README front matter.
CMD ["python", "run.py", "interview-assistant", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,68 @@
1
- ---
2
- title: Interview Assistant
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Interviewer Assistant
3
+ emoji: 👔
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: mit
10
+ short_description: Agentic AI for Resume Analysis & Interview Prep
11
+ ---
12
+
13
+ # 👔 Interviewer Assistant
14
+
15
+ A **Multi-Agent System** designed to assist HR and technical interviewers by automatically analyzing job descriptions and resumes to generate tailored interview questions and fitness scores.
16
+
17
+ ## 🚀 Features
18
+
19
+ - **🧠 Multi-Agent Swarm**:
20
+ - **Job Analyst**: Extracts key requirements from JDs.
21
+ - **Candidate Profiler**: Analyzes resumes for strengths and gaps.
22
+ - **Evaluator**: Scores candidates on Technical, Behavioral, and Leadership metrics.
23
+ - **Interview Designer**: Generates bespoke interview questions.
24
+ - **📄 Input Flexibility**: Accepts raw text for JD and Resume/LinkedIn profile.
25
+ - **📊 Structured Evaluation**: Provides a clear score and justification.
26
+
27
+ ## 🛠️ Architecture
28
+
29
+ ```
30
+ src/interview-assistant/
31
+ ├── app.py # Streamlit UI (Orchestrator)
32
+ ├── teams/ # Team Definitions
33
+ │ └── team.py # GroupChat Configuration
34
+ ├── aagents/ # Agent Definitions
35
+ │ ├── job_analyst.py
36
+ │ ├── candidate_profiler.py
37
+ │ ├── evaluator.py
38
+ │ ├── interview_designer.py
39
+ │ └── team_lead.py
40
+ └── Dockerfile # Deployment Configuration
41
+ ```
42
+
43
+ ## 📦 Startup
44
+
45
+ ### Local Run
46
+
47
+ 1. **Install Dependencies**:
48
+ ```bash
49
+ pip install -r src/interview-assistant/requirements.txt
50
+ ```
51
+
52
+ 2. **Run Application**:
53
+ ```bash
54
+ streamlit run src/interview-assistant/app.py
55
+ ```
56
+ The app will open at `http://localhost:8501`.
57
+
58
+ ## 🐳 Docker / Deployment
59
+
60
+ The project is packaged for **Hugging Face Spaces** (Docker SDK).
61
+
62
+ ```bash
63
+ # Build
64
+ docker build -t interviewer-assistant -f src/interview-assistant/Dockerfile .
65
+
66
+ # Run
67
+ docker run -p 7860:7860 interviewer-assistant
68
+ ```
common/__init__.py ADDED
File without changes
common/aagents/__init__.py ADDED
File without changes
common/aagents/core/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+
2
+ from .model import get_model_client
3
+
4
+ __all__ = ["get_model_client"]
common/aagents/core/model.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Provider-name -> chat-model-client factory used by all agents in this package."""
from common.utility.openai_model_factory import OpenAIModelFactory

# Default model per provider. Keys are the lowercase provider names accepted
# by get_model_client().
# NOTE(review): "gpt-4o-mini" looks like an OpenAI model id; confirm it is a
# valid model name for the groq and ollama backends.
_DEFAULT_MODELS = {
    "google": "gemini-2.5-flash",
    "openai": "gpt-4.1-mini",
    "azure": "gpt-4o-mini",
    "groq": "gpt-4o-mini",
    "ollama": "gpt-4o-mini",
}


def get_model_client(provider: str = "openai"):
    """Return a model client for *provider* (case-insensitive).

    Args:
        provider: One of "google", "openai", "azure", "groq", "ollama".

    Returns:
        Whatever ``OpenAIModelFactory.get_model`` produces for the provider's
        default model, always with ``temperature=0`` for deterministic output.

    Raises:
        ValueError: If *provider* is not a supported name.
    """
    key = provider.lower()
    # Table lookup replaces a five-way if/elif chain that repeated the same
    # factory call with only the provider/model strings varying.
    try:
        model_name = _DEFAULT_MODELS[key]
    except KeyError:
        raise ValueError(f"Unsupported provider: {provider}") from None
    return OpenAIModelFactory.get_model(
        provider=key,
        model_name=model_name,
        temperature=0,
    )
common/aagents/google_agent.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Google search agent module for web search and information retrieval."""
2
+ from agents import Agent
3
+ from common.mcp.tools.google_tools import google_search, google_search_recent
4
+ from common.mcp.tools.search_tools import duckduckgo_search, fetch_page_content
5
+ from common.mcp.tools.time_tools import current_datetime
6
+ from .core.model import get_model_client
7
+
8
+ google_agent = Agent(
9
+ name="GoogleSearchAgent",
10
+ model=get_model_client(),
11
+ tools=[current_datetime, google_search, google_search_recent, duckduckgo_search, fetch_page_content],
12
+ instructions="""
13
+ You are a GoogleSearchAgent specialized in finding and retrieving information from the web.
14
+ Your role is to help users find accurate, relevant, and up-to-date information using web search.
15
+
16
+ ## Tool Priority & Usage
17
+
18
+ **PRIMARY TOOLS (Google via Serper.dev API):**
19
+
20
+ 1. 'google_search': General Google search with recent results (last 24 hours by default)
21
+ - Use for most search queries
22
+ - Returns: Title, Link, Snippet
23
+ - Input: { "query": "search terms", "num_results": 3 }
24
+
25
+ 2. 'google_search_recent': Time-filtered Google search
26
+ - Use when user specifies a time range (today, this week, this month, this year)
27
+ - Timeframes: "d" (day), "w" (week), "m" (month), "y" (year)
28
+ - Input: { "query": "search terms", "num_results": 3, "timeframe": "d" }
29
+
30
+ **FALLBACK TOOL (DuckDuckGo Search):**
31
+
32
+ 3. 'duckduckgo_search': Use ONLY when Google tools fail or SERPER_API_KEY is missing
33
+ - Provides similar search functionality
34
+ - Input: { "query": "search terms", "max_results": 5, "search_type": "text", "timelimit": "d" }
35
+
36
+ **CONTENT EXTRACTION:**
37
+
38
+ 4. 'fetch_page_content': Extract full text content from a specific URL
39
+ - Use when user wants detailed information from a specific page
40
+ - Use after search to get complete content for analysis
41
+ - Input: { "url": "https://example.com", "timeout": 3 }
42
+
43
+ **TIME CONTEXT:**
44
+
45
+ 5. 'current_datetime': Get current date/time for context
46
+ - Input: { "format": "natural" }
47
+
48
+ ## Workflow
49
+
50
+ 1. **Understand the Query**: Determine what information the user needs
51
+ - General search → use google_search
52
+ - Time-specific search → use google_search_recent with appropriate timeframe
53
+ - Deep dive into a page → use fetch_page_content after getting the URL
54
+
55
+ 2. **Try Primary Tools First**: Always attempt Google tools (Serper.dev) before fallback
56
+
57
+ 3. **Fallback if Needed**: If Google tools return an error (missing API key, no results),
58
+ automatically use duckduckgo_search
59
+
60
+ 4. **Extract Content if Needed**: If user wants detailed information or summary,
61
+ use fetch_page_content on relevant URLs from search results
62
+
63
+ 5. **Provide Context**: Use current_datetime when temporal context is important
64
+
65
+ ## Search Strategy
66
+
67
+ **For factual queries:**
68
+ - Use google_search or google_search_recent
69
+ - Summarize findings from multiple sources
70
+ - Cite sources with URLs
71
+
72
+ **For recent events/news:**
73
+ - Use google_search_recent with timeframe="d" or "w"
74
+ - Focus on most recent information
75
+ - Include publication dates if available
76
+
77
+ **For in-depth research:**
78
+ - First: Use google_search to find relevant pages
79
+ - Then: Use fetch_page_content to extract full content from top results
80
+ - Synthesize information from multiple sources
81
+
82
+ ## Output Format
83
+
84
+ Structure your response based on the query type:
85
+
86
+ **For Search Results:**
87
+
88
+ **Search Results for "[Query]"** - [Current Date]
89
+
90
+ 1. **[Title]**
91
+ - Source: [URL]
92
+ - Summary: [Snippet or extracted info]
93
+
94
+ 2. **[Next Result]**
95
+ ...
96
+
97
+ **Key Findings:**
98
+ - [Synthesized insight 1]
99
+ - [Synthesized insight 2]
100
+
101
+ **For Content Extraction:**
102
+
103
+ **Analysis of [Page Title]**
104
+
105
+ [Summarized content with key points]
106
+
107
+ Source: [URL]
108
+
109
+ ## Important Rules
110
+
111
+ - Always cite sources with URLs
112
+ - Prioritize recent information when relevant
113
+ - If API key is missing, inform user and use fallback automatically
114
+ - Never fabricate information or sources
115
+ - Synthesize information from multiple sources when possible
116
+ - Be transparent about limitations (e.g., "Based on search results from...")
117
+ - Use fetch_page_content sparingly (only when deep content is needed)
118
+ - Respect timeouts and handle errors gracefully
119
+ """,
120
+ )
121
+ google_agent.description = "A Google search agent that finds accurate, up-to-date information and recent news using Google Search."
122
+
123
+ __all__ = ["google_agent", "google_search", "google_search_recent", "duckduckgo_search", "fetch_page_content", "current_datetime"]
common/aagents/healthcare_agent.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Healthcare RAG Agent - Combines RAG retrieval with web search for comprehensive medical information."""
2
+ from agents import Agent
3
+ from common.mcp.tools.rag_tool import rag_search, UserContext
4
+ from common.mcp.tools.search_tools import duckduckgo_search, fetch_page_content
5
+ from common.mcp.tools.time_tools import current_datetime
6
+ from .core.model import get_model_client
7
+
8
+
9
+ # ---------------------------------------------------------
10
+ # Healthcare RAG Agent
11
+ # ---------------------------------------------------------
12
+ healthcare_agent = Agent[UserContext](
13
+ name="HealthcareRAGAgent",
14
+ model=get_model_client(),
15
+ tools=[rag_search, duckduckgo_search, fetch_page_content],
16
+ instructions="""
17
+ You are a healthcare information retrieval agent. You retrieve information from tools and synthesize it into well-formatted markdown responses.
18
+
19
+ ## CRITICAL RULES
20
+
21
+ 1. **NEVER use your pre-trained knowledge** - Only use tool results
22
+ 2. **ALWAYS call rag_search first** for every question
23
+ 3. **Evaluate RAG results carefully** - if content is useless (just references, acknowledgments, page numbers), call duckduckgo_search
24
+ 4. **If rag_search returns "No relevant information", MUST call duckduckgo_search**
25
+ 5. **Synthesize tool results into clear, well-structured markdown**
26
+ 6. **If both tools fail, say "I don't have information on this topic"**
27
+
28
+ ## Workflow (MANDATORY)
29
+
30
+ For EVERY question:
31
+
32
+ Step 1: Call `rag_search(query="user question")`
33
+
34
+ Step 2: Evaluate the result:
35
+ - Returns "No relevant information"? → MUST call duckduckgo_search (go to Step 3)
36
+ - Returns content BUT it's NOT useful (just references, acknowledgments, page numbers, file names, credits)? → MUST call duckduckgo_search (go to Step 3)
37
+ - Returns useful information (definitions, explanations, medical details)? → Synthesize and format (go to Step 4)
38
+
39
+ Step 3: Call `duckduckgo_search(params={"query": "user question", "max_results": 3})`
40
+
41
+ Step 4: Synthesize and format response using markdown
42
+
43
+ ## Response Format (Markdown)
44
+
45
+ ## [Topic Name]
46
+
47
+ [Brief introduction/definition]
48
+
49
+ ### Key Points
50
+ - **Point 1**: Description
51
+ - **Point 2**: Description
52
+
53
+ ### Detailed Information
54
+
55
+ [Organized paragraphs with medical details]
56
+
57
+ ---
58
+
59
+ **Source:** Knowledge Base / Web Search
60
+
61
+ **Disclaimer:** This information is for educational purposes only. Always consult a qualified healthcare provider for medical advice.
62
+
63
+ ## Critical Reminders
64
+
65
+ 🚨 You MUST:
66
+ - Call rag_search first, evaluate if content is useful
67
+ - If RAG content is useless (references/credits), call duckduckgo_search
68
+ - Use proper markdown formatting
69
+ - Cite the source
70
+
71
+ 🚨 You MUST NOT:
72
+ - Use your pre-trained knowledge
73
+ - Skip evaluating RAG content quality
74
+ - Accept useless RAG results without calling web search
75
+ """,
76
+ )
77
+ healthcare_agent.description = "A healthcare agent that combines RAG (Retrieval Augmented Generation) with web search to answer medical questions."
78
+
79
+ __all__ = ["healthcare_agent"]
common/aagents/news_agent.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """News agent module for fetching and analyzing news articles."""
2
+ from agents import Agent
3
+ from common.mcp.tools.news_tools import get_top_headlines, search_news, get_news_by_category
4
+ from common.mcp.tools.time_tools import current_datetime
5
+ from .core.model import get_model_client
6
+
7
+ news_agent = Agent(
8
+ name="NewsAgent",
9
+ model=get_model_client(),
10
+ tools=[current_datetime, get_top_headlines, search_news, get_news_by_category],
11
+ instructions="""
12
+ You are a NewsAgent specialized in fetching and analyzing recent news articles and headlines.
13
+ Your role is to provide users with up-to-date, relevant news information from reliable sources.
14
+
15
+ ## Tool Priority & Usage
16
+
17
+ **PRIMARY TOOLS (NewsAPI.org):**
18
+ 1. 'get_top_headlines': Fetch the latest top headlines for a specific country
19
+ - Use when user asks for general news, breaking news, or top stories
20
+ - Input: { "country": "us", "num_results": 5 }
21
+
22
+ 2. 'search_news': Search for news articles about a specific topic
23
+ - Use when user asks about a specific subject, company, person, or event
24
+ - Input: { "query": "topic name", "num_results": 5, "days_back": 7 }
25
+
26
+ 3. 'get_news_by_category': Fetch headlines by category
27
+ - Use when user asks for category-specific news (business, tech, sports, etc.)
28
+ - Categories: "business", "entertainment", "general", "health", "science", "sports", "technology"
29
+ - Input: { "category": "business", "country": "us", "num_results": 5 }
30
+
31
+ **TIME CONTEXT:**
32
+ 4. 'current_datetime': Use to provide current date/time context in your responses
33
+ - Input: { "format": "natural" }
34
+
35
+ ## Workflow
36
+
37
+ 1. **Determine Intent**: Understand what type of news the user wants
38
+ - General headlines → use get_top_headlines
39
+ - Topic-specific → use search_news
40
+ - Category-specific → use get_news_by_category
41
+
42
+ 2. **Execute Search**: Use the appropriate NewsAPI tool.
43
+
44
+ 3. **Include Time Context**: Use current_datetime to provide temporal context.
45
+
46
+ 4. **Format Response**: Present news in a clear, organized format with:
47
+ - Headlines/titles
48
+ - Sources
49
+ - Publication dates
50
+ - Brief summaries
51
+ - URLs for full articles
52
+
53
+ ## Output Format
54
+
55
+ Structure your response as:
56
+
57
+ **[News Category/Topic] - [Current Date]**
58
+
59
+ 1. **[Headline]**
60
+ - Source: [News Source]
61
+ - Published: [Date/Time]
62
+ - Summary: [Brief description]
63
+ - Read more: [URL]
64
+
65
+ 2. **[Next Headline]**
66
+ ...
67
+
68
+ ## Important Rules
69
+
70
+ - Always cite sources and include publication dates
71
+ - Prioritize recent news (within last 7 days unless specified otherwise)
72
+ - Never fabricate news or sources
73
+ - Present news objectively without bias
74
+ - Include URLs so users can read full articles
75
+ - Use current_datetime to ensure temporal accuracy
76
+ """,
77
+ )
78
+ news_agent.description = "A news agent that fetches top headlines and searches for news articles by category or topic."
79
+
80
+ __all__ = ["news_agent", "get_top_headlines", "search_news", "get_news_by_category", "current_datetime"]
common/aagents/search_agent.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Search agent module for comprehensive web searches."""
2
+ from agents import Agent
3
+ from common.mcp.tools.search_tools import duckduckgo_search, fetch_page_content
4
+ from common.mcp.tools.time_tools import current_datetime
5
+ from .core.model import get_model_client
6
+
7
+ search_agent = Agent(
8
+ name="Web Search Agent",
9
+ model=get_model_client(),
10
+ tools=[current_datetime, duckduckgo_search, fetch_page_content],
11
+ instructions="""
12
+ You are a highly efficient and specialized **Web Search Agent** 🌐. Your sole function is to retrieve and analyze information from the internet using the **duckduckgo_search** and **fetch_page_content** functions. You must act as a digital librarian and researcher, providing synthesized, cited, and up-to-date answers.
13
+
14
+ ## Core Directives & Priorities
15
+ 1. **Time Awareness First:** ALWAYS invoke **current_datetime** at the very beginning of your execution to establish the current temporal context. This is crucial for answering questions about "today", "yesterday", or recent events.
16
+ 2. **Search Strategy:**
17
+ * Analyze the user's request and construct 1-3 targeted search queries.
18
+ * Use **duckduckgo_search** to find relevant information. Use the 'news' type for current events.
19
+ * **Mandatory Deep Dive:** You MUST select the **top 3** most relevant search results and use **fetch_page_content** to retrieve their full text. *Do not rely solely on the short search snippets.*
20
+ 3. **Synthesis & Answer Construction:**
21
+ * Read the fetched content thoroughly.
22
+ * Synthesize the information into a coherent answer.
23
+ * **Conflict Resolution:** If sources disagree, note the discrepancy and favor the most recent or authoritative source.
24
+ * **Citations:** You **must** cite your sources. At the end of your response, list the *Title* and *URL* of the pages you used.
25
+ 4. **Clarity:** Use professional, plain language. Use headings and bullet points for readability.
26
+ 5. **Data Gaps:** If you cannot find a conclusive answer after searching and fetching, state: **"A conclusive answer could not be verified by current web search results."**
27
+
28
+ ## Workflow Example
29
+ 1. Call `current_datetime()`.
30
+ 2. Call `duckduckgo_search(query="...")`.
31
+ 3. Loop through top 3 results: `fetch_page_content(url=...)`.
32
+ 4. Synthesize findings into final answer.
33
+
34
+ **Crucially, never fabricate information. Your answer must be grounded in the text you have fetched.**
35
+ """,
36
+ )
37
+ search_agent.description = "A web search agent that retrieves information using DuckDuckGo and fetches page content for detailed answers."
38
+
39
+ __all__ = ["search_agent"]
common/aagents/weather_agent.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import Agent
4
+ from pydantic import BaseModel, Field
5
+ from common.mcp.tools.weather_tools import get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs
6
+ from common.mcp.tools.time_tools import current_datetime
7
+ from .core.model import get_model_client
8
+
9
+
10
+ weather_agent = Agent(
11
+ name="WeatherAgent",
12
+ model=get_model_client(),
13
+ tools=[current_datetime, get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs],
14
+ instructions="""
15
+ You are a Weather Forecast agent who forecasts weather information ONLY.
16
+ You can use the 'current_datetime' tool to determine the current date as reference for the weather forecast.
17
+ When given a query, you use the 'get_weather_forecast' tool to retrieve weather data.
18
+ If the API key is missing or the API fails to get the forecast, you use the 'search_weather_fallback_ddgs' or 'search_weather_fallback_bs' as fallback tools to perform a web search for weather information.
19
+ Tool: get_weather_forecast Input:
20
+ A JSON object with the following structure:
21
+ { "city": "The city name to get the weather for.",
22
+ "date": "Optional date in YYYY-MM-DD format to get the forecast for a specific day. If not provided, return the current weather."
23
+ }
24
+
25
+ Output the weather information MUST be in a JSON well-formatted form as below:
26
+ {
27
+ "city": "City name",
28
+ "forecasts": [
29
+ {
30
+ "date": "Date of the forecast in YYYY-MM-DD format",
31
+ "weather": {
32
+
33
+ "description": "Weather description",
34
+ "temperature": "Temperature in Fahrenheit. Report both the high and low temperatures.",
35
+ "humidity": "Humidity percentage",
36
+ "wind_speed": "Wind speed in Miles per Hour (MPH)"
37
+ }
38
+ }.
39
+ ]
40
+ """,
41
+ )
42
+ weather_agent.description = "A weather agent that provides current and forecasted weather information for specific cities."
43
+
44
+ __all__ = ["weather_agent", "get_weather_forecast", "search_weather_fallback_ddgs", "search_weather_fallback_bs"]
common/aagents/web_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ from agents import Agent
3
+ from common.mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult
4
+ from .core.model import get_model_client
5
+
6
+ web_agent = Agent(
7
+ name="WebAgent",
8
+ model=get_model_client(),
9
+ tools=[duckduckgo_search],
10
+ instructions="""
11
+ You are a WebAgent that can perform web searches to find information on the internet.
12
+ When given a query, use the 'duckduckgo_search' tool to retrieve relevant search results.
13
+ Tool: duckduckgo_search Input:
14
+ A JSON object with the following structure:
15
+ { "query": "The search query string.",
16
+ "max_results": "The maximum number of search results to return (default is 5).",
17
+ "search_type": "The type of search to perform. Options: 'text' (default) or 'news'. Use 'news' to get publication dates.",
18
+ "timelimit": "Time limit for search results. Options: 'd' (day), 'w' (week), 'm' (month), 'y' (year).",
19
+ "region": "Region for search results (e.g., 'us-en', 'uk-en'). Default is 'wt-wt' (world)."
20
+ }
21
+ """,
22
+ # output_type=AgentOutputSchema(list[searchResult], strict_json_schema=False),
23
+ # output_type=list[dict], # safer than list[searchResult],
24
+ output_type=list[searchResult],
25
+ )
26
+
27
+ __all__ = ["web_agent", "duckduckgo_search", "searchQuery", "searchResult"]
common/aagents/web_research_agent.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ from agents import AgentOutputSchema, function_tool, Agent
3
+ from pydantic import BaseModel, Field
4
+ from common.mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult, fetch_page_content
5
+ from .core.model import get_model_client
6
+
7
+ web_research_agent = Agent(
8
+ model=get_model_client(),
9
+ tools=[duckduckgo_search, fetch_page_content],
10
+ instructions="""
11
+ You are WebResearchAgent — an advanced internet research assistant with two core abilities:
12
+
13
+ 1) Use the tool `duckduckgo_search` to discover relevant webpages for the user’s query.
14
+ 2) Use the tool `fetch_page_content` to retrieve full text content from any webpage returned by the search tool.
15
+
16
+ ===========================
17
+ AGENT RESPONSIBILITIES
18
+ ===========================
19
+
20
+ • Always begin by invoking `duckduckgo_search` to gather an initial set of webpages relevant to the user's question.
21
+
22
+ • After receiving the search results, you MUST fetch the full content for *all result URLs* by invoking
23
+ `fetch_page_content` once per URL.
24
+
25
+ • These fetch calls should be made **in parallel**:
26
+ - Do NOT wait for one fetch call to finish before issuing the next.
27
+ - Issue all fetch calls immediately after you receive the search results.
28
+
29
+ • You MUST NOT wait more than 3 seconds for any individual page to respond.
30
+ If content is missing or a fetch fails, continue with what you have.
31
+
32
+ ===========================
33
+ ANALYSIS & FINAL ANSWER
34
+ ===========================
35
+
36
+ • After search and fetch operations complete, analyze:
37
+ – the snippets from the search results
38
+ – the full content from `fetch_page_content` (for pages that responded)
39
+
40
+ • Synthesize the collected information and provide a clear, factual, concise answer.
41
+
42
+ • Your final output MUST be a structured, easy-to-read Markdown summary.
43
+
44
+ ===========================
45
+ IMPORTANT RULES
46
+ ===========================
47
+
48
+ • Never fabricate URLs or content not returned by the tools.
49
+ • Never claim to have visited pages without using `fetch_page_content`.
50
+ • Use the tools exactly as required — search first, fetch after.
51
+ • The final response should answer the user’s query using the combined evidence.
52
+ • MUST provide references to the research.
53
+ """
54
+ ,
55
+ )
56
+ web_research_agent.description = "A deep research agent that performs extensive web searches and content fetching for complex research queries."
57
+
58
+ __all__ = ["web_research_agent", "duckduckgo_search", "fetch_page_content", "searchQuery", "searchResult"]
common/aagents/yf_agent.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Yahoo Finance agent module for financial analysis and market research."""
2
+ from agents import Agent
3
+ from common.mcp.tools.yf_tools import get_summary, get_market_sentiment, get_history, get_analyst_recommendations, get_earnings_calendar
4
+ from common.mcp.tools.time_tools import current_datetime
5
+ from .core.model import get_model_client
6
+
7
+
8
+ yf_agent = Agent(
9
+ name="YahooFinanceAgent",
10
+ model=get_model_client(),
11
+ tools=[current_datetime, get_summary, get_market_sentiment, get_history, get_analyst_recommendations, get_earnings_calendar],
12
+ instructions="""
13
+ You are a specialized **Financial Analysis Agent** 💰, expert in market research, financial data retrieval, and market analysis.
14
+ Your primary role is to provide *actionable*, *data-driven*, and *concise* financial reports based on the available tools.
15
+
16
+ ## Core Directives & Priorities
17
+
18
+ 1. **Time Sensitivity:** Always use the 'current_datetime' tool to ensure all analysis is contextually relevant to the current date and time.
19
+ Financial data is extremely time-sensitive.
20
+
21
+ 2. **Financial Data Integrity:** Use the Yahoo Finance tools for specific stock/index data:
22
+ - 'get_summary': Get latest summary information and intraday price data for a ticker.
23
+ - 'get_market_sentiment': Analyze recent price changes and provide market sentiment (Bullish/Bearish/Neutral).
24
+ - 'get_history': Fetch historical price data for a given ticker.
25
+ - 'get_analyst_recommendations': Fetch latest analyst ratings (Buy/Sell/Hold) for a symbol to provide **trading recommendations**.
26
+ - 'get_earnings_calendar': Fetch upcoming earnings dates for a symbol.
27
+
28
+ Be precise about the date range and data source.
29
+
30
+ 3. **Synthesis and Analysis:** Do not just list data. You must **synthesize** financial data (prices, volume, sentiment, recommendations)
31
+ to provide a complete analytical perspective (e.g., "Stock X is up 5% today driven by strong market momentum and a generic 'Buy' rating from analysts").
32
+
33
+ 4. **Professional Clarity:** Present information in a clear, professional, and structured format.
34
+ Use numerical data and financial terminology correctly.
35
+
36
+ 5. **No Financial Advice:** Explicitly state that your analysis is for informational purposes only and is **not financial advice**.
37
+
38
+ 6. **Tool Mandatory:** For any request involving a stock, index, or current market conditions, you **must** use
39
+ the appropriate tool(s) to verify data. **Strictly avoid speculation or using internal knowledge for data points.**
40
+
41
+ ## Tool Usage Examples
42
+
43
+ Tool: current_datetime
44
+ Input: { "format": "natural" }
45
+
46
+ Tool: get_summary
47
+ Input: { "symbol": "AAPL", "period": "1d", "interval": "1h" }
48
+
49
+ Tool: get_market_sentiment
50
+ Input: { "symbol": "AAPL", "period": "1mo" }
51
+
52
+ Tool: get_analyst_recommendations
53
+ Input: { "symbol": "AAPL" }
54
+
55
+ Tool: get_earnings_calendar
56
+ Input: { "symbol": "AAPL" }
57
+
58
+ ## Output Format Guidelines
59
+
60
+ * Use **bold** for key financial metrics (e.g., Stock Symbol, Price, Volume).
61
+ * Cite the tools used to obtain the data (e.g., "Data sourced from Yahoo Finance as of [Date]").
62
+ * If a symbol or data point cannot be found, clearly state "Data for [X] is unavailable or invalid."
63
+ * Always include a disclaimer: "This analysis is for informational purposes only and is not financial advice."
64
+ """,
65
+ )
66
+ yf_agent.description = "A financial analysis agent that provides stock summaries, market sentiment, and historical data using Yahoo Finance."
67
+
68
+ __all__ = ["yf_agent", "get_summary", "get_market_sentiment", "get_history", "get_analyst_recommendations", "get_earnings_calendar", "current_datetime"]
common/mcp/README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MCP Tools Server
2
+
3
+ A Model Context Protocol (MCP) server that exposes all tools from the `tools/` folder via stdio transport.
4
+
5
+ ## Features
6
+
7
+ - **Dynamic Tool Discovery**: Automatically discovers and registers all tools from the tools folder
8
+ - **Stdio Transport**: Compatible with Claude Desktop and other MCP clients
9
+ - **Comprehensive Tool Coverage**: Exposes 14 tools across 6 categories:
10
+ - Google Search (google_tools)
11
+ - News API (news_tools)
12
+ - DuckDuckGo Search (search_tools)
13
+ - Time Utilities (time_tools)
14
+ - Weather Forecast (weather_tools)
15
+ - Yahoo Finance (yf_tools)
16
+
17
+ ## Installation
18
+
19
+ 1. Install required dependencies:
20
+ ```bash
21
+ pip install mcp requests beautifulsoup4 ddgs yfinance python-dotenv pydantic
22
+ ```
23
+
24
+ 2. Set up environment variables in `.env`:
25
+ ```bash
26
+ # Google Search (Serper.dev)
27
+ SERPER_API_KEY=your_serper_api_key
28
+
29
+ # News API
30
+ NEWS_API_KEY=your_news_api_key
31
+
32
+ # Weather API
33
+ OPENWEATHER_API_KEY=your_openweather_api_key
34
+
35
+ # Google AI (for agents)
36
+ GOOGLE_API_KEY=your_google_api_key
37
+
38
+ # Groq (for agents)
39
+ GROQ_API_KEY=your_groq_api_key
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ### Running the Server
45
+
46
+ ```bash
47
+ cd common/mcp
48
+ python mcp_server.py
49
+ ```
50
+
51
+ The server will:
52
+ 1. Discover all tools from the `tools/` folder
53
+ 2. Print registered tools to stderr
54
+ 3. Start listening on stdio for MCP protocol messages
55
+
56
+ ### Integrating with Claude Desktop
57
+
58
+ Add to your Claude Desktop config (`claude_desktop_config.json`):
59
+
60
+ ```json
61
+ {
62
+ "mcpServers": {
63
+ "tools-server": {
64
+ "command": "python",
65
+ "args": ["/absolute/path/to/agenticaiprojects/common/mcp/mcp_server.py"],
66
+ "env": {
67
+ "SERPER_API_KEY": "your_key",
68
+ "NEWS_API_KEY": "your_key",
69
+ "OPENWEATHER_API_KEY": "your_key"
70
+ }
71
+ }
72
+ }
73
+ }
74
+ ```
75
+
76
+ ### Available Tools
77
+
78
+ The server exposes the following tools:
79
+
80
+ **Google Search:**
81
+ - `google_tools.google_search` - General Google search
82
+ - `google_tools.google_search_recent` - Time-filtered Google search
83
+
84
+ **News:**
85
+ - `news_tools.get_top_headlines` - Top headlines by country
86
+ - `news_tools.search_news` - Search news by topic
87
+ - `news_tools.get_news_by_category` - News by category
88
+
89
+ **Search & Content:**
90
+ - `search_tools.duckduckgo_search` - DuckDuckGo search
91
+ - `search_tools.fetch_page_content` - Extract page content
92
+
93
+ **Time:**
94
+ - `time_tools.current_datetime` - Get current date/time
95
+
96
+ **Weather:**
97
+ - `weather_tools.get_weather_forecast` - Weather forecast via API
98
+ - `weather_tools.search_weather_fallback_ddgs` - Weather via DuckDuckGo
99
+ - `weather_tools.search_weather_fallback_bs` - Weather via web scraping
100
+
101
+ **Finance:**
102
+ - `yf_tools.get_summary` - Stock summary
103
+ - `yf_tools.get_market_sentiment` - Market sentiment analysis
104
+ - `yf_tools.get_history` - Historical stock data
105
+
106
+ ## Development
107
+
108
+ ### Adding New Tools
109
+
110
+ 1. Create a new file in `tools/` folder (e.g., `my_tools.py`)
111
+ 2. Decorate functions with `@function_tool`
112
+ 3. The server will automatically discover and register them on next restart
113
+
114
+ ### Testing
115
+
116
+ ```bash
117
+ # Test the server
118
+ cd common/mcp
119
+ python mcp_server.py
120
+
121
+ # In another terminal, you can send MCP protocol messages via stdin
122
+ # Or use an MCP client library to test
123
+ ```
124
+
125
+ ## Troubleshooting
126
+
127
+ **Tools not discovered:**
128
+ - Check that functions are decorated with `@function_tool`
129
+ - Verify the module is in the `tools/` folder
130
+ - Check stderr output for registration messages
131
+
132
+ **API errors:**
133
+ - Verify environment variables are set correctly
134
+ - Check API key validity
135
+ - Review tool-specific error messages in stderr
136
+
137
+ ## License
138
+
139
+ Part of the agenticaiprojects repository.
common/mcp/__init__.py ADDED
File without changes
common/mcp/mcp_server.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MCP Server with stdio transport that exposes all tools from the tools folder.
4
+ """
5
+ import asyncio
6
+ import sys
7
+ import os
8
+ import inspect
9
+ import importlib
10
+ from pathlib import Path
11
+ from typing import Any, Callable
12
+
13
+ # Add parent directory to path for imports
14
+ sys.path.insert(0, str(Path(__file__).parent.parent))
15
+
16
+ from mcp.server import Server
17
+ from mcp.server.stdio import stdio_server
18
+ from mcp.types import Tool, TextContent
19
+
20
+ # Initialize MCP server
21
+ app = Server("tools-server")
22
+
23
+ # Dictionary to store all discovered tools
24
+ TOOLS_REGISTRY: dict[str, Callable] = {}
25
+
26
def discover_tools():
    """
    Dynamically discover tool functions from the tools folder.

    Each tool module is loaded directly from its file path via
    ``importlib.util.spec_from_file_location`` instead of importing it as
    ``mcp.tools.<name>``. Importing under the ``mcp`` package name collides
    with the installed MCP SDK (also named ``mcp``), which this server
    itself imports (``from mcp.server import Server``), so a file-path load
    avoids one package shadowing the other.

    Every public function *defined in* a tool module is registered in
    TOOLS_REGISTRY under the key "<module>.<function>". Private helpers
    (leading underscore) and functions merely imported into the module
    are skipped, so helpers/decorators are not exposed as tools.
    """
    import importlib.util  # local import: only needed for discovery

    tools_dir = Path(__file__).parent / "tools"
    tool_modules = [
        "google_tools",
        "news_tools",
        "search_tools",
        "time_tools",
        "weather_tools",
        "yf_tools",
    ]

    print(f"[MCP Server] Discovering tools from: {tools_dir}", file=sys.stderr)

    for module_name in tool_modules:
        try:
            # Load the module straight from its source file to sidestep any
            # package-name collision with the installed `mcp` SDK.
            module_path = tools_dir / f"{module_name}.py"
            spec = importlib.util.spec_from_file_location(module_name, module_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            for name, obj in inspect.getmembers(module, inspect.isfunction):
                # Skip private helpers and functions imported from elsewhere
                # (only register functions actually defined in this module).
                if name.startswith('_') or getattr(obj, '__module__', None) != module_name:
                    continue

                tool_name = f"{module_name}.{name}"
                TOOLS_REGISTRY[tool_name] = obj
                print(f"[MCP Server] Registered tool: {tool_name}", file=sys.stderr)

        except Exception as e:
            print(f"[MCP Server] Error loading module {module_name}: {e}", file=sys.stderr)

    print(f"[MCP Server] Total tools registered: {len(TOOLS_REGISTRY)}", file=sys.stderr)
65
+
66
+
67
@app.list_tools()
async def list_tools() -> list[Tool]:
    """
    Describe every registered tool to the MCP client.

    For each entry in TOOLS_REGISTRY, a JSON-schema input description is
    derived from the Python signature: int/bool/float annotations map to
    the corresponding JSON types, anything else is reported as "string",
    and parameters without defaults are marked required.
    """
    catalog: list[Tool] = []

    for tool_name, tool_func in TOOLS_REGISTRY.items():
        doc = inspect.getdoc(tool_func) or "No description available"
        signature = inspect.signature(tool_func)

        properties: dict[str, Any] = {}
        required: list[str] = []

        for param_name, param in signature.parameters.items():
            annotation = param.annotation
            if annotation is int:
                json_type = "integer"
            elif annotation is bool:
                json_type = "boolean"
            elif annotation is float:
                json_type = "number"
            else:
                # Unannotated or non-primitive parameters fall back to string.
                json_type = "string"

            properties[param_name] = {
                "type": json_type,
                "description": f"Parameter: {param_name}",
            }

            # A parameter with no default value must be supplied by the caller.
            if param.default is inspect.Parameter.empty:
                required.append(param_name)

        catalog.append(
            Tool(
                name=tool_name,
                # First docstring line only, capped at 200 characters.
                description=doc.split('\n')[0][:200],
                inputSchema={
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            )
        )

    return catalog
119
+
120
+
121
@app.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """
    Execute the named tool with the given arguments and wrap its output.

    Async tools are awaited; sync tools are called directly. Non-string
    results are stringified. Execution errors are reported back as text
    content instead of being raised, so a failing tool does not take the
    server down; an unknown tool name, however, is a protocol error and
    raises ValueError.
    """
    print(f"[MCP Server] Calling tool: {name} with args: {arguments}", file=sys.stderr)

    if name not in TOOLS_REGISTRY:
        raise ValueError(f"Tool not found: {name}")

    tool_func = TOOLS_REGISTRY[name]

    try:
        outcome = (
            await tool_func(**arguments)
            if inspect.iscoroutinefunction(tool_func)
            else tool_func(**arguments)
        )
        text = outcome if isinstance(outcome, str) else str(outcome)
        return [TextContent(type="text", text=text)]

    except Exception as e:
        error_msg = f"Error executing tool {name}: {str(e)}"
        print(f"[MCP Server] {error_msg}", file=sys.stderr)
        return [TextContent(type="text", text=error_msg)]
150
+
151
+
152
async def main():
    """
    Entry point: discover all tools, then serve MCP requests over stdio.
    """
    # Populate TOOLS_REGISTRY before the client can list or call anything.
    discover_tools()

    print(f"[MCP Server] Starting MCP server with {len(TOOLS_REGISTRY)} tools", file=sys.stderr)

    # stdio transport: the client speaks the MCP protocol over stdin/stdout.
    async with stdio_server() as (read_stream, write_stream):
        init_options = app.create_initialization_options()
        await app.run(read_stream, write_stream, init_options)


if __name__ == "__main__":
    asyncio.run(main())
common/mcp/tools/__init__.py ADDED
File without changes
common/mcp/tools/google_tools.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
+ from agents import function_tool
5
+ from typing import Optional
6
+
7
+
8
+
9
+ # ---------------------------------------------------------
10
+ # Load environment variables
11
+ # ---------------------------------------------------------
12
+
13
+
14
+ # ============================================================
15
+ # 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
16
+ # ============================================================
17
+
18
@function_tool
def google_search(query: str, num_results: int = 3) -> str:
    """
    Perform a general Google search using Serper.dev API.

    Parameters:
    -----------
    query : str
        The search query string, e.g., "latest Tesla stock news".
    num_results : int, optional (default=3)
        Maximum number of search results to return.

    Returns:
    --------
    str
        Formatted string of top search results, each including:
        - Title of the page
        - URL link
        - Snippet / description
        If no results are found or API key is missing, returns an error message.

    Notes:
    ------
    This tool performs an unrestricted (all-time) search. For results
    limited to a recent time window, use `google_search_recent` instead.
    """
    print(f"[DEBUG] google_search called with query='{query}', num_results={num_results}")

    try:
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        url = "https://google.serper.dev/search"
        headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
        # Fix: the payload previously hard-coded "tbs": "qdr:d" (last 24h),
        # contradicting this tool's documented general-search contract and
        # duplicating google_search_recent. No time filter is applied here.
        payload = {"q": query, "num": num_results}

        response = requests.post(url, headers=headers, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()

        if "organic" not in data or not data["organic"]:
            return f"No results found for query: '{query}'"

        formatted_results = [
            f"Title: {item.get('title')}\n"
            f"Link: {item.get('link')}\n"
            f"Snippet: {item.get('snippet', '')}\n"
            for item in data["organic"][:num_results]
        ]
        return "\n".join(formatted_results)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error during Google search: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error performing Google search: {e}")
        return f"Error performing Google search: {e}"
84
+
85
+
86
@function_tool
def google_search_recent(query: str, num_results: int = 3, timeframe: str = "d") -> str:
    """
    Perform a Google search with time-based filtering using Serper.dev API.

    Parameters:
    -----------
    query : str
        The search query string.
    num_results : int, optional (default=3)
        Maximum number of search results to return.
    timeframe : str, optional (default="d")
        Time range for results:
        - "d" = past day
        - "w" = past week
        - "m" = past month
        - "y" = past year

    Returns:
    --------
    str
        Formatted string of recent search results.
    """
    print(f"[DEBUG] google_search_recent called with query='{query}', timeframe={timeframe}")

    try:
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        # Serper's "tbs" parameter applies Google's time filter (qdr:<unit>).
        response = requests.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": api_key, "Content-Type": "application/json"},
            json={"q": query, "num": num_results, "tbs": f"qdr:{timeframe}"},
            timeout=10,
        )
        response.raise_for_status()
        organic = response.json().get("organic") or []

        if not organic:
            return f"No recent results found for query: '{query}'"

        entries = []
        for item in organic[:num_results]:
            entries.append(
                f"Title: {item.get('title')}\n"
                f"Link: {item.get('link')}\n"
                f"Snippet: {item.get('snippet', '')}\n"
            )

        return f"Recent results ({timeframe}):\n\n" + "\n".join(entries)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Error performing Google search: {e}"
common/mcp/tools/news_tools.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
+ from agents import function_tool
5
+ from typing import Optional
6
+ import datetime
7
+
8
+
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+
14
+
15
+ # ============================================================
16
+ # 🔹 NEWS TOOLSET (NewsAPI.org)
17
+ # ============================================================
18
+
19
+ def _search_news(query: str, num_results: int = 5, days_back: int = 7) -> str:
20
+
21
+ print(f"[DEBUG] search_news called with query='{query}', num_results={num_results}, days_back={days_back}")
22
+
23
+ try:
24
+ api_key = os.getenv("NEWS_API_KEY")
25
+ if not api_key:
26
+ return "Error: NEWS_API_KEY missing in environment variables."
27
+
28
+ # Calculate date range
29
+ today = datetime.datetime.utcnow()
30
+ from_date = (today - datetime.timedelta(days=days_back)).strftime('%Y-%m-%dT%H:%M:%SZ')
31
+
32
+ url = "https://newsapi.org/v2/everything"
33
+ params = {
34
+ "q": query,
35
+ "pageSize": num_results,
36
+ "apiKey": api_key,
37
+ "sortBy": "publishedAt",
38
+ "language": "en",
39
+ "from": from_date
40
+ }
41
+
42
+ response = requests.get(url, params=params, timeout=10)
43
+ response.raise_for_status()
44
+ data = response.json()
45
+
46
+ if not data.get("articles"):
47
+ return f"No news found for query: '{query}'"
48
+
49
+ formatted = []
50
+ for article in data["articles"][:num_results]:
51
+ formatted.append(
52
+ f"📰 {article.get('title')}\n"
53
+ f" Source: {article.get('source', {}).get('name')}\n"
54
+ f" Published: {article.get('publishedAt', 'N/A')}\n"
55
+ f" URL: {article.get('url')}\n"
56
+ )
57
+
58
+ return f"News Search Results for '{query}' (last {days_back} days):\n\n" + "\n".join(formatted)
59
+
60
+ except requests.exceptions.RequestException as e:
61
+ print(f"[DEBUG] Network error: {e}")
62
+ return f"Network error while calling News API: {e}"
63
+ except Exception as e:
64
+ print(f"[DEBUG] Error: {e}")
65
+ return f"Unexpected error fetching news: {e}"
66
+
67
@function_tool
def get_top_headlines(country: str = "us", num_results: int = 5) -> str:
    """
    Fetch the latest top headlines for a country using NewsAPI.org.

    Parameters:
    -----------
    country : str, optional (default="us")
        Two-letter country code (e.g., "us", "gb", "in").
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines with title, source, published date, and URL.
        If API key is missing or no results found, returns an error message.
    """
    print(f"[DEBUG] get_top_headlines called for country={country}, num_results={num_results}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={"country": country, "pageSize": num_results, "apiKey": api_key},
            timeout=10,
        )
        response.raise_for_status()
        articles = response.json().get("articles")

        if not articles:
            return f"No top headlines found for country: {country}"

        entries = [
            f"📰 {article.get('title')}\n"
            f" Source: {article.get('source', {}).get('name')}\n"
            f" Published: {article.get('publishedAt', 'N/A')}\n"
            f" URL: {article.get('url')}\n"
            for article in articles[:num_results]
        ]

        return f"Top Headlines ({country.upper()}):\n\n" + "\n".join(entries)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
123
+
124
+
125
@function_tool
def search_news(query: str, num_results: int = 5, days_back: int = 7) -> str:
    """
    Search for recent news articles about a specific topic using NewsAPI.org.

    Thin tool-facing wrapper: the actual request/formatting logic lives in
    `_search_news`, which stays callable without the tool decorator.

    Parameters:
    -----------
    query : str
        Keyword or topic to search (e.g., "Tesla earnings", "AI healthcare").
    num_results : int, optional (default=5)
        Number of articles to fetch.
    days_back : int, optional (default=7)
        Number of days to look back for articles (1-30).

    Returns:
    --------
    str
        Formatted news articles with title, source, published date, and URL.
        If API key is missing or no results found, returns an error message.
    """
    return _search_news(query=query, num_results=num_results, days_back=days_back)
146
+
147
+
148
@function_tool
def get_news_by_category(category: str = "business", country: str = "us", num_results: int = 5) -> str:
    """
    Fetch top headlines by category using NewsAPI.org.

    Parameters:
    -----------
    category : str, optional (default="business")
        News category: "business", "entertainment", "general", "health",
        "science", "sports", "technology".
    country : str, optional (default="us")
        Two-letter country code.
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines for the specified category.
    """
    print(f"[DEBUG] get_news_by_category called for category={category}, country={country}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={
                "category": category,
                "country": country,
                "pageSize": num_results,
                "apiKey": api_key,
            },
            timeout=10,
        )
        response.raise_for_status()
        articles = response.json().get("articles")

        if not articles:
            return f"No headlines found for category: {category}"

        entries = [
            f"📰 {article.get('title')}\n"
            f" Source: {article.get('source', {}).get('name')}\n"
            f" Published: {article.get('publishedAt', 'N/A')}\n"
            f" URL: {article.get('url')}\n"
            for article in articles[:num_results]
        ]

        return f"Top {category.capitalize()} Headlines ({country.upper()}):\n\n" + "\n".join(entries)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
common/mcp/tools/rag_tool.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RAG Search Tool - Search the local healthcare knowledge base"""
2
+ import os
3
+ from pathlib import Path
4
+ from agents import function_tool, RunContextWrapper
5
+
6
+ from common.rag.rag import Retriever
7
+ from dataclasses import dataclass
8
+
9
@dataclass
class UserContext:
    """Per-request user settings passed to tools via RunContextWrapper."""
    uid: str  # unique identifier of the requesting user
    db_path: str = ""  # location of the vector store on disk
    file_path: str = ""  # source document backing the knowledge base
    similarity_threshold: float = 0.4  # FAISS L2 distance threshold for RAG relevance
15
+
16
+
17
+ # ---------------------------------------------------------
18
+ # RAG Search Tool
19
+ # ---------------------------------------------------------
20
+ @function_tool
21
+ def rag_search(wrapper: RunContextWrapper[UserContext], query: str) -> str:
22
+ """
23
+ Search the local healthcare knowledge base for relevant information.
24
+
25
+ Args:
26
+ query: The medical question or topic to search for
27
+
28
+ Returns:
29
+ Relevant information from the healthcare knowledge base
30
+ """
31
+ print(f"[DEBUG] RAG_SEARCH called with query: '{query}'")
32
+
33
+ # Get similarity threshold from user context
34
+ similarity_threshold = wrapper.context.similarity_threshold
35
+ print(f"[DEBUG] RAG_SEARCH: Using similarity threshold: {similarity_threshold}")
36
+
37
+ try:
38
+ # Initialize retriever with user context
39
+ retriever = Retriever(
40
+ db_path=wrapper.context.db_path,
41
+ file_path=wrapper.context.file_path
42
+ )
43
+
44
+ # Get results with similarity scores
45
+ results_with_scores = retriever.retrieve_with_scores(query, k=5) # Increased from 4 to 5
46
+
47
+ if not results_with_scores:
48
+ print("[DEBUG] RAG_SEARCH: No results found in knowledge base")
49
+ return "No relevant information found in the knowledge base."
50
+
51
+ print(f"[DEBUG] RAG_SEARCH: Found {len(results_with_scores)} results")
52
+
53
+ # Check if the best match meets the threshold
54
+ # FAISS returns (document, distance) where lower distance = better match
55
+ best_score = results_with_scores[0][1]
56
+ print(f"[DEBUG] RAG_SEARCH: Best similarity score (distance): {best_score:.4f} (threshold: {similarity_threshold})")
57
+
58
+ if best_score > similarity_threshold:
59
+ print(f"[DEBUG] RAG_SEARCH: Best match score {best_score:.4f} is above threshold {similarity_threshold}")
60
+ print("[DEBUG] RAG_SEARCH: Results not relevant enough, triggering web search fallback")
61
+ return "No relevant information found in the knowledge base."
62
+
63
+ print(f"[DEBUG] RAG_SEARCH: Results are relevant (score: {best_score:.4f} <= {similarity_threshold})")
64
+
65
+ # Log all scores for debugging
66
+ all_scores = [f"{score:.4f}" for _, score in results_with_scores]
67
+ print(f"[DEBUG] RAG_SEARCH: All scores: {', '.join(all_scores)}")
68
+
69
+ # Format results - only include documents that meet the similarity threshold
70
+ formatted_results = []
71
+ for i, (doc, score) in enumerate(results_with_scores[:5], 1): # Top 5 results
72
+ if score <= similarity_threshold:
73
+ content = doc.page_content.strip()
74
+ formatted_results.append(f"Result {i} (score: {score:.4f}):\n{content}\n")
75
+
76
+ if not formatted_results:
77
+ print("[DEBUG] RAG_SEARCH: No results met the similarity threshold")
78
+ print("[DEBUG] RAG_SEARCH: Triggering web search fallback")
79
+ return "No relevant information found in the knowledge base."
80
+
81
+ result_text = "\n".join(formatted_results)
82
+ print(f"[DEBUG] RAG_SEARCH: Returning {len(formatted_results)} results, total length: {len(result_text)} characters")
83
+ print(f"[DEBUG] RAG_SEARCH: First 300 chars: {result_text[:300]}...")
84
+
85
+ return result_text
86
+
87
+ except Exception as e:
88
+ print(f"[DEBUG] RAG_SEARCH: Error occurred - {str(e)}")
89
+ return f"Error retrieving from knowledge base: {str(e)}"
90
+
91
+
92
+
93
# Public API: only the tool function is exported. The previous list also
# named "retriever", but no module-level `retriever` exists (it is a local
# inside rag_search), so `from ... import *` raised AttributeError.
__all__ = ["rag_search"]
common/mcp/tools/search_tools.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from ddgs import DDGS
3
+ from agents import function_tool
4
+
5
+ from pydantic import BaseModel, Field
6
+ from bs4 import BeautifulSoup
7
+ from typing import Optional
8
+
9
+
10
+
11
+ # ---------------------------------------------------------
12
+ # Load environment variables
13
+ # ---------------------------------------------------------
14
+
15
+
16
+ # ---------------------- MODELS ---------------------------
17
class searchQuery(BaseModel):
    """Validated parameter bundle for a DuckDuckGo search request."""
    query: str = Field(..., description="The search query string.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    search_type: str = Field(
        "text",
        description="Search type: 'text' (default) or 'news'. Use 'news' to get publication dates."
    )
    timelimit: str = Field(
        'd',
        description="Time limit for search results: 'd' (day), 'w' (week), 'm' (month), 'y' (year)."
    )
    region: str = Field("us-en", description="Region for search results (e.g., 'us-en').")
29
+
30
+
31
class searchResult(BaseModel):
    """A single normalized search hit returned by `_duckduckgo_search`."""
    title: str
    link: str
    snippet: str
    # Publication date string; only populated for 'news' searches.
    datetime: Optional[str] = None
36
+
37
+
38
+ # ---------------------- PAGE FETCH TOOL ---------------------------
39
def _fetch_page_content(url: str, timeout: int = 3) -> Optional[str]:
    """Fetch and extract text content from a web page.

    Downloads *url* with a desktop-browser User-Agent, strips non-content
    markup, and returns the cleaned visible text. Returns None on any
    failure (network error, bad status, parse error).
    """
    print(f"[DEBUG] fetch_page_content called with: {url} - timeout: {timeout}")
    try:
        user_agent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        )
        response = requests.get(url, headers={'User-Agent': user_agent}, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Drop markup that never carries article text.
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        raw_text = soup.get_text(separator='\n', strip=True)

        # Normalize whitespace: strip each line, split on spaces, and keep
        # only non-empty fragments, one per output line.
        cleaned = '\n'.join(
            fragment.strip()
            for line in raw_text.splitlines()
            for fragment in line.strip().split(" ")
            if fragment.strip()
        )
        return cleaned
    except Exception as e:
        print(f"[WARNING] Failed to fetch content from {url}: {str(e)}")
        return None
+
72
+
73
+ @function_tool
74
+ def fetch_page_content(url: str, timeout: int = 3) -> Optional[str]:
75
+ """Fetch and extract text content from a web page."""
76
+ return _fetch_page_content(url, timeout)
77
+
78
+
79
+ # ---------------------- SEARCH TOOL ---------------------------
80
+ def _duckduckgo_search(params: searchQuery) -> list[dict]:
81
+ """Perform a DuckDuckGo search and return only snippets.
82
+ No page content fetched here."""
83
+ print(f"[DEBUG] duckduckgo_search called with: {params}")
84
+
85
+ results = []
86
+ with DDGS() as ddgs:
87
+ if params.search_type == "news":
88
+ search_results = ddgs.news(
89
+ params.query,
90
+ max_results=params.max_results,
91
+ timelimit=params.timelimit,
92
+ region=params.region
93
+ )
94
+ for result in search_results:
95
+ results.append(
96
+ searchResult(
97
+ title=result.get("title", ""),
98
+ link=result.get("url", ""),
99
+ snippet=result.get("body", ""),
100
+ datetime=result.get("date", "")
101
+ ).model_dump()
102
+ )
103
+ else:
104
+ search_results = ddgs.text(
105
+ params.query,
106
+ max_results=params.max_results,
107
+ timelimit=params.timelimit,
108
+ region=params.region
109
+ )
110
+ for result in search_results:
111
+ results.append(
112
+ searchResult(
113
+ title=result.get("title", ""),
114
+ link=result.get("href", ""),
115
+ snippet=result.get("body", "")
116
+ ).model_dump()
117
+ )
118
+
119
+ print(f"[DEBUG] duckduckgo_search returning {len(results)} results")
120
+ return results
121
+
122
@function_tool
def duckduckgo_search(query: str, max_results: int = 5, search_type: str = "text", timelimit: str = "d", region: str = "us-en") -> list[dict]:
    """
    Perform a DuckDuckGo search and return only snippets.

    Args:
        query: The search query string.
        max_results: The maximum number of search results to return (default: 5).
        search_type: Search type: 'text' (default) or 'news'. Use 'news' to get publication dates.
        timelimit: Time limit for search results: 'd' (day), 'w' (week), 'm' (month), 'y' (year).
        region: Region for search results (e.g., 'us-en').
    """
    # Validate/bundle the arguments through the pydantic model, then delegate.
    return _duckduckgo_search(
        searchQuery(
            query=query,
            max_results=max_results,
            search_type=search_type,
            timelimit=timelimit,
            region=region,
        )
    )
142
+
common/mcp/tools/time_tools.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from agents import function_tool
3
+ # from ..common.utility.logger import log_call
4
+
5
+
6
+
7
@function_tool
# @log_call
def current_datetime(format: str = "natural") -> str:
    """
    Returns the current date and time as a formatted string.

    Args:
        format (str): Format style for the datetime. Options:
            - "natural" (default): "Saturday, December 7, 2025 at 3:59 PM"
            - "natural_short": "Dec 7, 2025 at 3:59 PM"
            - "natural_full": "Saturday, December 7, 2025 at 3:59:30 PM CST"
            - Custom strftime format string (e.g., "%Y-%m-%d %H:%M:%S")

    Returns:
        str: Current date and time in the specified format
    """
    print(f"[DEBUG] current_datetime called with format='{format}'")
    # Fix: attach the local timezone via astimezone() so %Z in the
    # "natural_full" format renders a real zone name — a naive
    # datetime.now() leaves %Z empty. Wall-clock time is unchanged.
    now = datetime.now().astimezone()

    # Natural format options
    if format == "natural":
        return now.strftime("%A, %B %d, %Y at %I:%M %p")
    elif format == "natural_short":
        return now.strftime("%b %d, %Y at %I:%M %p")
    elif format == "natural_full":
        return now.strftime("%A, %B %d, %Y at %I:%M:%S %p %Z")
    else:
        # Custom format string
        return now.strftime(format)
common/mcp/tools/weather_tools.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import datetime
5
+
6
+ from typing import Optional
7
+
8
+ from ddgs import DDGS
9
+ from agents import function_tool
10
+
11
+
12
+
13
+ # ---------------------------------------------------------
14
+ # Load environment variables
15
+ # ---------------------------------------------------------
16
+
17
+
18
@function_tool
def get_weather_forecast(city: str, date: Optional[str] = None) -> str:
    """
    PRIMARY TOOL: Fetch weather using OpenWeatherMap API.
    """
    print(f"[DEBUG] Primary API get_weather_forecast called for city={city}")

    api_key = os.getenv("OPENWEATHER_API_KEY")
    if not api_key:
        return "Error: OPENWEATHER_API_KEY missing. Please use the fallback search tool."

    # 5-day / 3-hour forecast endpoint, metric units.
    try:
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/forecast",
            params={"q": city, "appid": api_key, "units": "metric"},
            timeout=5,
        )
        payload = response.json()
    except Exception as e:
        return f"Error calling weather API: {str(e)}"

    # OpenWeatherMap signals success with cod == "200".
    if str(payload.get("cod")) != "200":
        return f"Error from API: {payload.get('message', 'Unknown error')}"

    lines = []
    matched_requested_date = False

    for entry in payload.get("list", []):
        entry_date = entry["dt_txt"].split(" ")[0]
        # When a specific date was requested, skip every other day.
        if date and entry_date != date:
            continue
        matched_requested_date = True

        description = entry['weather'][0]['description'].capitalize()
        main = entry['main']
        lines.append(
            f"{entry_date}: {description}, Temp: {main['temp']}°C, "
            f"Humidity: {main['humidity']}%, Wind: {entry['wind']['speed']} m/s"
        )

    # The free endpoint only covers ~5 days; tell the agent to fall back.
    if date and not matched_requested_date:
        return f"API valid, but date {date} is out of range (5-day limit). Try the search fallback tool."

    return f"API Forecast for {city}:\n" + "\n".join(lines)
69
+
70
+ # ---------------------------------------------------------
71
+ # Tool 2: Web Search Fallback (Secondary)
72
+ # ---------------------------------------------------------
73
+
74
@function_tool
def search_weather_fallback_ddgs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Search-based fallback that produces an API-like structured forecast.

    Args:
        city: City name to search weather for.
        date: Optional ISO date ("YYYY-MM-DD"); defaults to today.

    Returns:
        A single-line, API-style forecast string, or an error message.
    """
    # BUG FIX: the module top-level has `import datetime` (the module), so
    # `datetime.strptime(...)` here only worked because a *later* top-level
    # `from datetime import datetime` happened to rebind the global name.
    # Import the class locally so this function is self-contained.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (DDGS) called for city={city}, date={date}")

    try:
        # Build a natural-language date for the search query.
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                # Not ISO formatted — pass the caller's string through as-is.
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- Perform Search ---
        results = list(DDGS().text(query, max_results=3))
        print(f"[DEBUG] Number of search results: {len(results)}")

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate snippet text for regex extraction ---
        full_text = " ".join([r.get("body", "") for r in results])

        # Best-effort extraction: first value that looks like a reading.
        temp_match = re.findall(r'(-?\d+)\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_match[0] if temp_match else "?"

        humidity_match = re.findall(r'(\d+)\s*%', full_text)
        humidity = humidity_match[0] if humidity_match else "?"

        wind_match = re.findall(r'(\d+)\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_match[0] if wind_match else "?"

        # Condition: prefix of the first result title as a best guess.
        condition_raw = results[0].get("title", "Unknown").split("-")[0].strip()
        condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Construct API-like forecast ---
        return (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
141
+
142
+
143
+ import requests
144
+ from bs4 import BeautifulSoup
145
+ import re
146
+ from typing import Optional
147
+ from agents import function_tool
148
+ from datetime import datetime
149
+
150
@function_tool
def search_weather_fallback_bs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Web-scraping fallback using BeautifulSoup.
    Produces an API-like structured forecast.

    Args:
        city: City name to search weather for.
        date: Optional ISO date ("YYYY-MM-DD"); defaults to today.

    Returns:
        A single-line, API-style forecast string, or an error message.
    """
    # CLEANUP: removed redundant function-local imports of requests,
    # BeautifulSoup, re and datetime — all are already imported at module
    # level just above this function.
    print(f"[DEBUG] Fallback API (BeautifulSoup) called for city={city}, date={date}")

    try:
        # --- Build Query ---
        if date:
            try:
                dt_obj = datetime.strptime(date, "%Y-%m-%d")
                natural_date = dt_obj.strftime("%B %d, %Y")
            except ValueError:
                # Not ISO formatted — pass the caller's string through as-is.
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- DuckDuckGo HTML search (no API key required) ---
        search_url = f"https://duckduckgo.com/html/?q={query.replace(' ', '+')}"
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(search_url, headers=headers, timeout=5)
        if response.status_code != 200:
            return f"Error fetching search results: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for result in soup.select(".result__body"):
            title_tag = result.select_one(".result__title a")
            snippet_tag = result.select_one(".result__snippet")
            if title_tag and snippet_tag:
                results.append({
                    "title": title_tag.get_text(strip=True),
                    "body": snippet_tag.get_text(strip=True)
                })

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate snippet text for regex extraction ---
        full_text = " ".join([r["body"] for r in results])

        # --- Extract Temperature ---
        temp_matches = re.findall(r'(-?\d{1,2})\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_matches[0] if temp_matches else "?"

        # --- Extract Humidity ---
        humidity_matches = re.findall(r'(\d{1,3})\s*%', full_text)
        humidity = humidity_matches[0] if humidity_matches else "?"

        # --- Extract Wind ---
        wind_matches = re.findall(r'(\d{1,3})\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_matches[0] if wind_matches else "?"

        # --- Extract Condition ---
        # Prefer an explicit condition word from any snippet; fall back to
        # the first result title's prefix.
        condition = "Unknown"
        for r in results:
            m = re.search(r'(clear|sunny|cloudy|rain|snow|storm|fog|mist)', r["body"], re.I)
            if m:
                condition = m.group(1).capitalize()
                break
        if condition == "Unknown":
            condition_raw = results[0]["title"].split("-")[0].strip()
            condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Build Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )

        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
common/mcp/tools/yf_tools.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import yfinance as yf
4
+
5
+ from agents import function_tool
6
+ from datetime import datetime, timedelta
7
+
8
+
9
+
10
+ # Load environment variables
11
+
12
+
13
+
14
+ # ============================================================
15
+ # 🔹 YAHOO FINANCE TOOLSET
16
+ # ============================================================
17
def _get_summary(symbol: str, period: str = "1d", interval: str = "1h") -> str:
    """Build a one-shot price summary for *symbol* (undecorated implementation)."""
    print(f"[DEBUG] get_summary called for symbol='{symbol}', period='{period}', interval='{interval}'")
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix into an explicit start/end window so
        # yfinance always returns recent data.
        window_end = datetime.today()
        if period.endswith("mo"):
            num_days = int(period[:-2]) * 30
        elif period.endswith("y"):
            num_days = int(period[:-1]) * 365
        elif period.endswith("d"):
            num_days = int(period[:-1])
        else:
            num_days = 30  # fall back to roughly one month
        window_start = window_end - timedelta(days=num_days)

        frame = ticker.history(
            start=window_start.strftime("%Y-%m-%d"),
            end=window_end.strftime("%Y-%m-%d"),
            interval=interval,
        )
        if frame.empty:
            return f"No data found for symbol '{symbol}'."

        bar = frame.iloc[-1]
        current_price = round(bar["Close"], 2)
        open_price = round(bar["Open"], 2)
        change = round(current_price - open_price, 2)
        pct_change = round((change / open_price) * 100, 2)

        # Cosmetic metadata for the header line.
        meta = ticker.info
        long_name = meta.get("longName", symbol)
        currency = meta.get("currency", "USD")

        return "\n".join([
            f"📈 {long_name} ({symbol})",
            f"Current Price: {current_price} {currency}",
            f"Change: {change} ({pct_change}%)",
            f"Open: {open_price} | High: {round(bar['High'], 2)} | Low: {round(bar['Low'], 2)}",
            f"Volume: {int(bar['Volume'])}",
            f"Period: {period} | Interval: {interval}",
        ])

    except Exception as e:
        return f"Error fetching data for '{symbol}': {e}"
66
+
67
def _get_market_sentiment(symbol: str, period: str = "1mo") -> str:
    """Classify recent price action for *symbol* as Bullish/Bearish/Neutral."""
    print(f"[DEBUG] get_market_sentiment called for symbol='{symbol}', period='{period}'")
    try:
        # Translate the period suffix into an explicit date window.
        window_end = datetime.today()
        if period.endswith("mo"):
            num_days = int(period[:-2]) * 30
        elif period.endswith("y"):
            num_days = int(period[:-1]) * 365
        elif period.endswith("d"):
            num_days = int(period[:-1])
        else:
            num_days = 30
        window_start = window_end - timedelta(days=num_days)

        frame = yf.Ticker(symbol).history(
            start=window_start.strftime("%Y-%m-%d"),
            end=window_end.strftime("%Y-%m-%d")
        )
        if frame.empty:
            return f"No data for {symbol}."

        first_close = frame["Close"].iloc[0]
        pct_change = (frame["Close"].iloc[-1] - first_close) / first_close * 100

        # More than 2% up = Bullish, more than 2% down = Bearish.
        if pct_change > 2:
            sentiment = "Bullish"
        elif pct_change < -2:
            sentiment = "Bearish"
        else:
            sentiment = "Neutral"

        return f"{symbol} market sentiment ({period}): {sentiment} ({pct_change:.2f}% change)"

    except Exception as e:
        return f"Error fetching market sentiment for '{symbol}': {e}"
105
+
106
def _get_history(symbol: str, period: str = "1mo") -> str:
    """Return the last 5 rows of historical OHLCV data for *symbol*."""
    print(f"[DEBUG] get_history called for symbol='{symbol}', period='{period}'")
    try:
        # Translate the period suffix into an explicit date window.
        window_end = datetime.today()
        if period.endswith("mo"):
            num_days = int(period[:-2]) * 30
        elif period.endswith("y"):
            num_days = int(period[:-1]) * 365
        elif period.endswith("d"):
            num_days = int(period[:-1])
        else:
            num_days = 30
        window_start = window_end - timedelta(days=num_days)

        frame = yf.Ticker(symbol).history(
            start=window_start.strftime("%Y-%m-%d"),
            end=window_end.strftime("%Y-%m-%d")
        )
        if frame.empty:
            return f"No historical data found for '{symbol}'."
        # Only the trailing rows are useful in a chat answer.
        return f"Historical data for {symbol} ({period}):\n{frame.tail(5).to_string()}"

    except Exception as e:
        return f"Error fetching historical data for '{symbol}': {e}"
134
+
135
def _get_analyst_recommendations(symbol: str) -> str:
    """Return the most recent analyst recommendations for *symbol*."""
    print(f"[DEBUG] get_analyst_recommendations called for symbol='{symbol}'")
    try:
        recs = yf.Ticker(symbol).recommendations
        if recs is None or recs.empty:
            return f"No analyst recommendations found for {symbol}."
        # Only the most recent few rows are useful in a chat answer.
        return f"Analyst Recommendations for {symbol}:\n{recs.tail(5).to_string()}"
    except Exception as e:
        return f"Error fetching recommendations for '{symbol}': {e}"
148
+
149
def _get_earnings_calendar(symbol: str) -> str:
    """Return upcoming earnings-calendar info for *symbol*."""
    print(f"[DEBUG] get_earnings_calendar called for symbol='{symbol}'")
    try:
        calendar = yf.Ticker(symbol).calendar
        # Newer yfinance returns a dict, older versions a DataFrame; treat
        # None / empty of either shape as "nothing found".
        no_data = (
            calendar is None
            or (isinstance(calendar, dict) and not calendar)
            or (hasattr(calendar, 'empty') and calendar.empty)
        )
        if no_data:
            return f"No earnings calendar found for {symbol}."
        return f"Earnings Calendar for {symbol}:\n{calendar}"
    except Exception as e:
        return f"Error fetching earnings calendar for '{symbol}': {e}"
167
+
168
@function_tool
def get_summary(symbol: str, period: str = "1d", interval: str = "1h") -> str:
    """
    Fetch the latest summary information and intraday price data for a given ticker.
    Ensures recent data is retrieved by calculating start/end dates dynamically.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1d")
        Time range for price data. Examples: "1d", "5d", "1mo", "3mo".
    interval : str, optional (default="1h")
        Granularity of the data. Examples: "1m", "5m", "1h", "1d".

    Returns:
    --------
    str
        A formatted string containing:
        - Company/ticker name
        - Current price and change
        - Open, High, Low prices
        - Volume
        - Period and interval used
        On failure, an error message string is returned (no exception is raised).
    """
    # Thin tool wrapper: delegates to the undecorated implementation so the
    # logic can also be called directly from non-agent code.
    return _get_summary(symbol, period, interval)
194
+
195
@function_tool
def get_market_sentiment(symbol: str, period: str = "1mo") -> str:
    """
    Analyze recent price changes and provide a simple market sentiment.
    Uses dynamic start/end dates to ensure recent data.

    This tool computes the percentage change over the specified period and
    classifies the sentiment as:
    - Bullish (if price increased >2%)
    - Bearish (if price decreased >2%)
    - Neutral (otherwise)

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        Time range to analyze. Examples: "7d", "1mo", "3mo".

    Returns:
    --------
    str
        A human-readable sentiment string including percentage change, or an
        error message string on failure.
    """
    # Thin tool wrapper: delegates to the undecorated implementation.
    return _get_market_sentiment(symbol, period)
220
+
221
@function_tool
def get_history(symbol: str, period: str = "1mo") -> str:
    """
    Fetch historical price data for a given ticker.
    Ensures recent data is retrieved dynamically using start/end dates.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        The length of historical data to retrieve. Examples: "1d", "5d", "1mo", "3mo", "1y", "5y".

    Returns:
    --------
    str
        A formatted string showing the last 5 rows of historical prices
        (Open, High, Low, Close, Volume), or an error message on failure.
    """
    # Thin tool wrapper: delegates to the undecorated implementation.
    return _get_history(symbol, period)
240
+
241
@function_tool
def get_analyst_recommendations(symbol: str) -> str:
    """
    Fetch analyst recommendations for a given ticker.

    Parameters:
    -----------
    symbol : str
        The ticker symbol.

    Returns:
    --------
    str
        Formatted string of the most recent analyst recommendations, or an
        error message on failure.
    """
    # Thin tool wrapper: delegates to the undecorated implementation.
    return _get_analyst_recommendations(symbol)
257
+
258
@function_tool
def get_earnings_calendar(symbol: str) -> str:
    """
    Fetch the next earnings date for a ticker.

    Parameters:
    -----------
    symbol : str
        The ticker symbol.

    Returns:
    --------
    str
        Next earnings date info, or an error message on failure.
    """
    # Thin tool wrapper: delegates to the undecorated implementation.
    return _get_earnings_calendar(symbol)
274
+
common/rag/rag.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+
8
+ DB_NAME = 'healthcare_db'
9
+ DIRECTORY_NAME = "healthcare"
10
+
11
class Retriever:
    """FAISS-backed retriever over the healthcare PDF corpus.

    PDFs are read from <file_path>/healthcare and the FAISS index is
    persisted under <db_path>/healthcare_db.
    """

    def __init__(self,
                 file_path: str = os.path.join(os.getcwd(), "data"),
                 db_path: str = os.path.join(os.getcwd(), "db")):
        self.directory_path = os.path.join(file_path, DIRECTORY_NAME)
        self.db_path = os.path.join(db_path, DB_NAME)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=300,
            length_function=len,
            # separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
            is_separator_regex=False,
        )
        # Lazily populated by load_knowledge_base().
        self.retriever = None

    def load_knowledge_base(self):
        """Load the persisted FAISS index, building it from PDFs if absent."""
        if os.path.exists(self.db_path):
            self.retriever = FAISS.load_local(
                self.db_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            ).as_retriever()
        else:
            self.retriever = self._create_knowledge_base()

    def _create_knowledge_base(self):
        """Build the FAISS index from the PDF corpus, persist it, and return a retriever."""
        documents = self._load_documents()
        chunks = self._split_documents(documents)
        vectorstore = FAISS.from_documents(chunks, self.embeddings)
        vectorstore.save_local(self.db_path)
        return vectorstore.as_retriever()

    def _load_documents(self):
        """Load every PDF under the corpus directory (recursively)."""
        loader = DirectoryLoader(
            self.directory_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True
        )
        return loader.load()

    def _split_documents(self, documents):
        """Split loaded documents into overlapping chunks for embedding."""
        chunks = []
        for doc in documents:
            chunks.extend(self.text_splitter.split_documents([doc]))
        return chunks

    def retrieve(self, query, k=4):
        """Retrieve the top-k documents for *query* without scores."""
        if not self.retriever:
            self.load_knowledge_base()
        # BUG FIX: `k` was accepted but ignored (retriever.invoke always used
        # the retriever's own default). Query the underlying vectorstore so
        # the requested k is honored; k=4 matches the previous default.
        return self.retriever.vectorstore.similarity_search(query, k=k)

    def retrieve_with_scores(self, query, k=4):
        """Retrieve the top-k documents for *query* with similarity scores."""
        if not self.retriever:
            self.load_knowledge_base()

        # Get the underlying vectorstore from the retriever.
        vectorstore = self.retriever.vectorstore

        # Note: FAISS returns L2 distance, so lower scores are better.
        return vectorstore.similarity_search_with_score(query, k=k)

    def update_knowledge_base(self):
        """Rebuild the index from the current PDFs and refresh the retriever."""
        # BUG FIX: the rebuilt retriever used to be discarded, leaving
        # self.retriever pointing at the stale, pre-rebuild index.
        self.retriever = self._create_knowledge_base()

    def delete_knowledge_base(self):
        """Remove the persisted FAISS index directory, if present."""
        if os.path.exists(self.db_path):
            shutil.rmtree(self.db_path)

    # No cleanup needed for VectorStoreRetriever
94
+
common/utility/__init__.py ADDED
File without changes
common/utility/autogen_model_factory.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from azure.identity import DefaultAzureCredential, get_bearer_token_provider
3
+
4
class AutoGenModelFactory:
    """
    Factory for creating AutoGen compatible model instances.
    """

    @staticmethod
    def get_model(provider: str = "azure",  # azure, openai, google, groq, ollama
                  model_name: str = "gpt-4o",
                  temperature: float = 0,
                  model_info: dict = None
                  ):
        """
        Returns an AutoGen OpenAIChatCompletionClient instance for the
        requested provider.
        """
        # Lazy import so this module can load even when AutoGen is absent.
        try:
            from autogen_ext.models.openai import OpenAIChatCompletionClient
        except ImportError as e:
            raise ImportError("AutoGen libraries (autogen-agentchat, autogen-ext[openai]) are not installed.") from e

        # Normalize once instead of lowering the string in every branch.
        key = provider.lower()

        # ------------------------------------------------------------------
        # AZURE — Azure AD token auth instead of a static API key.
        # ------------------------------------------------------------------
        if key == "azure":
            token_provider = get_bearer_token_provider(
                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
            )
            return OpenAIChatCompletionClient(
                model=model_name,
                azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                azure_ad_token_provider=token_provider,
                temperature=temperature,
            )

        # ------------------------------------------------------------------
        # OPENAI
        # ------------------------------------------------------------------
        if key == "openai":
            return OpenAIChatCompletionClient(
                model=model_name,
                api_key=os.environ["OPENAI_API_KEY"],
                temperature=temperature,
            )

        # ------------------------------------------------------------------
        # GOOGLE (GEMINI) — reached through the OpenAI-compatible endpoint.
        # ------------------------------------------------------------------
        if key in ("google", "gemini"):
            return OpenAIChatCompletionClient(
                model=model_name,
                base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                api_key=os.environ["GOOGLE_API_KEY"],
                model_info=model_info,  # full model_info for capabilities
                temperature=temperature,
            )

        # ------------------------------------------------------------------
        # GROQ
        # ------------------------------------------------------------------
        if key == "groq":
            return OpenAIChatCompletionClient(
                model=model_name,
                base_url="https://api.groq.com/openai/v1",
                api_key=os.environ["GROQ_API_KEY"],
                temperature=temperature,
            )

        # ------------------------------------------------------------------
        # OLLAMA — local server; the API key is a required placeholder.
        # ------------------------------------------------------------------
        if key == "ollama":
            return OpenAIChatCompletionClient(
                model=model_name,
                base_url="http://localhost:11434/v1",
                api_key="ollama",  # dummy key
                model_info=model_info if model_info is not None else {},
                temperature=temperature,
            )

        raise ValueError(f"Unsupported AutoGen provider: {provider}")
common/utility/bkp/embedding_factory.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Union
3
+ # from azure.identity import DefaultAzureCredential
4
+ from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
5
+ from langchain_ollama import OllamaEmbeddings
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+
8
+
9
class EmbeddingFactory:
    """
    A static utility class to create and return embedding-model instances
    based on the requested provider.
    """

    @staticmethod
    def get_llm(llm_type: str) -> "Union[AzureOpenAIEmbeddings, OpenAIEmbeddings, OllamaEmbeddings, HuggingFaceEmbeddings]":
        """
        Returns an embedding model instance based on the specified type.

        Parameters:
            llm_type (str): Provider key. Valid values are 'openai', 'ollama'
                or 'hf'. ('azure' is recognized but currently disabled.)

        Returns:
            The embedding model instance for the provider.

        Raises:
            NotImplementedError: If 'azure' is requested — that backend is
                not wired up in this build.
            ValueError: For any unrecognized provider.
        """
        provider = llm_type.lower()
        if provider == "azure":
            # BUG FIX: this branch used to `pass` and silently return None,
            # which surfaced later as a confusing AttributeError in callers.
            # The AzureOpenAIEmbeddings setup (AAD token auth) is not enabled
            # yet, so fail loudly instead.
            raise NotImplementedError("Azure embeddings are not enabled in this build.")
        elif provider == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model="text-embedding-3-large"
            )
        elif provider == "ollama":  # must have ollama running locally with the following model
            return OllamaEmbeddings(model="gemma:2b")
        elif provider == "hf":  # must have key update in env:HF_TOKEN
            return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        else:
            # Message previously claimed only 'azure'/'openai' were valid.
            raise ValueError("Invalid llm_type. Use 'openai', 'ollama', or 'hf'.")
common/utility/bkp/llm_factory.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Any
4
+ from langchain_openai.chat_models import ChatOpenAI, AzureChatOpenAI
5
+ from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
6
+ # from azure.identity import DefaultAzureCredential
7
+ from huggingface_hub import login
8
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
9
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
10
+ from langchain_groq import ChatGroq
11
+ # from langchain_openai import OpenAIEmbeddings
12
+
13
class LLMFactory:
    """
    Factory class to provide LLM and embedding model instances for different
    providers.

    Supported chat providers: openai, huggingface, ollama, groq.
    Supported embedding providers: openai, huggingface, ollama.
    """

    @staticmethod
    def get_llm(provider: str, **kwargs) -> Any:
        """
        Returns a chat/completion LLM instance based on the provider.

        Supported providers: openai, huggingface, ollama, groq.
        Optional kwargs: api_key, model_name (provider-specific defaults apply).

        Raises:
            ValueError: For an unsupported provider.
        """
        if provider == "openai":
            # OpenAI Chat Model
            return ChatOpenAI(
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY")),
                model_name=kwargs.get("model_name", "gpt-4")
            )

        # pip install langchain langchain-huggingface huggingface_hub
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate first.
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            # BUG FIX: ChatHuggingFace does not accept repo_id/task/model_kwargs
            # directly — it wraps an LLM object passed via `llm=`. Build the
            # endpoint first, then wrap it.
            from langchain_huggingface import HuggingFaceEndpoint
            endpoint = HuggingFaceEndpoint(
                repo_id=kwargs.get("model_name", "mistralai/Mistral-Nemo-Instruct-2407"),
                task="text-generation",
                temperature=0.7,
                max_new_tokens=256,
            )
            return ChatHuggingFace(llm=endpoint)

        elif provider == "ollama":
            # Ollama local model
            return ChatOllama(
                model=kwargs.get("model_name", "gemma:2b"),
                temperature=0
            )

        elif provider == "groq":
            # Groq LLM
            return ChatGroq(
                model=kwargs.get("model_name", "Gemma2-9b-It"),
                max_tokens=512,
                api_key=kwargs.get("api_key", os.environ.get("GROQ_API_KEY"))
            )

        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @staticmethod
    def get_embedding_model(provider: str, **kwargs) -> Any:
        """
        Returns an embedding model instance based on the provider.

        Supported providers: openai, huggingface, ollama.

        Raises:
            ValueError: For 'groq' (no embedding endpoint) or any other
                unsupported provider.
        """
        if provider == "openai":
            return OpenAIEmbeddings(
                model=kwargs.get("model_name", "text-embedding-3-large"),
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY"))
            )
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate first.
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            return HuggingFaceEmbeddings(
                model_name=kwargs.get("model_name", "all-MiniLM-L6-v2")
            )
        elif provider == "groq":
            raise ValueError(f"No embedding support from the provider: {provider}")
        elif provider == "ollama":
            return OllamaEmbeddings(model=kwargs.get("model_name", "gemma:2b"))
        else:
            raise ValueError(f"Unsupported embedding provider: {provider}")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the OpenAI cookbook token counter.

        NOTE(review): always uses the gpt-3.5-turbo encoding, so counts are
        approximate for other models.
        """
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        tokens_per_message = 3  # <|start|>, role, <|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
common/utility/bkp/llm_factory2.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Union
4
+ # from azure.identity import DefaultAzureCredential
5
+ from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI
6
+
7
+
8
class LLMFactory:
    """
    A static utility class to create and return LLM instances based on the input type.
    """

    @staticmethod
    def get_llm(llm_type: str) -> Union[AzureChatOpenAI, ChatOpenAI]:
        """
        Return an LLM instance based on the specified type.

        Parameters:
            llm_type (str): The type of LLM to return. Valid values are
                'azure', 'openai', or 'openai_chat' (the last two are aliases).

        Returns:
            Union[AzureChatOpenAI, ChatOpenAI]: The LLM instance.

        Raises:
            NotImplementedError: If 'azure' is requested — Azure support is
                currently disabled (see commented-out code below).
            ValueError: If llm_type is not a recognized value.
        """
        kind = llm_type.lower()

        if kind == "azure":
            # Azure support is disabled; the original implementation is kept
            # below for reference. Raising instead of silently returning None
            # so callers fail fast rather than crashing later on a None model.
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # if not token:
            #     raise ValueError("Token is required for AzureChatOpenAI.")
            # return AzureChatOpenAI(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError("Azure support is currently disabled in LLMFactory.")
        elif kind in ("openai", "openai_chat"):
            # Both aliases previously had byte-identical branches; merged here.
            return ChatOpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name="gpt-4"
            )
        else:
            raise ValueError("Invalid llm_type. Use 'azure' or 'openai'.")

    @staticmethod
    def num_tokens_from_messages(messages):
        """
        Return the number of tokens used by a list of messages.
        Adapted from the Open AI cookbook token counter.
        """
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

        # Each message is sandwiched with <|start|>role and <|end|>.
        # Hence, messages look like: <|start|>system or user or assistant{message}<|end|>
        tokens_per_message = 3  # token1:<|start|>, token2:role, token3:<|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
common/utility/langchain_model_factory.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from azure.identity import DefaultAzureCredential, get_bearer_token_provider
3
+ from langchain_openai import AzureChatOpenAI, ChatOpenAI
4
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
5
+ from langchain_ollama import ChatOllama
6
+ from huggingface_hub import login
7
+
8
class LangChainModelFactory:
    """
    Factory that builds LangChain-compatible chat model instances for a
    small set of providers: azure, openai, huggingface, and ollama.
    """

    @staticmethod
    def get_model(provider: str = "openai",  # openai, azure, huggingface, ollama
                  model_name: str = "gpt-4o",
                  temperature: float = 0
                  ):
        """
        Build and return a LangChain chat model for the requested provider.

        Raises:
            ValueError: If the provider is not one of the supported backends.
        """
        backend = provider.lower()

        if backend == "azure":
            # Authenticate with Entra ID (bearer token) instead of a static key.
            ad_token_provider = get_bearer_token_provider(
                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
            )
            return AzureChatOpenAI(
                azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],  # Or specific model_name if deployment matches
                api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                azure_ad_token_provider=ad_token_provider,
                model_name=model_name,
                temperature=temperature,
            )

        if backend == "openai":
            return ChatOpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name=model_name,
                temperature=temperature,
            )

        if backend == "huggingface":
            # Authenticate only when a token is configured (private models/endpoints).
            hf_token = os.environ.get("HF_TOKEN")
            if hf_token:
                login(token=hf_token)
            endpoint = HuggingFaceEndpoint(
                repo_id=model_name,
                task="text-generation",
                temperature=temperature,
                max_new_tokens=512,
                huggingfacehub_api_token=os.environ.get("HF_TOKEN")
            )
            return ChatHuggingFace(llm=endpoint)

        if backend == "ollama":
            # Local Ollama server; no credentials required.
            return ChatOllama(model=model_name, temperature=temperature)

        raise ValueError(f"Unsupported LangChain provider: {provider}")
common/utility/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import datetime
3
+
4
def log_call(func):
    """
    A decorator that logs when a function is called and any error it raises.

    The wrapped function's return value is passed through unchanged and its
    metadata is preserved via functools.wraps; exceptions are logged and
    then re-raised.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        arg_list = ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({arg_list})")
        try:
            result = func(*args, **kwargs)
            # print(f"[{timestamp}] ✅ Finished: {func.__name__}")
            return result
        except Exception as e:
            # Re-read the clock: reusing the call-time timestamp would
            # mislabel when a long-running function actually failed.
            error_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{error_time}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
common/utility/model_factory_notused.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Union
4
+ from azure.identity import DefaultAzureCredential, get_bearer_token_provider
5
+ from langchain_openai import AzureChatOpenAI, ChatOpenAI, AzureOpenAIEmbeddings, OpenAIEmbeddings
6
+ from agents import OpenAIChatCompletionsModel
7
+ from openai import AsyncOpenAI, AsyncAzureOpenAI
8
+ from huggingface_hub import login
9
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
10
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
11
+
12
+
13
class ModelFactory:
    """
    A static utility class to create and return LLM instances based on the input type.

    One entry point (get_model) fans out first on the agent framework
    (autogen / langchain / openai-sdk-agent) and then on the model provider.
    """

    @staticmethod
    def get_model(framework: str = "openai-sdk-agent", # openai-sdk-agent, langchain, autogen
                  provider: str = "openai", # openai, azure, google, groq, huggingface, ollama
                  model_name: str = "gpt-4o-mini", # gpt-4o-mini, gemini-flash-1.5, groq/compound
                  model_info: dict = None, # additional info (e.g. backend provider for autogen/langchain)
                  temperature: float = 0
                  ) -> Union[AzureChatOpenAI, ChatOpenAI, OpenAIChatCompletionsModel, ChatHuggingFace, ChatOllama]:
        """
        Returns an LLM instance based on the specified parameters.

        Parameters:
        framework (str): The framework to use ('langchain', 'openai-sdk-agent', 'autogen').
        provider (str): The model provider ('openai', 'azure', 'google', 'groq', 'huggingface', 'ollama').
        model_name (str): The specific model name.
        model_info (dict): Additional model info (only forwarded for google/ollama AutoGen backends).
        temperature (float): The temperature for generation (default 0).

        Returns:
        Union[...]: The model instance.

        Raises:
        ImportError: If framework='autogen' but the AutoGen packages are missing.
        ValueError: If the framework or provider combination is unsupported.
        """

        # ----------------------------------------------------------------------
        # AUTOGEN SUPPORT
        # ----------------------------------------------------------------------
        if framework.lower() == "autogen":
            # Lazy import to avoid dependency issues if autogen is not installed
            try:
                from autogen_ext.models.openai import OpenAIChatCompletionClient
            except ImportError as e:
                raise ImportError("AutoGen libraries (autogen-agentchat, autogen-ext[openai]) are not installed.") from e

            # Azure Backend — authenticates via Entra ID bearer tokens, not API keys.
            if provider.lower() == "azure":
                token_provider = get_bearer_token_provider(
                    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
                )
                return OpenAIChatCompletionClient(
                    model=model_name,
                    azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                    azure_ad_token_provider=token_provider,
                    temperature=temperature,
                )

            # OpenAI Backend
            elif provider.lower() == "openai":
                return OpenAIChatCompletionClient(
                    model=model_name,
                    api_key=os.environ["OPENAI_API_KEY"],
                    temperature=temperature,
                )

            # Google Backend (Gemini via OpenAI compat)
            elif provider.lower() == "google" or provider.lower() == "gemini":
                return OpenAIChatCompletionClient(
                    model=model_name,
                    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                    api_key=os.environ["GOOGLE_API_KEY"],
                    model_info=model_info, # Pass full model_info for capabilities
                    temperature=temperature,
                )

            # Groq Backend
            elif provider.lower() == "groq":
                return OpenAIChatCompletionClient(
                    model=model_name,
                    base_url="https://api.groq.com/openai/v1",
                    api_key=os.environ["GROQ_API_KEY"],
                    temperature=temperature,
                )

            # Ollama Backend — local server speaking the OpenAI wire protocol.
            elif provider.lower() == "ollama":
                # Ensure model_info defaults to empty dict if None
                info = model_info if model_info is not None else {}
                return OpenAIChatCompletionClient(
                    model=model_name,
                    base_url="http://localhost:11434/v1",
                    api_key="ollama", # dummy key
                    model_info=info,
                    temperature=temperature,
                )

            else:
                raise ValueError(f"Unsupported AutoGen provider: {provider}")

        # ----------------------------------------------------------------------
        # LANGCHAIN SUPPORT
        # ----------------------------------------------------------------------
        elif framework.lower() == "langchain":

            if provider.lower() == "azure":
                token_provider = get_bearer_token_provider(
                    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
                )
                return AzureChatOpenAI(
                    azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                    azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],
                    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                    azure_ad_token_provider=token_provider,
                    model_name=model_name,
                    temperature=temperature,
                )

            elif provider.lower() == "openai":
                return ChatOpenAI(
                    api_key=os.environ["OPENAI_API_KEY"],
                    model_name=model_name,
                    temperature=temperature,
                )

            elif provider.lower() == "huggingface":
                # Login only when a token is configured (private models/endpoints).
                if os.environ.get("HF_TOKEN"):
                    login(token=os.environ.get("HF_TOKEN"))
                llm = HuggingFaceEndpoint(
                    repo_id=model_name,
                    task="text-generation",
                    temperature=temperature,
                    max_new_tokens=512,
                    huggingfacehub_api_token=os.environ.get("HF_TOKEN")
                )
                return ChatHuggingFace(llm=llm)

            elif provider.lower() == "ollama":
                return ChatOllama(model=model_name, temperature=temperature)

            else:
                raise ValueError(f"Unsupported LangChain provider: {provider}")

        # ----------------------------------------------------------------------
        # STANDARD LOGIC (Agents Lib / OpenAI SDK)
        # ----------------------------------------------------------------------
        # NOTE(review): in this branch the temperature argument is not forwarded
        # to OpenAIChatCompletionsModel or the underlying client — confirm
        # whether it should be supplied per-run via the agents framework.
        elif framework.lower() == "openai-sdk-agent" or framework.lower() == "openai-sdk" or framework.lower() == "openai":

            if provider.lower() == "azure":
                token_provider = get_bearer_token_provider(
                    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
                )
                client = AsyncAzureOpenAI(
                    azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                    azure_ad_token_provider=token_provider,
                )
                return OpenAIChatCompletionsModel(model=model_name, openai_client=client)

            elif provider.lower() == "openai":
                client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
                return OpenAIChatCompletionsModel(model=model_name, openai_client=client)

            elif provider.lower() == "google":
                GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
                client = AsyncOpenAI(
                    base_url=GEMINI_BASE_URL,
                    api_key=os.environ["GOOGLE_API_KEY"]
                )
                return OpenAIChatCompletionsModel(model=model_name, openai_client=client)

            elif provider.lower() == "groq":
                GROQ_BASE_URL = "https://api.groq.com/openai/v1"
                client = AsyncOpenAI(
                    base_url=GROQ_BASE_URL,
                    api_key=os.environ["GROQ_API_KEY"]
                )
                return OpenAIChatCompletionsModel(model=model_name, openai_client=client)

            elif provider.lower() == "ollama":
                client = AsyncOpenAI(
                    base_url="http://localhost:11434/v1",
                    api_key="ollama"
                )
                return OpenAIChatCompletionsModel(model=model_name, openai_client=client)

            elif provider.lower() == "huggingface":
                # Agents lib doesn't have native HF support in the same way
                raise ValueError("For Hugging Face, please use framework='langchain'")

            else:
                raise ValueError(f"Unsupported provider for openai-sdk-agent: {provider}")

        else:
            raise ValueError(f"Unsupported framework: {framework}")


    @staticmethod
    def num_tokens_from_messages(messages, model: str = "gpt-4o"):
        """
        Return the number of tokens used by a list of messages.

        Handles both plain string values and structured "content" lists
        (text parts are encoded; image parts get a flat per-image count).
        """
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model names fall back to a generic modern encoding.
            encoding = tiktoken.get_encoding("cl100k_base")

        tokens_per_message = 3
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                if key == "name":
                    num_tokens += 1

                # Encode values if they are strings
                if isinstance(value, str):
                    num_tokens += len(encoding.encode(value))
                elif isinstance(value, list) and key == "content":
                    for part in value:
                        if isinstance(part, dict) and part.get("type") == "text":
                            num_tokens += len(encoding.encode(part.get("text", "")))
                        elif isinstance(part, dict) and part.get("type") == "image_url":
                            # Flat estimate per image — confirm against the
                            # current OpenAI vision token accounting.
                            num_tokens += 85

        num_tokens += 3
        return num_tokens
232
+
233
+
234
class EmbeddingFactory:
    """
    A static utility class to create and return Embedding Model instances.
    """

    @staticmethod
    def get_embedding_model(provider: str = "openai",
                            model_name: str = "text-embedding-3-small"
                            ) -> Union[AzureOpenAIEmbeddings, OpenAIEmbeddings, OllamaEmbeddings, HuggingFaceEmbeddings]:
        """Create an embedding model instance for the requested provider."""
        choice = provider.lower()

        if choice == "azure":
            return EmbeddingFactory._azure_embeddings(model_name)
        if choice == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model=model_name
            )
        if choice == "ollama":
            return OllamaEmbeddings(model=model_name)
        if choice == "huggingface":
            # Authenticate only when a token is configured (private models).
            if os.environ.get("HF_TOKEN"):
                login(token=os.environ.get("HF_TOKEN"))
            return HuggingFaceEmbeddings(model_name=model_name)

        raise ValueError(f"Unsupported embedding provider: {provider}")

    @staticmethod
    def _azure_embeddings(model_name: str) -> AzureOpenAIEmbeddings:
        """Build Azure embeddings authenticated via an Entra ID bearer token."""
        bearer = get_bearer_token_provider(
            DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
        )
        return AzureOpenAIEmbeddings(
            azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            azure_deployment=os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", model_name),
            api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            azure_ad_token_provider=bearer,
        )
267
+
268
+ # =================================================================================================
269
+ # GLOBAL HELPER FUNCTIONS (for agents)
270
+ # =================================================================================================
271
+
272
+ # model used for orchestrator or executor
273
+ # def get_model(provider:str = "google", framework:str = "openai-sdk", model_name:str = "gemini-2.5-flash"):
274
def get_model(provider: str = "openai", framework: str = "openai", model_name: str = "gpt-4-turbo"):
    """
    Build the model used for an orchestrator or executor agent.

    Bug fix: the original membership test was ``provider in list["gemini", "google"]``,
    which subscripts the built-in ``list`` type (producing a ``types.GenericAlias``)
    and raises ``TypeError: argument of type 'types.GenericAlias' is not iterable``
    at runtime instead of testing membership. Replaced with a tuple membership test.

    Parameters:
        provider (str): Model provider ('openai', 'google'/'gemini', 'groq', ...).
        framework (str): Agent framework forwarded to ModelFactory.get_model.
        model_name (str): Concrete model identifier for the chosen provider.

    Returns:
        A model instance created by ModelFactory.get_model.
    """
    model_info = None
    # Gemini models need explicit capability metadata when driven through the
    # OpenAI-compatible endpoint; other providers leave model_info as None.
    if provider in ("gemini", "google"):
        model_info = {
            "family": "gemini",
            "vision": True,
            "function_calling": True,
            "json_output": True,
            "structured_output": True,
        }

    return ModelFactory.get_model(framework=framework,
                                  provider=provider,
                                  model_name=model_name,
                                  model_info=model_info,
                                  temperature=0)
296
+
297
+ # Use this model where agent executing tool and returning JSON
298
def get_model_json(model_name: str = "gpt-4.1-mini", provider: str = "openai"):
    """Return a model intended for agents that execute tools and emit JSON."""
    factory_kwargs = {
        "framework": "openai-sdk",
        "provider": provider,
        "model_name": model_name,
        "temperature": 0,
    }
    return ModelFactory.get_model(**factory_kwargs)
common/utility/openai_model_factory.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Union
4
+ from azure.identity import DefaultAzureCredential, get_bearer_token_provider
5
+ from agents import OpenAIChatCompletionsModel
6
+ from openai import AsyncOpenAI, AsyncAzureOpenAI
7
+ from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_ollama import OllamaEmbeddings
10
+ from huggingface_hub import login
11
+
12
class OpenAIModelFactory:
    """
    Factory for creating OpenAI-SDK compatible model instances (using the 'agents' library).

    Every supported provider is reached through the OpenAI-compatible chat
    completions API; non-OpenAI backends just swap the client / base URL.
    """

    @staticmethod
    def get_model(provider: str = "openai",  # openai, azure, google, groq, ollama
                  model_name: str = "gpt-4o",
                  temperature: float = 0
                  ) -> OpenAIChatCompletionsModel:
        """
        Build an OpenAIChatCompletionsModel backed by the requested provider.

        NOTE: the temperature argument is accepted for interface symmetry but
        is not forwarded anywhere in this method.
        """
        backend = provider.lower()

        # Azure: authenticate via Entra ID rather than a static key.
        if backend == "azure":
            ad_token_provider = get_bearer_token_provider(
                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
            )
            azure_client = AsyncAzureOpenAI(
                azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                azure_ad_token_provider=ad_token_provider,
            )
            return OpenAIChatCompletionsModel(model=model_name, openai_client=azure_client)

        # Plain OpenAI.
        if backend == "openai":
            return OpenAIChatCompletionsModel(
                model=model_name,
                openai_client=AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]),
            )

        # Gemini through Google's OpenAI-compatibility endpoint.
        if backend in ("google", "gemini"):
            gemini_client = AsyncOpenAI(
                base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                api_key=os.environ["GOOGLE_API_KEY"]
            )
            return OpenAIChatCompletionsModel(model=model_name, openai_client=gemini_client)

        # Groq's OpenAI-compatible endpoint.
        if backend == "groq":
            groq_client = AsyncOpenAI(
                base_url="https://api.groq.com/openai/v1",
                api_key=os.environ["GROQ_API_KEY"]
            )
            return OpenAIChatCompletionsModel(model=model_name, openai_client=groq_client)

        # Local Ollama server speaking the OpenAI wire format.
        if backend == "ollama":
            ollama_client = AsyncOpenAI(
                base_url="http://localhost:11434/v1",
                api_key="ollama"  # dummy key; the local server ignores it
            )
            return OpenAIChatCompletionsModel(model=model_name, openai_client=ollama_client)

        raise ValueError(f"Unsupported provider for OpenAIModelFactory: {provider}")


    @staticmethod
    def num_tokens_from_messages(messages, model: str = "gpt-4o"):
        """
        Return the number of tokens used by a list of messages.
        """
        try:
            enc = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model names fall back to a generic modern encoding.
            enc = tiktoken.get_encoding("cl100k_base")

        per_message_overhead = 3
        total = 0

        for msg in messages:
            total += per_message_overhead
            for field, payload in msg.items():
                if field == "name":
                    total += 1

                # Encode values if they are strings
                if isinstance(payload, str):
                    total += len(enc.encode(payload))
                elif isinstance(payload, list) and field == "content":
                    for part in payload:
                        if isinstance(part, dict) and part.get("type") == "text":
                            total += len(enc.encode(part.get("text", "")))
                        elif isinstance(part, dict) and part.get("type") == "image_url":
                            total += 85

        total += 3
        return total
118
+
119
+
120
class EmbeddingFactory:
    """
    A static utility class to create and return Embedding Model instances.
    """

    @staticmethod
    def get_embedding_model(provider: str = "openai",
                            model_name: str = "text-embedding-3-small"
                            ) -> Union[AzureOpenAIEmbeddings, OpenAIEmbeddings, OllamaEmbeddings, HuggingFaceEmbeddings]:
        """Return an embedding model for the given provider."""
        backend = provider.lower()

        if backend == "azure":
            # Entra ID bearer-token auth instead of a static API key.
            ad_tokens = get_bearer_token_provider(
                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
            )
            return AzureOpenAIEmbeddings(
                azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
                azure_deployment=os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", model_name),
                api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                azure_ad_token_provider=ad_tokens,
            )

        if backend == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model=model_name
            )

        if backend == "ollama":
            return OllamaEmbeddings(model=model_name)

        if backend == "huggingface":
            # Login only when a token is configured (private models/endpoints).
            if os.environ.get("HF_TOKEN"):
                login(token=os.environ.get("HF_TOKEN"))
            return HuggingFaceEmbeddings(model_name=model_name)

        raise ValueError(f"Unsupported embedding provider: {provider}")
153
+
154
+
155
+ # =================================================================================================
156
+ # GLOBAL HELPER FUNCTIONS
157
+ # =================================================================================================
158
+
159
def get_model(provider: str = "openai", model_name: str = "gpt-4o"):
    """
    Global helper to get an OpenAI-SDK compatible model.
    Defaults to the OpenAI provider with gpt-4o; temperature is pinned to 0.
    """
    selection = {"provider": provider, "model_name": model_name, "temperature": 0}
    return OpenAIModelFactory.get_model(**selection)
169
+
170
def get_model_json(model_name: str = "gpt-4o-2024-08-06", provider: str = "openai"):
    """
    Global helper to get a JSON-capable model (Structured Outputs).
    Defaults to gpt-4o-2024-08-06 on OpenAI; temperature is pinned to 0.
    """
    config = dict(provider=provider, model_name=model_name, temperature=0)
    return OpenAIModelFactory.get_model(**config)
pyproject.toml ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "agents"
3
+ version = "0.1.0"
4
+ description = "Agentic AI project"
5
+ readme = "README.md"
6
+ requires-python = "==3.12.*"
7
+
8
+ dependencies = [
9
+ # =======================
10
+ # LLM PROVIDERS / SDKs
11
+ # =======================
12
+ "openai>=2.8.1",
13
+ "openai-agents>=0.5.1",
14
+ "anthropic>=0.49.0",
15
+ "langchain-openai>=1.0.3",
16
+ "langchain-anthropic>=1.1.0",
17
+ "langchain_huggingface>=1.1.0",
18
+ "langchain_ollama>=1.0.0",
19
+ "langchain_google_genai>=3.0.3",
20
+ "langchain_groq>=1.0.1",
21
+
22
+
23
+ # =======================
24
+ # LANGCHAIN / LANGGRAPH
25
+ # =======================
26
+ "langchain>=1.0.7",
27
+ "langchain-community>=0.4.1",
28
+ "langgraph>=1.0.3",
29
+ "langgraph-checkpoint-sqlite>=3.0.0",
30
+ "langsmith>=0.4.43",
31
+ "langchain-text-splitters>=1.0.0",
32
+ "langchain-chroma>=1.0.0",
33
+ "html2text>=2025.4.15",
34
+ "traceloop-sdk>=0.33.0",
35
+
36
+ # =======================
37
+ # MICROSOFT AGENT FRAMEWORK
38
+ # =======================
39
+ #"agent-framework==1.0.0b251204",
40
+ #"agent-framework-azure-ai==1.0.0b251204",
41
+ #"azure-ai-projects",
42
+ #"azure-ai-agents",
43
+ #"azure-ai-agents>=1.2.0b5",
44
+ #"agent-framework-azure-ai",
45
+
46
+ # =======================
47
+ # VECTOR DB / INDEXING
48
+ # =======================
49
+ "faiss-cpu>=1.13.0",
50
+ "chromadb==1.3.5",
51
+ "sentence-transformers>=5.1.2",
52
+ "pymupdf",
53
+ "pypdf>=6.3.0",
54
+ "pypdf2>=3.0.1",
55
+ "arxiv>=2.3.1",
56
+ "wikipedia>=1.4.0",
57
+
58
+ # =======================
59
+ # AUTOGEN
60
+ # =======================
61
+ "autogen-agentchat>=0.7.5",
62
+ "autogen-ext[grpc,mcp,ollama,openai]>=0.7.5",
63
+ "asyncio",
64
+
65
+ # =======================
66
+ # MCP
67
+ # =======================
68
+ "mcp-server-fetch>=2025.1.17",
69
+ "mcp[cli]>=1.21.2",
70
+
71
+ # =======================
72
+ # NETWORKING / UTILITIES
73
+ # =======================
74
+ "psutil>=7.0.0",
75
+ "python-dotenv>=1.0.1",
76
+ "requests>=2.32.3",
77
+ "aiohttp>=3.8.5",
78
+ "httpx>=0.28.1",
79
+ "speedtest-cli>=2.1.3",
80
+ "logfire",
81
+ "google-search-results",
82
+ "smithery>=0.4.4",
83
+ "sendgrid",
84
+
85
+ # =======================
86
+ # WEB SCRAPING
87
+ # =======================
88
+ "playwright>=1.51.0",
89
+ "beautifulsoup4>=4.12.3",
90
+ "lxml>=5.3.1",
91
+
92
+ # =======================
93
+ # FINANCE / NLP
94
+ # =======================
95
+ "yfinance>=0.2.66",
96
+ "textblob>=0.17.1",
97
+ "polygon-api-client>=1.16.3",
98
+
99
+ # =======================
100
+ # VISUAL / UI / PDF
101
+ # =======================
102
+ "plotly>=6.5.0",
103
+ "streamlit>=1.51.0",
104
+ "reportlab>=4.4.5",
105
+ "fastapi",
106
+ "Pillow",
107
+ "python-docx",
108
+ "matplotlib",
109
+ "fpdf",
110
+ "extra-streamlit-components",
111
+ "nest_asyncio",
112
+
113
+ # =======================
114
+ # AUDIO / VIDEO
115
+ # =======================
116
+ "yt_dlp>=2025.11.12",
117
+ "openai-whisper>=1.0.0",
118
+
119
+ # =======================
120
+ # MACHINE LEARNING
121
+ # =======================
122
+ "scikit-learn>=1.7.2",
123
+ "huggingface_hub<=1.1.4",
124
+ "datasets>=4.4.1",
125
+
126
+ # =======================
127
+ # IPYNB SUPPORT
128
+ # =======================
129
+ "ipykernel>=7.1.0",
130
+
131
+ # =======================
132
+ # TOOLS
133
+ # =======================
134
+ "ddgs>=9.9.2",
135
+ "duckduckgo_search",
136
+ "azure-identity>=1.25.1",
137
+
138
+ # =======================
139
+ # OBSERVABILITY
140
+ # =======================
141
+ "openinference-instrumentation-autogen>=0.1.0",
142
+ "openinference-instrumentation-openai>=0.1.15",
143
+ "opentelemetry-sdk>=1.20.0",
144
+ "opentelemetry-exporter-otlp>=1.20.0",
145
+ "opentelemetry-api>=1.20.0",
146
+ ]
147
+
148
+ [dependency-groups]
149
+ dev = [
150
+ "pytest>=8.3.3",
151
+ "ipykernel>=7.1.0",
152
+ "pytest-asyncio",
153
+ ]
154
+
155
+ # ============================================================
156
+ # BUILD SYSTEM
157
+ # ============================================================
158
+ # Defines how to build the project.
159
+ # We use setuptools as the build backend, ensuring consistent packaging.
160
+ [build-system]
161
+ requires = ["setuptools>=80.9.0"]
162
+ build-backend = "setuptools.build_meta"
163
+
164
+ # ============================================================
165
+ # PACKAGING & DISCOVERY
166
+ # ============================================================
167
+ # Tells setuptools where to find the source code.
168
+ # This makes 'common' and 'src' importable when installed (pip install -e .).
169
+ [tool.setuptools.packages.find]
170
+ where = ["."] # Look in the project root
171
+ include = ["common*", "src*"] # Treat 'common' and 'src' folders as packages
172
+
173
+
174
+ # ============================================================
175
+ # PYTEST SETTINGS
176
+ # ============================================================
177
+ # Configures the test runner to automatically find code.
178
+ [tool.pytest.ini_options]
179
+ # Adds 'src' and 'common' to the python path during tests.
180
+ # This allows tests to import modules (e.g., 'import travel_agent')
181
+ # just like the apps do locally, preventing ModuleNotFoundError.
182
+ pythonpath = ["src", "common"]
183
+ testpaths = ["tests"] # Only look for tests in the 'tests' directory
184
+ addopts = "-q" # Run in quiet mode (less verbose output)
run.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Universal App Launcher for AgenticAI Projects
4
+
5
+ Usage:
6
+ python run.py <app_name> [--port PORT] [--help]
7
+
8
+ Examples:
9
+ python run.py healthcare
10
+ python run.py deep-research --port 8502
11
+ python run.py stock-analyst
12
+ python run.py --list
13
+ """
14
+
15
+ import sys
16
+ import os
17
+ import subprocess
18
+ import argparse
19
+ from pathlib import Path
20
+ from typing import Dict, Optional
21
+ from agents import Runner, SQLiteSession
22
+ # from agents import set_trace_processors
23
+ # from langsmith.wrappers import OpenAIAgentsTracingProcessor
24
+
25
+ # Load environment variables explicitly
26
+ from dotenv import load_dotenv
27
+ load_dotenv(override=True)
28
+
29
# App registry - maps app names to their paths and entry points.
# Each entry: CLI app name -> {"path": app directory relative to the project
# root, "entry": script file launched by streamlit, "description": human text
# shown by --list}. launch_app() and validate_app() read this table.
APP_REGISTRY: Dict[str, Dict[str, str]] = {
    "healthcare": {
        "path": "src/healthcare-assistant",
        "entry": "app.py",
        "description": "Healthcare Assistant - Medical information with RAG and web search"
    },
    "deep-research": {
        "path": "src/deep-research",
        "entry": "app.py",
        "description": "Deep Research AI - Comprehensive research assistant"
    },
    "stock-analyst": {
        "path": "src/stock-analyst",
        "entry": "app.py",
        "description": "Stock Analyst - Financial analysis and stock recommendations"
    },
    "travel-agent": {
        "path": "src/travel-agent",
        "entry": "app.py",
        "description": "Travel Agent - Trip planning and travel recommendations"
    },
    "trip-planner": {
        "path": "src/trip-planner",
        # NOTE: this app's entry point is main.py, unlike the others (app.py).
        "entry": "main.py",
        "description": "Trip Planner - Detailed trip itinerary planning"
    },
    "chatbot": {
        "path": "src/chatbot",
        "entry": "app.py",
        "description": "General Chatbot - Multi-purpose conversational AI"
    },
    "accessibility": {
        "path": "src/accessibility",
        "entry": "app.py",
        "description": "Accessibility Tools - Assistive technology applications"
    },
    "literature-review": {
        "path": "src/literature-review",
        "entry": "app.py",
        "description": "Literature Review Assistant - Multi-agent literature review tool"
    },
    "market-analyst": {
        "path": "src/market-analyst",
        "entry": "app.py",
        "description": "Market Analyst - Multi-agent market analysis tool"
    },
    "image": {
        "path": "src/image-generator",
        "entry": "app.py",
        "description": "Image Generator - Multi-agent image generation tool"
    },
    "interview-assistant": {
        "path": "src/interview-assistant",
        "entry": "app.py",
        "description": "Interview Assistant - Multi-agent interview tool"
    }
}
87
+
88
+
89
def print_banner():
    """Print the launcher banner."""
    rule = "=" * 70
    print(rule)
    print("🚀 AgenticAI Projects Launcher".center(70))
    print(rule)
    print()
95
+
96
+
97
def list_apps():
    """List all available apps."""
    print_banner()
    print("Available Applications:\n")

    # Pad app names so the descriptions line up in one column.
    column = max(map(len, APP_REGISTRY)) + 2
    for name in sorted(APP_REGISTRY):
        print(f"  {name.ljust(column)} - {APP_REGISTRY[name]['description']}")

    print("\n" + "=" * 70)
    print("\nUsage: python run.py <app_name> [--port PORT]")
    print("Example: python run.py healthcare --port 8501\n")
110
+
111
+
112
def validate_app(app_name: str) -> Optional[Dict[str, str]]:
    """
    Validate that the app exists and its files are present.

    Args:
        app_name: Name of the app to validate

    Returns:
        App configuration dict if valid, None otherwise
    """
    config = APP_REGISTRY.get(app_name)
    if config is None:
        print(f"❌ Error: Unknown app '{app_name}'")
        print(f"\nAvailable apps: {', '.join(sorted(APP_REGISTRY.keys()))}")
        print("\nRun 'python run.py --list' to see all available apps.")
        return None

    # Resolve the entry-point file relative to this launcher script.
    app_path = Path(__file__).parent / config["path"] / config["entry"]
    if not app_path.exists():
        print(f"❌ Error: App file not found at {app_path}")
        return None

    return config
137
+
138
+
139
def launch_app(app_name: str, port: Optional[int] = None):
    """
    Launch a Streamlit app.

    Args:
        app_name: Name of the app to launch (a key of APP_REGISTRY)
        port: Optional port number (default: 8501)

    Exits the process with status 1 when the app is unknown, streamlit is
    not installed, or the launch fails.
    """
    config = validate_app(app_name)
    if not config:
        sys.exit(1)

    project_root = Path(__file__).parent
    app_dir = project_root / config["path"]
    app_file = config["entry"]

    print_banner()
    print(f"📱 Launching: {config['description']}")
    print(f"📂 Location: {config['path']}")
    print(f"🌐 Entry Point: {app_file}")

    # Build streamlit command
    cmd = ["streamlit", "run", app_file]

    # Add port if specified
    if port:
        cmd.extend(["--server.port", str(port)])
        print(f"🔌 Port: {port}")
    else:
        # Streamlit's built-in default when no --server.port is given.
        print("🔌 Port: 8501 (default)")

    print("\n" + "=" * 70)
    print("\n🎯 Starting application...\n")

    # Prepare environment with project root in PYTHONPATH so the launched
    # app can import the shared 'common' and 'src' packages.
    env = os.environ.copy()
    env["PYTHONPATH"] = str(project_root) + os.pathsep + env.get("PYTHONPATH", "")
    print(f"\n\nPYTHONPATH: {env['PYTHONPATH']}")

    try:
        # Run from the app's directory via cwd= instead of os.chdir(), so the
        # launcher process's own working directory is left untouched.
        subprocess.run(cmd, env=env, cwd=app_dir)
    except KeyboardInterrupt:
        print("\n\n👋 Application stopped by user")
    except FileNotFoundError:
        print("\n❌ Error: Streamlit not found. Please install it:")
        print("   pip install streamlit")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Error launching app: {e}")
        sys.exit(1)
191
+
192
def main():
    """Main entry point: parse CLI arguments and dispatch to list/launch."""
    parser = argparse.ArgumentParser(
        description="Universal launcher for AgenticAI project applications",
        # RawDescriptionHelpFormatter preserves the hand-formatted epilog below.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run.py healthcare              # Launch healthcare chatbot
  python run.py deep-research --port 8502  # Launch on custom port
  python run.py --list                  # List all available apps

Available Apps:
  """ + "\n  ".join(f"{name}: {config['description']}"
                    for name, config in sorted(APP_REGISTRY.items()))
    )

    # Positional app name is optional so that bare 'python run.py' shows help.
    parser.add_argument(
        "app_name",
        nargs="?",
        help="Name of the app to launch"
    )

    parser.add_argument(
        "--port",
        type=int,
        help="Port number for Streamlit server (default: 8501)"
    )

    parser.add_argument(
        "--list",
        action="store_true",
        help="List all available apps"
    )

    args = parser.parse_args()

    # Handle --list flag
    if args.list:
        list_apps()
        return

    # Require app name if not listing
    if not args.app_name:
        parser.print_help()
        print("\n")
        list_apps()
        return

    # Launch the app
    launch_app(args.app_name, args.port)
242
+
243
+
244
+ if __name__ == "__main__":
245
+ # set_trace_processors([OpenAIAgentsTracingProcessor()])
246
+ main()
src/interview-assistant/Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# PYTHONUNBUFFERED: stream logs straight to the container output.
# PYTHONPATH: make the app importable. The base image defines no PYTHONPATH,
# so appending ":$PYTHONPATH" (as before) expanded to an empty trailing entry,
# which Python treats as "current directory" on sys.path — dropped here.
ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive \
    PYTHONPATH=/app/src/interview-assistant

WORKDIR /app

# System deps
RUN apt-get update && apt-get install -y \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv (fast Python package installer)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata first so the dependency layer caches well
COPY pyproject.toml .
COPY uv.lock .

# Copy application code
COPY common/ ./common/
COPY src/interview-assistant/ ./src/interview-assistant/

# Install dependencies using uv, then install the project itself (editable)
# into the system interpreter. --no-dev excludes dev dependencies.
RUN uv sync --frozen --no-dev && \
    uv pip install -e . --system

# Copy entry point
COPY run.py .

EXPOSE 7860

CMD ["python", "run.py", "interview-assistant", "--port", "7860"]
src/interview-assistant/README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Interviewer Assistant
3
+ emoji: 👔
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: mit
10
+ short_description: Agentic AI for Resume Analysis & Interview Prep
11
+ ---
12
+
13
+ # 👔 Interviewer Assistant
14
+
15
+ A **Multi-Agent System** designed to assist HR and technical interviewers by automatically analyzing job descriptions and resumes to generate tailored interview questions and fitness scores.
16
+
17
+ ## 🚀 Features
18
+
19
+ - **🧠 Multi-Agent Swarm**:
20
+ - **Job Analyst**: Extracts key requirements from JDs.
21
+ - **Candidate Profiler**: Analyzes resumes for strengths and gaps.
22
+ - **Evaluator**: Scores candidates on Technical, Behavioral, and Leadership metrics.
23
+ - **Interview Designer**: Generates bespoke interview questions.
24
+ - **📄 Input Flexibility**: Accepts raw text for JD and Resume/LinkedIn profile.
25
+ - **📊 Structured Evaluation**: Provides a clear score and justification.
26
+
27
+ ## 🛠️ Architecture
28
+
29
+ ```
30
+ src/interview-assistant/
31
+ ├── app.py # Streamlit UI (Orchestrator)
32
+ ├── teams/ # Team Definitions
33
+ │ └── team.py # GroupChat Configuration
34
+ ├── aagents/ # Agent Definitions
35
+ │ ├── job_analyst.py
36
+ │ ├── candidate_profiler.py
37
+ │ ├── evaluator.py
38
+ │ ├── interview_designer.py
39
+ │ └── admin.py
40
+ └── Dockerfile # Deployment Configuration
41
+ ```
42
+
43
+ ## 📦 Startup
44
+
45
+ ### Local Run
46
+
47
+ 1. **Install Dependencies**:
48
+ ```bash
49
+ pip install -r src/interview-assistant/requirements.txt
50
+ ```
51
+
52
+ 2. **Run Application**:
53
+ ```bash
54
+ streamlit run src/interview-assistant/app.py
55
+ ```
56
+ The app will open at `http://localhost:8501`.
57
+
58
+ ## 🐳 Docker / Deployment
59
+
60
+ The project is packaged for **Hugging Face Spaces** (Docker SDK).
61
+
62
+ ```bash
63
+ # Build
64
+ docker build -t interviewer-assistant -f src/interview-assistant/Dockerfile .
65
+
66
+ # Run
67
+ docker run -p 7860:7860 interviewer-assistant
68
+ ```
src/interview-assistant/aagents/__init__.py ADDED
File without changes
src/interview-assistant/aagents/candidate_profiler.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+ from autogen_core.tools import FunctionTool
3
+
4
+ # Import tools
5
+ import sys
6
+ import os
7
+
8
+ # Ensure we can import from parent directory if needed
9
+ current_dir = os.path.dirname(os.path.abspath(__file__))
10
+ parent_dir = os.path.abspath(os.path.join(current_dir, ".."))
11
+ if parent_dir not in sys.path:
12
+ sys.path.append(parent_dir)
13
+
14
+ from tools.resume_tools import read_local_file, scrape_web_page
15
+
16
def get_candidate_profiler(model_client):
    """Build the "Candidate_Profiler" assistant agent.

    Wraps the resume-reading and web-scraping helpers as FunctionTools and
    returns an AssistantAgent that summarizes the candidate's profile as JSON,
    then hands off to the Job Analyst.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured profiler agent.
    """

    # Wrap tools
    read_tool = FunctionTool(read_local_file, description="Reads a local file (PDF, DOCX, or TXT) and returns its text content.")
    scrape_tool = FunctionTool(scrape_web_page, description="Fetches the content of a web page (e.g., LinkedIn public profile) and returns the text.")

    return AssistantAgent(
        name="Candidate_Profiler",
        model_client=model_client,
        tools=[read_tool, scrape_tool],
        system_message="""
        You are an expert Candidate Profiler.

        Task:
        1. Read the candidate's resume (using `read_local_file` if path provided) and/or LinkedIn profile (using `scrape_web_page` if URL provided).
        2. Summarize the candidate's professional profile, key skills, years of experience, and notable achievements.
        3. Do NOT evaluate the candidate against any job description yet. Just provide a factual, comprehensive summary. Also extract the Candidate's Name if available.

        IMPORTANT:
        - If you have the Resume content, that is sufficient. exact LinkedIn data is secondary.
        - Do not retry scraping if it fails or returns empty/short content. Proceed with the Resume only.
        - Output the results in the JSON format below.
        - After outputting the JSON, do not perform further actions.

        Output:
        Return a JSON object:
        ```json
        {
            "candidate_name": "...",
            "candidate_summary": "...",
            "key_skills": ["..."],
            "years_of_experience": "...",
            "recent_roles": ["..."]
        }
        ```

        End your message with: HANDOFF_TO_JOB_ANALYST
        """,
    )
src/interview-assistant/aagents/evaluator.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+
3
def get_evaluator(model_client):
    """Build the "Evaluator" assistant agent.

    Returns an AssistantAgent that compares the profiled candidate against the
    approved JD analysis across four dimensions and emits a JSON fitness report.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured evaluator agent.
    """
    return AssistantAgent(
        name="Evaluator",
        model_client=model_client,
        system_message="""
        You are the Hiring Evaluator.

        Inputs:
        1. Context includes "Candidate Summary" (from Candidate_Profiler).
        2. Context includes "Approved JD Analysis" (from Job_Analyst/Reviewer).

        Task:
        1. Compare the Candidate's profile against the approved JD requirements across FOUR dimensions:
           - Technical Proficiency
           - Behavioral Fit
           - Situational Judgment & Problem Solving
           - Leadership Potential (Ownership/Influence)
        2. Score the overall fitness (1-10) weighted by the priorities defined in the JD Analysis.
        3. Identify Strengths and Gaps.
        4. List specific areas to probe in the interview.

        Output:
        Return a JSON object:
        ```json
        {
            "fitness_score": 8,
            "justification": "...",
            "strengths": ["..."],
            "gaps": ["..."],
            "interview_focus_areas": ["..."]
        }
        ```
        """,
    )
src/interview-assistant/aagents/interview_designer.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+
3
def get_interview_designer(model_client):
    """Build the "Interview_Designer" assistant agent.

    Returns an AssistantAgent that turns the Evaluator's output and the
    approved JD analysis into a large structured interview question set
    (JSON), reviewed downstream by the Team Lead.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured interview-designer agent.
    """
    return AssistantAgent(
        name="Interview_Designer",
        model_client=model_client,
        system_message="""
        You are an Expert Interview Designer.

        Input:
        - Evaluator's output (Focus Areas, Gaps, Strengths).
        - Approved JD Analysis (with Weights).

        Task:
        1. Design a comprehensive structured interview based on the JD weights and Evaluator's focus areas.
        2. **MANDATORY QUANTITY**: You MUST generate a minimum of **40 unique questions**. This is a hard requirement.
        3. **SUGGESTED BREAKDOWN**: ~16 Technical, ~8 Behavioral, ~8 Situational, ~8 Leadership.
        4. **DISTRIBUTION**: Allocate questions based on skill weights.
        5. **STRICT ELABORATION**: Each question text ("q") and "sample_answer" MUST be substantive (spanning **at least 3-4 lines/sentences**) to provide deep context. NO ONE-LINERS.
        6. **SITUATIONAL**: Ensure at least 8 questions are complex "Situational" scenarios.
        7. **Sample Answers**: For EACH question, provide a detailed "Sample Answer" (3-4 sentences).
        8. **Completeness**: Ensure the JSON is complete. Output the **FULL LIST** every time (do not strictly append, regenerate the full set if needed).

        Output:
        Return a JSON object:
        ```json
        {
            "structured_interview": [
                {
                    "skill": "Python (Weight: 40%)",
                    "questions": [
                        {
                            "q": "Explain decorators...",
                            "complexity": "High",
                            "type": "Conceptual",
                            "sample_answer": "Candidate should mention: Higher-order functions, @syntax, typical use cases like logging or auth."
                        }
                    ]
                }
            ]
        }
        ```
        """,
    )
src/interview-assistant/aagents/job_analyst.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+
3
def get_job_analyst(model_client):
    """Build the "Job_Analyst" assistant agent.

    Returns an AssistantAgent that decomposes a job description into weighted
    skill categories (technical / behavioral / situational / leadership) as
    JSON, with weights summing to 100.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured job-analyst agent.
    """
    return AssistantAgent(
        name="Job_Analyst",
        model_client=model_client,
        system_message="""
        You are an Expert Job Analyst.

        Task:
        1. Analyze the provided Job Description (JD).
        2. Identify key skills in four categories:
           - **Technical**: Hard skills, tools, languages.
           - **Behavioral**: Soft skills, culture fit.
           - **Situational Judgment**: Problem-solving, conflict resolution, strategic thinking scenarios.
           - **Leadership**: Coaching, ownership, influence, team management (even for individual contributors).
        3. Assign a PERCENTAGE WEIGHT to each skill based on its importance in the JD.
        4. CONSTRAINT: The sum of weights across ALL skills in ALL categories MUST equal exactly 100.

        Output:
        Return a JSON object:
        ```json
        {
            "role_summary": "...",
            "analysis": {
                "technical": [{"skill": "Python", "weight": 30}, {"skill": "AWS", "weight": 20}],
                "behavioral": [{"skill": "Teamwork", "weight": 10}],
                "situational_judgment": [{"skill": "Production Outage Handling", "weight": 20}],
                "leadership": [{"skill": "Mentoring", "weight": 20}]
            }
        }
        ```
        """,
    )
src/interview-assistant/aagents/job_analyst_reviewer.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+
3
def get_job_analyst_reviewer(model_client):
    """Build the "Job_Analyst_Reviewer" assistant agent.

    Returns an AssistantAgent that audits the Job_Analyst's weighted skill
    breakdown (coverage + weights summing to 100) and replies "APPROVED"
    when acceptable.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured reviewer agent.
    """
    return AssistantAgent(
        name="Job_Analyst_Reviewer",
        model_client=model_client,
        system_message="""
        You are a Senior HR Reviewer.

        Task:
        1. Review the output from the "Job_Analyst".
        2. Verify if the identified skills accurately reflect the JD.
        3. CHECK MATH: Verify that the sum of ALL weights across all categories equals exactly 100.
        4. If the math is wrong or skills are missing, reject and ask for corrections.
        5. If acceptable, output "APPROVED".
        """,
    )
src/interview-assistant/aagents/team_lead.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogen_agentchat.agents import AssistantAgent
2
+
3
def get_team_lead(model_client):
    """Build the "Team_Lead" QA assistant agent.

    Returns an AssistantAgent that validates the Interview_Designer's output
    (question count, elaboration, JSON completeness) and replies "TERMINATE"
    on success — the team's termination keyword.

    Args:
        model_client: Chat-completion model client the agent should use.

    Returns:
        AssistantAgent: the configured team-lead agent.
    """
    return AssistantAgent(
        name="Team_Lead",
        model_client=model_client,
        system_message="""
        You are the Quality Assurance Team Lead.

        Task:
        1. Review the output of the Interview Designer.
        2. **Quantity Check**: Count the total questions. If the count is **less than 35**, return "REJECT: Generated fewer than 35 questions. Need at least 35 unique questions. Please add more."
        3. **Quality Check**: Read the "q" and "sample_answer" fields.
           - If they are short (1-2 sentences), return "REJECT: Questions/Answers are too brief. Elaboration to 3-4 detailed sentences is REQUIRED for every item. Please rewrite."
        4. **Validation**: Ensure the JSON is valid and complete.

        Action:
        - If ALL checks pass: Reply with "TERMINATE".
        - If ANY check fails: Reply with the specific feedback to the Interview_Designer.
        """,
    )
src/interview-assistant/app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import sys
4
+ import tempfile
5
+ import asyncio
6
+ import traceback
7
+ import extra_streamlit_components as stx
8
+ from dotenv import load_dotenv
9
+
10
+ # Ensure we can import from local modules
11
+ current_dir = os.path.dirname(os.path.abspath(__file__))
12
+ if current_dir not in sys.path:
13
+ sys.path.append(current_dir)
14
+
15
+ from teams.team import get_interview_team
16
+ from ui.styles import apply_custom_styles
17
+ from ui.hero import render_hero
18
+ from ui.sidebar import render_sidebar
19
+ from ui.report import generate_markdown_report, create_pdf, render_persistent_view
20
+ from common.utility.autogen_model_factory import AutoGenModelFactory
21
+
22
+ # Load env variables
23
+ load_dotenv()
24
+
25
+ st.set_page_config(page_title="Interviewer Assistant", page_icon="👔", layout="wide")
26
+
27
# Cookie Manager (Must be initialized at top level)
def get_manager():
    """Create the extra-streamlit-components CookieManager widget."""
    return stx.CookieManager()

# Instantiated once per script run; passed to the sidebar for persistence.
cookie_manager = get_manager()
32
+
33
+ # 1. Apply Styles
34
+ apply_custom_styles()
35
+
36
+ # 2. Render Sidebar
37
+ sidebar_data = render_sidebar(cookie_manager)
38
+ mode = sidebar_data["mode"]
39
+ job_description = sidebar_data["job_description"]
40
+ uploaded_resume = sidebar_data["uploaded_resume"]
41
+ linkedin_url = sidebar_data["linkedin_url"]
42
+
43
+ # 3. Render Hero
44
+ render_hero()
45
+
46
+ # ------------------------------------------------------------------------------
47
+ # LOGIC & ANALYSIS HELPERS
48
+ # ------------------------------------------------------------------------------
49
+
50
async def run_analysis_stream(model_client, task_msg):
    """Build the interview team and return its streaming run generator."""
    # Hand back the async stream; the caller iterates the messages.
    return get_interview_team(model_client).run_stream(task=task_msg)
55
+
56
# ------------------------------------------------------------------------------
# MAIN CONTENT LOGIC
# ------------------------------------------------------------------------------
# Top-level Streamlit flow: candidate mode stub, input validation, async team
# execution with progress reporting, and report persistence in session state.

if mode == "Candidate":
    st.markdown("## 🎓 Candidate Prep Portal")
    st.info("This feature is under development. It will allow candidates to take mock interviews based on the generated guide.")
    st.image("https://cdn-icons-png.flaticon.com/512/3220/3220565.png", width=150)
    st.stop()

# NOTE(review): st.session_state.analyzing is presumably initialized by
# render_sidebar — confirm; otherwise first access raises AttributeError.
if st.session_state.analyzing:
    # Validation: each failure resets the flag and halts this script run.
    if not os.getenv("OPENAI_API_KEY"):
        st.error("Missing OpenAI API Key. Please check your .env file or environment variables.")
        st.session_state.analyzing = False
        st.stop()
    if not job_description:
        st.error("Missing Job Description.")
        st.session_state.analyzing = False
        st.stop()
    if not uploaded_resume:
        st.error("Missing Resume File.")
        st.session_state.analyzing = False
        st.stop()

    try:
        # Process Input: persist the upload to a temp file so the profiler
        # agent can read it via its `read_local_file` tool.
        # delete=False keeps the file alive after the `with` block closes it.
        resume_path = ""
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_resume.name.split('.')[-1]}") as tmp_file:
            tmp_file.write(uploaded_resume.getvalue())
            resume_path = tmp_file.name

        resume_content_msg = f"Candidate Resume File Path: {resume_path} (Please use `read_local_file` to read this)."
        if linkedin_url:
            resume_content_msg += f"\nAlso check LinkedIn: {linkedin_url} (use `scrape_web_page`)."

        # Execution: progress widgets updated as agents report in.
        prog_bar = st.progress(0)
        status_text = st.empty()

        # Create Model Client using Factory
        model_client = AutoGenModelFactory.get_model(
            provider="openai", model_name="gpt-4-turbo", model_info={"vision": False, "function_calling": True, "json_output": False}
        )

        task_msg = f"""
        Here is the Job Description:
        {job_description}

        {resume_content_msg}

        The team must follow the strict workflow:
        Profiler -> Job Analyst -> Reviewer -> Evaluator -> Designer.
        """

        status_text.text("Initializing Agents...")
        prog_bar.progress(10)

        # Create a placeholder for debug output to avoid context issues inside async
        debug_placeholder = st.empty()

        # Define debug print helper (console only)
        def debug_print(msg):
            print(f"DEBUG: {msg}")

        debug_print("Analysis Started. Loop initializing...")

        async def execute_analysis(placeholder):
            """Run the agent team stream and persist the generated report."""
            try:
                debug_print("Entering execute_analysis async function")
                with st.spinner("Analyzing candidate and designing interview..."):
                    # Get the stream
                    debug_print(f"Creating team and stream with task length {len(task_msg)}")
                    stream = await run_analysis_stream(model_client, task_msg)

                    messages = []
                    msg_count = 0
                    debug_print("Stream created. Iterating...")

                    # Stream messages as the agents take turns.
                    async for message in stream:
                        msg_count += 1
                        messages.append(message)

                        source = getattr(message, 'source', 'Unknown')
                        content = getattr(message, 'content', '')
                        debug_print(f"Stream Msg {msg_count}: {source}")

                        if isinstance(content, list):
                            content = "[Multimodal Content]"
                        elif not content:
                            content = "[No Content]"

                        # Evidence: Show Data (Source Content) in UI & Console
                        if isinstance(content, str) and len(content) > 500:
                            print(f"--- EVIDENCE ({source}) ---\n{content[:5000]}\n---------------------------")
                            with st.expander(f"📄 Data Source Evidence ({source})", expanded=False):
                                st.text(content)

                        # Update Debug UI safely
                        placeholder.text(f"[{msg_count}] {source}: {str(content)[:150]}...")

                        # Progress bar update keyed on which agent just spoke.
                        if source == "Candidate_Profiler": prog_bar.progress(20)
                        elif source == "Job_Analyst": prog_bar.progress(40)
                        elif source == "Job_Analyst_Reviewer": prog_bar.progress(60)
                        elif source == "Evaluator": prog_bar.progress(80)
                        elif source == "Interview_Designer": prog_bar.progress(95)

                    if msg_count == 0:
                        st.error("No messages received. Check logs/console.")
                        debug_print("Stream finished with 0 messages.")
                    else:
                        debug_print(f"Stream finished with {msg_count} messages.")

                    prog_bar.progress(100)
                    status_text.text("Analysis Complete.")

                    # Generation
                    final_markdown = generate_markdown_report(messages)

                    if not final_markdown.strip():
                        final_markdown = "## Report Generation Failed\nNo structured output was found from the agent team."

                    # Save to State (Persistence) — survives the st.rerun below.
                    st.session_state.generated_report = final_markdown
                    st.session_state.generated_pdf = create_pdf(final_markdown)

            except Exception as e:
                st.error(f"An error occurred during analysis: {e}")
                import traceback
                st.text(traceback.format_exc())
                debug_print(f"Async Job Failed: {e}")

        # Run the async execution on a dedicated event loop (Streamlit's
        # script thread has no running loop of its own).
        try:
            import asyncio
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(execute_analysis(debug_placeholder))
            finally:
                # Cleanup pending tasks before closing the loop.
                pending = asyncio.all_tasks(loop)
                for task in pending:
                    task.cancel()
                if pending:
                    loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
                loop.close()
        except Exception as e:
            st.error(f"System Error: {e}")
            import traceback
            st.text(traceback.format_exc())

    finally:
        # Reset analysis state at the end so user can run again
        st.session_state.analyzing = False
        st.rerun()

else:
    st.info("👈 Please fill in the details in the sidebar to get started.")

# 4. Render Persistent View (Report)
render_persistent_view()
src/interview-assistant/debug_profiler.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import sys
4
+ from dotenv import load_dotenv
5
+
6
+ # Path setup
7
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
8
+
9
+ from common.utility.autogen_model_factory import AutoGenModelFactory
10
+ from aagents.candidate_profiler import get_candidate_profiler
11
+
12
+ # Load env (assuming .env in project root, but we can rely on env vars being set in shell)
13
+ load_dotenv()
14
+
15
async def main():
    """Standalone smoke test: run the Candidate_Profiler agent end-to-end.

    Creates a dummy resume file, builds the profiler via the model factory,
    wraps it in a minimal RoundRobin team, and prints the streamed messages.
    """
    print("DEBUG: Starting standalone profiler test.")

    # 1. Setup Dummy Resume (written to the current working directory)
    resume_path = os.path.join(os.getcwd(), "dummy_resume.txt")
    with open(resume_path, "w") as f:
        f.write("Jane Doe. Experienced Software Engineer. Python, Azure, AI. 10 years experience.")

    print(f"DEBUG: Created dummy resume at {resume_path}")

    # 2. Setup Agent
    try:
        model_client = AutoGenModelFactory.get_model(
            provider="openai",
            model_name="gpt-4-turbo",
            model_info={"vision": False, "function_calling": True, "json_output": False}
        )
        profiler = get_candidate_profiler(model_client)
        print("DEBUG: Profiler agent created.")
    except Exception as e:
        print(f"ERROR: Failed to create agent: {e}")
        return

    # 3. Run Agent
    task_msg = f"Candidate Resume File Path: {resume_path} (Please use `read_local_file` to read this)."

    print(f"DEBUG: Sending task: {task_msg}")

    try:
        # Run directly against agent usually requires a team context for proper tool loop handling in 0.4
        # But let's try calling on_messages or similar if supported,
        # OR just wrap in a minimal RoundRobin team like in the app.
        from autogen_agentchat.teams import RoundRobinGroupChat
        from autogen_agentchat.conditions import MaxMessageTermination

        # MaxMessageTermination(5) bounds the run so the test can't loop forever.
        team = RoundRobinGroupChat(
            participants=[profiler],
            termination_condition=MaxMessageTermination(5)
        )

        print("DEBUG: Running team stream...")
        async for message in team.run_stream(task=task_msg):
            source = getattr(message, 'source', 'Unknown')
            content = getattr(message, 'content', '')
            print(f"STREAM: {source}: {str(content)[:100]}")

    except Exception as e:
        print(f"ERROR during execution: {e}")
        import traceback
        traceback.print_exc()
65
+
66
+ if __name__ == "__main__":
67
+ asyncio.run(main())
src/interview-assistant/teams/__init__.py ADDED
File without changes
src/interview-assistant/teams/team.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import json
4
+ import re
5
+ from typing import Dict, Any
6
+
7
+ from autogen_agentchat.teams import RoundRobinGroupChat, SelectorGroupChat
8
+ from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination
9
+
10
+ # Ensure we can import from parent directory if needed
11
+ current_dir = os.path.dirname(os.path.abspath(__file__))
12
+ parent_dir = os.path.abspath(os.path.join(current_dir, ".."))
13
+ if parent_dir not in sys.path:
14
+ sys.path.append(parent_dir)
15
+
16
+ # Import agents
17
+ try:
18
+ from ..aagents.job_analyst import get_job_analyst
19
+ from ..aagents.job_analyst_reviewer import get_job_analyst_reviewer
20
+ from ..aagents.candidate_profiler import get_candidate_profiler
21
+ from ..aagents.evaluator import get_evaluator
22
+ from ..aagents.interview_designer import get_interview_designer
23
+ from ..aagents.team_lead import get_team_lead
24
+
25
+ except ImportError:
26
+ from aagents.job_analyst import get_job_analyst
27
+ from aagents.job_analyst_reviewer import get_job_analyst_reviewer
28
+ from aagents.candidate_profiler import get_candidate_profiler
29
+ from aagents.evaluator import get_evaluator
30
+ from aagents.interview_designer import get_interview_designer
31
+ from aagents.team_lead import get_team_lead
32
+
33
+
34
def get_interview_team(model_client):
    """
    Create the Interview Team as a RoundRobinGroupChat.

    Agents take turns in a fixed order:
    Profiler -> Job Analyst -> Reviewer -> Evaluator -> Designer -> Team Lead.

    Args:
        model_client: Chat-completion model client shared by all agents.

    Returns:
        RoundRobinGroupChat: Team that terminates when any agent emits
        "TERMINATE" or after 15 messages, whichever happens first.
    """
    print("[DEBUG] Creating Interview Team")

    # 1. Initialize one agent per role via the factory helpers.
    profiler = get_candidate_profiler(model_client)
    job_analyst = get_job_analyst(model_client)
    reviewer = get_job_analyst_reviewer(model_client)
    evaluator = get_evaluator(model_client)
    designer = get_interview_designer(model_client)
    lead = get_team_lead(model_client)

    # 2. Use RoundRobin as requested ("Keep it simple like market-analyst").
    # This avoids SelectorGroupChat logic loops; the agents must be robust
    # enough to handle the sequential flow on their own.
    print("[DEBUG] Using RoundRobinGroupChat")
    team = RoundRobinGroupChat(
        participants=[profiler, job_analyst, reviewer, evaluator, designer, lead],
        termination_condition=TextMentionTermination("TERMINATE") | MaxMessageTermination(15),
    )
    print(f"[DEBUG] Team created: {team}")

    return team
66
+
67
def extract_json(text: str) -> Dict[str, Any]:
    """
    Extract a JSON object from *text*.

    First looks for a fenced ```json ... ``` markdown block and parses its
    contents; otherwise tries to parse the whole string as JSON.

    Args:
        text: Raw agent output, possibly wrapping JSON in a markdown fence.

    Returns:
        The parsed object as a dict, or {} when no valid JSON is found.
    """
    try:
        match = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
        if match:
            return json.loads(match.group(1))
        return json.loads(text)
    # Narrow exceptions: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide real bugs. JSONDecodeError is a
    # ValueError subclass; TypeError covers non-string input.
    except (ValueError, TypeError):
        return {}