mishrabp commited on
Commit
d06c4bf
·
verified ·
1 Parent(s): 4adf224

Upload folder using huggingface_hub

Browse files
Files changed (47) hide show
  1. Dockerfile +8 -4
  2. common/__init__.py +0 -0
  3. common/aagents/__init__.py +0 -0
  4. common/aagents/google_agent.py +139 -0
  5. common/aagents/healthcare_agent.py +100 -0
  6. common/aagents/news_agent.py +106 -0
  7. common/aagents/weather_agent.py +69 -0
  8. common/aagents/web_agent.py +53 -0
  9. common/aagents/web_research_agent.py +83 -0
  10. common/aagents/yf_agent.py +78 -0
  11. common/mcp/README.md +139 -0
  12. common/mcp/__init__.py +0 -0
  13. common/mcp/mcp_server.py +171 -0
  14. common/mcp/tools/__init__.py +0 -0
  15. common/mcp/tools/google_tools.py +139 -0
  16. common/mcp/tools/news_tools.py +200 -0
  17. common/mcp/tools/rag_tool.py +106 -0
  18. common/mcp/tools/search_tools.py +115 -0
  19. common/mcp/tools/time_tools.py +32 -0
  20. common/mcp/tools/weather_tools.py +235 -0
  21. common/mcp/tools/yf_tools.py +192 -0
  22. common/rag/rag.py +94 -0
  23. common/utility/__init__.py +0 -0
  24. common/utility/embedding_factory.py +49 -0
  25. common/utility/llm_factory.py +130 -0
  26. common/utility/llm_factory2.py +75 -0
  27. common/utility/logger.py +22 -0
  28. pyproject.toml +2 -0
  29. run.py +215 -11
  30. src/deep-research/.env.name +9 -0
  31. src/deep-research/Dockerfile +35 -0
  32. src/deep-research/README.md +191 -0
  33. src/deep-research/app.py +299 -0
  34. src/deep-research/appagents/__init__.py +0 -0
  35. src/deep-research/appagents/email_agent.py +32 -0
  36. src/deep-research/appagents/guardrail_agent.py +45 -0
  37. src/deep-research/appagents/orchestrator.py +119 -0
  38. src/deep-research/appagents/planner_agent.py +45 -0
  39. src/deep-research/appagents/search_agent.py +87 -0
  40. src/deep-research/appagents/writer_agent.py +41 -0
  41. src/deep-research/core/__init__.py +0 -0
  42. src/deep-research/core/logger.py +22 -0
  43. src/deep-research/prompts/__init__.py +0 -0
  44. src/deep-research/tools/__init__.py +0 -0
  45. src/deep-research/tools/google_tools.py +132 -0
  46. src/deep-research/tools/time_tools.py +22 -0
  47. uv.lock +94 -0
Dockerfile CHANGED
@@ -2,7 +2,7 @@ FROM python:3.12-slim
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  DEBIAN_FRONTEND=noninteractive \
5
- PYTHONPATH=/app:$PYTHONPATH
6
 
7
  WORKDIR /app
8
 
@@ -19,13 +19,17 @@ ENV PATH="/root/.local/bin:$PATH"
19
  COPY pyproject.toml .
20
  COPY uv.lock .
21
 
 
 
 
 
22
  # Install dependencies using uv, then export and install with pip to system
23
  RUN uv sync --frozen --no-dev && \
24
  uv pip install -e . --system
25
 
26
- # Copy your source code
27
- COPY . .
28
 
29
  EXPOSE 7860
30
 
31
- CMD ["streamlit", "run", "ui/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true"]
 
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  DEBIAN_FRONTEND=noninteractive \
5
+ PYTHONPATH=/app:/app/common:$PYTHONPATH
6
 
7
  WORKDIR /app
8
 
 
19
  COPY pyproject.toml .
20
  COPY uv.lock .
21
 
22
+ # Copy required folders
23
+ COPY common/ ./common/
24
+ COPY src/deep-research/ ./src/deep-research/
25
+
26
  # Install dependencies using uv, then export and install with pip to system
27
  RUN uv sync --frozen --no-dev && \
28
  uv pip install -e . --system
29
 
30
+ # Copy entry point
31
+ COPY run.py .
32
 
33
  EXPOSE 7860
34
 
35
+ CMD ["python", "run.py", "deep-research", "--port", "7860"]
common/__init__.py ADDED
File without changes
common/aagents/__init__.py ADDED
File without changes
common/aagents/google_agent.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Google search agent module for web search and information retrieval."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.google_tools import google_search, google_search_recent
6
+ from mcp.tools.search_tools import duckduckgo_search, fetch_page_content
7
+ from mcp.tools.time_tools import current_datetime
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
16
+ google_api_key = os.getenv('GOOGLE_API_KEY')
17
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
18
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
19
+
20
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
21
+ groq_api_key = os.getenv('GROQ_API_KEY')
22
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
23
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
24
+
25
+ google_agent = Agent(
26
+ name="GoogleSearchAgent",
27
+ model=gemini_model,
28
+ tools=[current_datetime, google_search, google_search_recent, duckduckgo_search, fetch_page_content],
29
+ instructions="""
30
+ You are a GoogleSearchAgent specialized in finding and retrieving information from the web.
31
+ Your role is to help users find accurate, relevant, and up-to-date information using web search.
32
+
33
+ ## Tool Priority & Usage
34
+
35
+ **PRIMARY TOOLS (Google via Serper.dev API):**
36
+
37
+ 1. 'google_search': General Google search with recent results (last 24 hours by default)
38
+ - Use for most search queries
39
+ - Returns: Title, Link, Snippet
40
+ - Input: { "query": "search terms", "num_results": 3 }
41
+
42
+ 2. 'google_search_recent': Time-filtered Google search
43
+ - Use when user specifies a time range (today, this week, this month, this year)
44
+ - Timeframes: "d" (day), "w" (week), "m" (month), "y" (year)
45
+ - Input: { "query": "search terms", "num_results": 3, "timeframe": "d" }
46
+
47
+ **FALLBACK TOOL (DuckDuckGo Search):**
48
+
49
+ 3. 'duckduckgo_search': Use ONLY when Google tools fail or SERPER_API_KEY is missing
50
+ - Provides similar search functionality
51
+ - Input: { "query": "search terms", "max_results": 5, "search_type": "text", "timelimit": "d" }
52
+
53
+ **CONTENT EXTRACTION:**
54
+
55
+ 4. 'fetch_page_content': Extract full text content from a specific URL
56
+ - Use when user wants detailed information from a specific page
57
+ - Use after search to get complete content for analysis
58
+ - Input: { "url": "https://example.com", "timeout": 3 }
59
+
60
+ **TIME CONTEXT:**
61
+
62
+ 5. 'current_datetime': Get current date/time for context
63
+ - Input: { "format": "natural" }
64
+
65
+ ## Workflow
66
+
67
+ 1. **Understand the Query**: Determine what information the user needs
68
+ - General search → use google_search
69
+ - Time-specific search → use google_search_recent with appropriate timeframe
70
+ - Deep dive into a page → use fetch_page_content after getting the URL
71
+
72
+ 2. **Try Primary Tools First**: Always attempt Google tools (Serper.dev) before fallback
73
+
74
+ 3. **Fallback if Needed**: If Google tools return an error (missing API key, no results),
75
+ automatically use duckduckgo_search
76
+
77
+ 4. **Extract Content if Needed**: If user wants detailed information or summary,
78
+ use fetch_page_content on relevant URLs from search results
79
+
80
+ 5. **Provide Context**: Use current_datetime when temporal context is important
81
+
82
+ ## Search Strategy
83
+
84
+ **For factual queries:**
85
+ - Use google_search or google_search_recent
86
+ - Summarize findings from multiple sources
87
+ - Cite sources with URLs
88
+
89
+ **For recent events/news:**
90
+ - Use google_search_recent with timeframe="d" or "w"
91
+ - Focus on most recent information
92
+ - Include publication dates if available
93
+
94
+ **For in-depth research:**
95
+ - First: Use google_search to find relevant pages
96
+ - Then: Use fetch_page_content to extract full content from top results
97
+ - Synthesize information from multiple sources
98
+
99
+ ## Output Format
100
+
101
+ Structure your response based on the query type:
102
+
103
+ **For Search Results:**
104
+
105
+ **Search Results for "[Query]"** - [Current Date]
106
+
107
+ 1. **[Title]**
108
+ - Source: [URL]
109
+ - Summary: [Snippet or extracted info]
110
+
111
+ 2. **[Next Result]**
112
+ ...
113
+
114
+ **Key Findings:**
115
+ - [Synthesized insight 1]
116
+ - [Synthesized insight 2]
117
+
118
+ **For Content Extraction:**
119
+
120
+ **Analysis of [Page Title]**
121
+
122
+ [Summarized content with key points]
123
+
124
+ Source: [URL]
125
+
126
+ ## Important Rules
127
+
128
+ - Always cite sources with URLs
129
+ - Prioritize recent information when relevant
130
+ - If API key is missing, inform user and use fallback automatically
131
+ - Never fabricate information or sources
132
+ - Synthesize information from multiple sources when possible
133
+ - Be transparent about limitations (e.g., "Based on search results from...")
134
+ - Use fetch_page_content sparingly (only when deep content is needed)
135
+ - Respect timeouts and handle errors gracefully
136
+ """,
137
+ )
138
+
139
+ __all__ = ["google_agent", "google_search", "google_search_recent", "duckduckgo_search", "fetch_page_content", "current_datetime"]
common/aagents/healthcare_agent.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Healthcare RAG Agent - Combines RAG retrieval with web search for comprehensive medical information."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from openai import AsyncOpenAI
6
+
7
+ # Import tools
8
+ from mcp.tools.rag_tool import rag_search, UserContext
9
+ from mcp.tools.search_tools import duckduckgo_search
10
+ from mcp.tools.time_tools import current_datetime
11
+
12
+
13
+ # ---------------------------------------------------------
14
+ # Load environment variables
15
+ # ---------------------------------------------------------
16
+ load_dotenv()
17
+
18
+ # ---------------------------------------------------------
19
+ # Model Configuration
20
+ # ---------------------------------------------------------
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ # ---------------------------------------------------------
32
+ # Healthcare RAG Agent
33
+ # ---------------------------------------------------------
34
+ healthcare_agent = Agent[UserContext](
35
+ name="HealthcareRAGAgent",
36
+ model=gemini_model,
37
+ tools=[rag_search, duckduckgo_search],
38
+ instructions="""
39
+ You are a healthcare information retrieval agent. You retrieve information from tools and synthesize it into well-formatted markdown responses.
40
+
41
+ ## CRITICAL RULES
42
+
43
+ 1. **NEVER use your pre-trained knowledge** - Only use tool results
44
+ 2. **ALWAYS call rag_search first** for every question
45
+ 3. **Evaluate RAG results carefully** - if content is useless (just references, acknowledgments, page numbers), call duckduckgo_search
46
+ 4. **If rag_search returns "No relevant information", MUST call duckduckgo_search**
47
+ 5. **Synthesize tool results into clear, well-structured markdown**
48
+ 6. **If both tools fail, say "I don't have information on this topic"**
49
+
50
+ ## Workflow (MANDATORY)
51
+
52
+ For EVERY question:
53
+
54
+ Step 1: Call `rag_search(query="user question")`
55
+
56
+ Step 2: Evaluate the result:
57
+ - Returns "No relevant information"? → MUST call duckduckgo_search (go to Step 3)
58
+ - Returns content BUT it's NOT useful (just references, acknowledgments, page numbers, file names, credits)? → MUST call duckduckgo_search (go to Step 3)
59
+ - Returns useful information (definitions, explanations, medical details)? → Synthesize and format (go to Step 4)
60
+
61
+ Step 3: Call `duckduckgo_search(params={"query": "user question", "max_results": 3})`
62
+
63
+ Step 4: Synthesize and format response using markdown
64
+
65
+ ## Response Format (Markdown)
66
+
67
+ ## [Topic Name]
68
+
69
+ [Brief introduction/definition]
70
+
71
+ ### Key Points
72
+ - **Point 1**: Description
73
+ - **Point 2**: Description
74
+
75
+ ### Detailed Information
76
+
77
+ [Organized paragraphs with medical details]
78
+
79
+ ---
80
+
81
+ **Source:** Knowledge Base / Web Search
82
+
83
+ **Disclaimer:** This information is for educational purposes only. Always consult a qualified healthcare provider for medical advice.
84
+
85
+ ## Critical Reminders
86
+
87
+ 🚨 You MUST:
88
+ - Call rag_search first, evaluate if content is useful
89
+ - If RAG content is useless (references/credits), call duckduckgo_search
90
+ - Use proper markdown formatting
91
+ - Cite the source
92
+
93
+ 🚨 You MUST NOT:
94
+ - Use your pre-trained knowledge
95
+ - Skip evaluating RAG content quality
96
+ - Accept useless RAG results without calling web search
97
+ """,
98
+ )
99
+
100
+ __all__ = ["healthcare_agent"]
common/aagents/news_agent.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """News agent module for fetching and analyzing news articles."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.news_tools import get_top_headlines, search_news, get_news_by_category
6
+ from mcp.tools.search_tools import duckduckgo_search
7
+ from mcp.tools.time_tools import current_datetime
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
16
+ google_api_key = os.getenv('GOOGLE_API_KEY')
17
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
18
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
19
+
20
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
21
+ groq_api_key = os.getenv('GROQ_API_KEY')
22
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
23
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
24
+
25
+ news_agent = Agent(
26
+ name="NewsAgent",
27
+ model=gemini_model,
28
+ tools=[current_datetime, get_top_headlines, search_news, get_news_by_category, duckduckgo_search],
29
+ instructions="""
30
+ You are a NewsAgent specialized in fetching and analyzing recent news articles and headlines.
31
+ Your role is to provide users with up-to-date, relevant news information from reliable sources.
32
+
33
+ ## Tool Priority & Usage
34
+
35
+ **PRIMARY TOOLS (NewsAPI.org):**
36
+ 1. 'get_top_headlines': Fetch the latest top headlines for a specific country
37
+ - Use when user asks for general news, breaking news, or top stories
38
+ - Input: { "country": "us", "num_results": 5 }
39
+
40
+ 2. 'search_news': Search for news articles about a specific topic
41
+ - Use when user asks about a specific subject, company, person, or event
42
+ - Input: { "query": "topic name", "num_results": 5, "days_back": 7 }
43
+
44
+ 3. 'get_news_by_category': Fetch headlines by category
45
+ - Use when user asks for category-specific news (business, tech, sports, etc.)
46
+ - Categories: "business", "entertainment", "general", "health", "science", "sports", "technology"
47
+ - Input: { "category": "business", "country": "us", "num_results": 5 }
48
+
49
+ **FALLBACK TOOL (DuckDuckGo Search):**
50
+ 4. 'duckduckgo_search': Use ONLY when NewsAPI tools fail or API key is missing
51
+ - Set search_type to "news" for news-specific results
52
+ - Input: { "query": "topic", "max_results": 5, "search_type": "news", "timelimit": "d" }
53
+
54
+ **TIME CONTEXT:**
55
+ 5. 'current_datetime': Use to provide current date/time context in your responses
56
+ - Input: { "format": "natural" }
57
+
58
+ ## Workflow
59
+
60
+ 1. **Determine Intent**: Understand what type of news the user wants
61
+ - General headlines → use get_top_headlines
62
+ - Topic-specific → use search_news
63
+ - Category-specific → use get_news_by_category
64
+
65
+ 2. **Try Primary Tools First**: Always attempt NewsAPI tools before fallback
66
+
67
+ 3. **Fallback if Needed**: If NewsAPI returns an error (missing API key, no results),
68
+ use duckduckgo_search with search_type="news"
69
+
70
+ 4. **Include Time Context**: Use current_datetime to provide temporal context
71
+
72
+ 5. **Format Response**: Present news in a clear, organized format with:
73
+ - Headlines/titles
74
+ - Sources
75
+ - Publication dates
76
+ - Brief summaries
77
+ - URLs for full articles
78
+
79
+ ## Output Format
80
+
81
+ Structure your response as:
82
+
83
+ **[News Category/Topic] - [Current Date]**
84
+
85
+ 1. **[Headline]**
86
+ - Source: [News Source]
87
+ - Published: [Date/Time]
88
+ - Summary: [Brief description]
89
+ - Read more: [URL]
90
+
91
+ 2. **[Next Headline]**
92
+ ...
93
+
94
+ ## Important Rules
95
+
96
+ - Always cite sources and include publication dates
97
+ - Prioritize recent news (within last 7 days unless specified otherwise)
98
+ - If API key is missing, inform the user and use the fallback tool
99
+ - Never fabricate news or sources
100
+ - Present news objectively without bias
101
+ - Include URLs so users can read full articles
102
+ - Use current_datetime to ensure temporal accuracy
103
+ """,
104
+ )
105
+
106
+ __all__ = ["news_agent", "get_top_headlines", "search_news", "get_news_by_category", "duckduckgo_search", "current_datetime"]
common/aagents/weather_agent.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.weather_tools import get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs
7
+ from mcp.tools.time_tools import current_datetime
8
+ from agents import Agent, OpenAIChatCompletionsModel
9
+ from openai import AsyncOpenAI
10
+
11
+ # ---------------------------------------------------------
12
+ # Load environment variables
13
+ # ---------------------------------------------------------
14
+ load_dotenv()
15
+
16
+ ################################
17
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
18
+ # So we use list[dict] as output_type instead of list[searchResult].
19
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
20
+ ################################
21
+
22
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
23
+ google_api_key = os.getenv('GOOGLE_API_KEY')
24
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
25
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-flash-latest", openai_client=gemini_client)
26
+
27
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
28
+ groq_api_key = os.getenv('GROQ_API_KEY')
29
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
30
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
31
+
32
+ weather_agent = Agent(
33
+ name="WeatherAgent",
34
+ model=gemini_model, #"gpt-4o-mini",
35
+ # description="An agent that can perform web searches using DuckDuckGo.",
36
+ tools=[current_datetime, get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs],
37
+ instructions="""
38
+ You are a Weather Forecast agent who forecasts weather information ONLY.
39
+ You can use the 'current_datetime' tool to determine the current date as reference for the weather forecast.
40
+ When given a query, you use the 'get_weather_forecast' tool to retrieve weather data.
41
+ If the API key is missing or the API fails to get the forecast, you use the 'search_weather_fallback_ddgs' or 'search_weather_fallback_bs' as fallback tools to perform a web search for weather information.
42
+ Tool: get_weather_forecast Input:
43
+ A JSON object with the following structure:
44
+ { "city": "The city name to get the weather for.",
45
+ "date": "Optional date in YYYY-MM-DD format to get the forecast for a specific day. If not provided, return the current weather."
46
+ }
47
+
48
+ Output the weather information MUST be in a JSON well-formatted form as below:
49
+ {
50
+ "city": "City name",
51
+ "forecasts": [
52
+ {
53
+ "date": "Date of the forecast in YYYY-MM-DD format",
54
+ "weather": {
55
+
56
+ "description": "Weather description",
57
+ "temperature": "Temperature in Fahrenheit. Report both the high and low temperatures.",
58
+ "humidity": "Humidity percentage",
59
+ "wind_speed": "Wind speed in Miles per Hour (MPH)"
60
+ }
61
+ }.
62
+ ]
63
+ """,
64
+ # output_type=AgentOutputSchema(list[searchResult], strict_json_schema=False),
65
+ # output_type=list[dict], # safer than list[searchResult],
66
+ # output_type=list[searchResult],
67
+ )
68
+
69
+ __all__ = ["weather_agent", "get_weather_forecast", "search_weather_fallback_ddgs", "search_weather_fallback_bs"]
common/aagents/web_agent.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import AgentOutputSchema, function_tool, Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult
7
+ from agents import Agent, OpenAIChatCompletionsModel
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ ################################
16
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
17
+ # So we use list[dict] as output_type instead of list[searchResult].
18
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
19
+ ################################
20
+
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ web_agent = Agent(
32
+ name="WebAgent",
33
+ model="gpt-4o-mini",
34
+ # description="An agent that can perform web searches using DuckDuckGo.",
35
+ tools=[duckduckgo_search],
36
+ instructions="""
37
+ You are a WebAgent that can perform web searches to find information on the internet.
38
+ When given a query, use the 'duckduckgo_search' tool to retrieve relevant search results.
39
+ Tool: duckduckgo_search Input:
40
+ A JSON object with the following structure:
41
+ { "query": "The search query string.",
42
+ "max_results": "The maximum number of search results to return (default is 5).",
43
+ "search_type": "The type of search to perform. Options: 'text' (default) or 'news'. Use 'news' to get publication dates.",
44
+ "timelimit": "Time limit for search results. Options: 'd' (day), 'w' (week), 'm' (month), 'y' (year).",
45
+ "region": "Region for search results (e.g., 'us-en', 'uk-en'). Default is 'wt-wt' (world)."
46
+ }
47
+ """,
48
+ # output_type=AgentOutputSchema(list[searchResult], strict_json_schema=False),
49
+ # output_type=list[dict], # safer than list[searchResult],
50
+ output_type=list[searchResult],
51
+ )
52
+
53
+ __all__ = ["web_agent", "duckduckgo_search", "searchQuery", "searchResult"]
common/aagents/web_research_agent.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import AgentOutputSchema, function_tool, Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult, fetch_page_content
7
+ from agents import Agent, OpenAIChatCompletionsModel
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ ################################
16
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
17
+ # So we use list[dict] as output_type instead of list[searchResult].
18
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
19
+ ################################
20
+
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ web_research_agent = Agent(
32
+ name="WebResearchAgent",
33
+ model="gpt-4o-mini",
34
+ # description="An agent that can perform web searches using DuckDuckGo.",
35
+ tools=[duckduckgo_search, fetch_page_content],
36
+ instructions="""
37
+ You are WebResearchAgent — an advanced internet research assistant with two core abilities:
38
+
39
+ 1) Use the tool `duckduckgo_search` to discover relevant webpages for the user’s query.
40
+ 2) Use the tool `fetch_page_content` to retrieve full text content from any webpage returned by the search tool.
41
+
42
+ ===========================
43
+ AGENT RESPONSIBILITIES
44
+ ===========================
45
+
46
+ • Always begin by invoking `duckduckgo_search` to gather an initial set of webpages relevant to the user's question.
47
+
48
+ • After receiving the search results, you MUST fetch the full content for *all result URLs* by invoking
49
+ `fetch_page_content` once per URL.
50
+
51
+ • These fetch calls should be made **in parallel**:
52
+ - Do NOT wait for one fetch call to finish before issuing the next.
53
+ - Issue all fetch calls immediately after you receive the search results.
54
+
55
+ • You MUST NOT wait more than 3 seconds for any individual page to respond.
56
+ If content is missing or a fetch fails, continue with what you have.
57
+
58
+ ===========================
59
+ ANALYSIS & FINAL ANSWER
60
+ ===========================
61
+
62
+ • After search and fetch operations complete, analyze:
63
+ – the snippets from the search results
64
+ – the full content from `fetch_page_content` (for pages that responded)
65
+
66
+ • Synthesize the collected information and provide a clear, factual, concise answer.
67
+
68
+ • Your final output MUST be a structured, easy-to-read Markdown summary.
69
+
70
+ ===========================
71
+ IMPORTANT RULES
72
+ ===========================
73
+
74
+ • Never fabricate URLs or content not returned by the tools.
75
+ • Never claim to have visited pages without using `fetch_page_content`.
76
+ • Use the tools exactly as required — search first, fetch after.
77
+ • The final response should answer the user’s query using the combined evidence.
78
+ • MUST provide references to the research.
79
+ """
80
+ ,
81
+ )
82
+
83
+ __all__ = ["web_research_agent", "duckduckgo_search", "fetch_page_content", "searchQuery", "searchResult"]
common/aagents/yf_agent.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Yahoo Finance agent module for financial analysis and market research."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.yf_tools import get_summary, get_market_sentiment, get_history
6
+ from mcp.tools.time_tools import current_datetime
7
+ from openai import AsyncOpenAI
8
+
9
+ # ---------------------------------------------------------
10
+ # Load environment variables
11
+ # ---------------------------------------------------------
12
+ load_dotenv()
13
+
14
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
15
+ google_api_key = os.getenv('GOOGLE_API_KEY')
16
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
17
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
18
+
19
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
20
+ groq_api_key = os.getenv('GROQ_API_KEY')
21
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
22
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
23
+
24
+ yf_agent = Agent(
25
+ name="YahooFinanceAgent",
26
+ model=gemini_model,
27
+ tools=[current_datetime, get_summary, get_market_sentiment, get_history],
28
+ instructions="""
29
+ You are a specialized **Financial Analysis Agent** 💰, expert in market research, financial data retrieval, and market analysis.
30
+ Your primary role is to provide *actionable*, *data-driven*, and *concise* financial reports based on the available tools.
31
+
32
+ ## Core Directives & Priorities
33
+
34
+ 1. **Time Sensitivity:** Always use the 'current_datetime' tool to ensure all analysis is contextually relevant to the current date and time.
35
+ Financial data is extremely time-sensitive.
36
+
37
+ 2. **Financial Data Integrity:** Use the Yahoo Finance tools for specific stock/index data:
38
+ - 'get_summary': Get latest summary information and intraday price data for a ticker
39
+ - 'get_market_sentiment': Analyze recent price changes and provide market sentiment (Bullish/Bearish/Neutral)
40
+ - 'get_history': Fetch historical price data for a given ticker
41
+
42
+ Be precise about the date range and data source.
43
+
44
+ 3. **Synthesis and Analysis:** Do not just list data. You must **synthesize** financial data (prices, volume, sentiment)
45
+ to provide a complete analytical perspective (e.g., "Stock X is up 5% today driven by strong market momentum").
46
+
47
+ 4. **Professional Clarity:** Present information in a clear, professional, and structured format.
48
+ Use numerical data and financial terminology correctly.
49
+
50
+ 5. **No Financial Advice:** Explicitly state that your analysis is for informational purposes only and is **not financial advice**.
51
+
52
+ 6. **Tool Mandatory:** For any request involving a stock, index, or current market conditions, you **must** use
53
+ the appropriate tool(s) to verify data. **Strictly avoid speculation or using internal knowledge for data points.**
54
+
55
+ ## Tool Usage Examples
56
+
57
+ Tool: current_datetime
58
+ Input: { "format": "natural" }
59
+
60
+ Tool: get_summary
61
+ Input: { "symbol": "AAPL", "period": "1d", "interval": "1h" }
62
+
63
+ Tool: get_market_sentiment
64
+ Input: { "symbol": "AAPL", "period": "1mo" }
65
+
66
+ Tool: get_history
67
+ Input: { "symbol": "AAPL", "period": "1mo" }
68
+
69
+ ## Output Format Guidelines
70
+
71
+ * Use **bold** for key financial metrics (e.g., Stock Symbol, Price, Volume).
72
+ * Cite the tools used to obtain the data (e.g., "Data sourced from Yahoo Finance as of [Date]").
73
+ * If a symbol or data point cannot be found, clearly state "Data for [X] is unavailable or invalid."
74
+ * Always include a disclaimer: "This analysis is for informational purposes only and is not financial advice."
75
+ """,
76
+ )
77
+
78
+ __all__ = ["yf_agent", "get_summary", "get_market_sentiment", "get_history", "current_datetime"]
common/mcp/README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MCP Tools Server
2
+
3
+ A Model Context Protocol (MCP) server that exposes all tools from the `tools/` folder via stdio transport.
4
+
5
+ ## Features
6
+
7
+ - **Dynamic Tool Discovery**: Automatically discovers and registers all tools from the tools folder
8
+ - **Stdio Transport**: Compatible with Claude Desktop and other MCP clients
9
+ - **Comprehensive Tool Coverage**: Exposes 14 tools across 6 categories:
10
+ - Google Search (google_tools)
11
+ - News API (news_tools)
12
+ - DuckDuckGo Search (search_tools)
13
+ - Time Utilities (time_tools)
14
+ - Weather Forecast (weather_tools)
15
+ - Yahoo Finance (yf_tools)
16
+
17
+ ## Installation
18
+
19
+ 1. Install required dependencies:
20
+ ```bash
21
+ pip install mcp requests beautifulsoup4 ddgs yfinance python-dotenv pydantic
22
+ ```
23
+
24
+ 2. Set up environment variables in `.env`:
25
+ ```bash
26
+ # Google Search (Serper.dev)
27
+ SERPER_API_KEY=your_serper_api_key
28
+
29
+ # News API
30
+ NEWS_API_KEY=your_news_api_key
31
+
32
+ # Weather API
33
+ OPENWEATHER_API_KEY=your_openweather_api_key
34
+
35
+ # Google AI (for agents)
36
+ GOOGLE_API_KEY=your_google_api_key
37
+
38
+ # Groq (for agents)
39
+ GROQ_API_KEY=your_groq_api_key
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ### Running the Server
45
+
46
+ ```bash
47
+ cd common/mcp
48
+ python mcp_server.py
49
+ ```
50
+
51
+ The server will:
52
+ 1. Discover all tools from the `tools/` folder
53
+ 2. Print registered tools to stderr
54
+ 3. Start listening on stdio for MCP protocol messages
55
+
56
+ ### Integrating with Claude Desktop
57
+
58
+ Add to your Claude Desktop config (`claude_desktop_config.json`):
59
+
60
+ ```json
61
+ {
62
+ "mcpServers": {
63
+ "tools-server": {
64
+ "command": "python",
65
+ "args": ["/absolute/path/to/agenticaiprojects/common/mcp/mcp_server.py"],
66
+ "env": {
67
+ "SERPER_API_KEY": "your_key",
68
+ "NEWS_API_KEY": "your_key",
69
+ "OPENWEATHER_API_KEY": "your_key"
70
+ }
71
+ }
72
+ }
73
+ }
74
+ ```
75
+
76
+ ### Available Tools
77
+
78
+ The server exposes the following tools:
79
+
80
+ **Google Search:**
81
+ - `google_tools.google_search` - General Google search
82
+ - `google_tools.google_search_recent` - Time-filtered Google search
83
+
84
+ **News:**
85
+ - `news_tools.get_top_headlines` - Top headlines by country
86
+ - `news_tools.search_news` - Search news by topic
87
+ - `news_tools.get_news_by_category` - News by category
88
+
89
+ **Search & Content:**
90
+ - `search_tools.duckduckgo_search` - DuckDuckGo search
91
+ - `search_tools.fetch_page_content` - Extract page content
92
+
93
+ **Time:**
94
+ - `time_tools.current_datetime` - Get current date/time
95
+
96
+ **Weather:**
97
+ - `weather_tools.get_weather_forecast` - Weather forecast via API
98
+ - `weather_tools.search_weather_fallback_ddgs` - Weather via DuckDuckGo
99
+ - `weather_tools.search_weather_fallback_bs` - Weather via web scraping
100
+
101
+ **Finance:**
102
+ - `yf_tools.get_summary` - Stock summary
103
+ - `yf_tools.get_market_sentiment` - Market sentiment analysis
104
+ - `yf_tools.get_history` - Historical stock data
105
+
106
+ ## Development
107
+
108
+ ### Adding New Tools
109
+
110
+ 1. Create a new file in `tools/` folder (e.g., `my_tools.py`)
111
+ 2. Decorate functions with `@function_tool`
112
+ 3. The server will automatically discover and register them on next restart
113
+
114
+ ### Testing
115
+
116
+ ```bash
117
+ # Test the server
118
+ cd common/mcp
119
+ python mcp_server.py
120
+
121
+ # In another terminal, you can send MCP protocol messages via stdin
122
+ # Or use an MCP client library to test
123
+ ```
124
+
125
+ ## Troubleshooting
126
+
127
+ **Tools not discovered:**
128
+ - Check that functions are decorated with `@function_tool`
129
+ - Verify the module is in the `tools/` folder
130
+ - Check stderr output for registration messages
131
+
132
+ **API errors:**
133
+ - Verify environment variables are set correctly
134
+ - Check API key validity
135
+ - Review tool-specific error messages in stderr
136
+
137
+ ## License
138
+
139
+ Part of the agenticaiprojects repository.
common/mcp/__init__.py ADDED
File without changes
common/mcp/mcp_server.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
MCP Server with stdio transport that exposes all tools from the tools folder.
"""
import asyncio
import sys
import os
import inspect
import importlib
from pathlib import Path
from typing import Any, Callable

# Add parent directory to path for imports
# NOTE(review): inserting common/ at the FRONT of sys.path lets the local
# "mcp" package shadow the installed MCP SDK package of the same name, which
# the imports just below need ("from mcp.server import ..."). Confirm the
# resolution order actually works here; appending instead of inserting at
# index 0 may be safer.
sys.path.insert(0, str(Path(__file__).parent.parent))

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# Initialize MCP server
# Single module-level Server instance; the decorated handlers below
# (list_tools / call_tool) register themselves onto it.
app = Server("tools-server")

# Dictionary to store all discovered tools
# Maps "module_name.function_name" -> callable; populated by discover_tools().
TOOLS_REGISTRY: dict[str, Callable] = {}
25
+
26
def discover_tools():
    """
    Dynamically discover tool functions from the tools folder.

    Populates TOOLS_REGISTRY with entries keyed "module_name.function_name".
    Only public functions *defined in* each tool module are registered, so
    names merely imported into a module (e.g. load_dotenv, requests helpers)
    are not exposed as tools.

    NOTE(review): if @function_tool wraps functions into non-function objects,
    inspect.isfunction will not see them — confirm against the agents SDK.
    """
    tools_dir = Path(__file__).parent / "tools"
    tool_modules = [
        "google_tools",
        "news_tools",
        "search_tools",
        "time_tools",
        "weather_tools",
        "yf_tools",
    ]

    print(f"[MCP Server] Discovering tools from: {tools_dir}", file=sys.stderr)

    # Import through a local "tools" package instead of "mcp.tools": the name
    # "mcp" collides with the installed MCP SDK package, so "mcp.tools.*"
    # resolves against whichever "mcp" wins on sys.path and is fragile.
    pkg_dir = str(Path(__file__).parent)
    if pkg_dir not in sys.path:
        sys.path.insert(0, pkg_dir)

    for module_name in tool_modules:
        try:
            module = importlib.import_module(f"tools.{module_name}")

            for name, obj in inspect.getmembers(module, inspect.isfunction):
                # Skip private helpers.
                if name.startswith('_'):
                    continue
                # Skip functions imported from elsewhere; register only
                # functions this module itself defines.
                if getattr(obj, "__module__", None) != module.__name__:
                    continue

                tool_name = f"{module_name}.{name}"
                TOOLS_REGISTRY[tool_name] = obj
                print(f"[MCP Server] Registered tool: {tool_name}", file=sys.stderr)

        except Exception as e:
            print(f"[MCP Server] Error loading module {module_name}: {e}", file=sys.stderr)

    print(f"[MCP Server] Total tools registered: {len(TOOLS_REGISTRY)}", file=sys.stderr)
65
+
66
+
67
@app.list_tools()
async def list_tools() -> list[Tool]:
    """
    List all available tools.

    Builds an MCP Tool definition for each registered callable by reflecting
    on its signature: parameter annotations become JSON-schema primitive
    types and parameters without a default value are marked required.
    """
    tools = []

    # Map Python annotations to JSON-schema type names; anything unmapped
    # (or unannotated) falls back to "string".
    type_map = {int: "integer", bool: "boolean", float: "number", str: "string"}

    for tool_name, tool_func in TOOLS_REGISTRY.items():
        # Extract function signature and docstring
        sig = inspect.signature(tool_func)
        doc = inspect.getdoc(tool_func) or "No description available"

        # Build input schema from function parameters
        properties = {}
        required = []

        for param_name, param in sig.parameters.items():
            param_type = "string"  # default when annotation missing/unmapped
            # Compare against the sentinel with `is`: Parameter.empty is a
            # singleton marker, not a value with meaningful equality.
            if param.annotation is not inspect.Parameter.empty:
                param_type = type_map.get(param.annotation, "string")

            properties[param_name] = {
                "type": param_type,
                "description": f"Parameter: {param_name}"
            }

            # A parameter with no default value is required.
            if param.default is inspect.Parameter.empty:
                required.append(param_name)

        # Create tool definition; description is the docstring's first line,
        # capped at 200 characters.
        tool = Tool(
            name=tool_name,
            description=doc.splitlines()[0][:200],
            inputSchema={
                "type": "object",
                "properties": properties,
                "required": required
            }
        )
        tools.append(tool)

    return tools
119
+
120
+
121
@app.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """
    Execute a registered tool by name and return its output as text content.

    Raises ValueError for an unknown tool name; execution errors are caught
    and returned to the client as an error-message text block.
    """
    print(f"[MCP Server] Calling tool: {name} with args: {arguments}", file=sys.stderr)

    tool_func = TOOLS_REGISTRY.get(name)
    if tool_func is None:
        raise ValueError(f"Tool not found: {name}")

    try:
        # Await coroutine tools; invoke plain functions directly.
        if inspect.iscoroutinefunction(tool_func):
            outcome = await tool_func(**arguments)
        else:
            outcome = tool_func(**arguments)

        # MCP text content requires a string payload.
        text = outcome if isinstance(outcome, str) else str(outcome)
        return [TextContent(type="text", text=text)]

    except Exception as e:
        error_msg = f"Error executing tool {name}: {str(e)}"
        print(f"[MCP Server] {error_msg}", file=sys.stderr)
        return [TextContent(type="text", text=error_msg)]
150
+
151
+
152
async def main():
    """
    Main entry point for the MCP server.

    Discovers and registers all tools, then serves the MCP protocol over
    stdio until the client closes the stream.
    """
    # Discover all tools before starting the server
    discover_tools()

    print(f"[MCP Server] Starting MCP server with {len(TOOLS_REGISTRY)} tools", file=sys.stderr)

    # Run the server with stdio transport (stdout carries protocol traffic,
    # which is why all diagnostics above go to stderr)
    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )


if __name__ == "__main__":
    asyncio.run(main())
common/mcp/tools/__init__.py ADDED
File without changes
common/mcp/tools/google_tools.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from typing import Optional
6
+
7
+ # ---------------------------------------------------------
8
+ # Load environment variables
9
+ # ---------------------------------------------------------
10
+ load_dotenv()
11
+
12
+ # ============================================================
13
+ # 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
14
+ # ============================================================
15
+
16
@function_tool
def google_search(query: str, num_results: int = 3) -> str:
    """
    Perform a general Google search using Serper.dev API.

    Parameters:
    -----------
    query : str
        The search query string, e.g., "latest Tesla stock news".
    num_results : int, optional (default=3)
        Maximum number of search results to return.

    Returns:
    --------
    str
        Formatted string of top search results, each including:
        - Title of the page
        - URL link
        - Snippet / description
        If no results are found or API key is missing, returns an error message.

    Example:
    --------
    google_search("AI in finance", num_results=2)
    """
    print(f"[DEBUG] google_search called with query='{query}', num_results={num_results}")

    try:
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        url = "https://google.serper.dev/search"
        headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
        # No "tbs" time filter here: this is the *general* search tool, as the
        # docstring states. The previous "tbs": "qdr:d" silently restricted
        # every query to the last 24 hours; time-restricted queries belong to
        # google_search_recent, which exposes the timeframe explicitly.
        payload = {"q": query, "num": num_results}

        response = requests.post(url, headers=headers, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()

        if "organic" not in data or not data["organic"]:
            return f"No results found for query: '{query}'"

        formatted_results = [
            f"Title: {item.get('title')}\n"
            f"Link: {item.get('link')}\n"
            f"Snippet: {item.get('snippet', '')}\n"
            for item in data["organic"][:num_results]
        ]
        return "\n".join(formatted_results)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error during Google search: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error performing Google search: {e}")
        return f"Error performing Google search: {e}"
82
+
83
+
84
@function_tool
def google_search_recent(query: str, num_results: int = 3, timeframe: str = "d") -> str:
    """
    Perform a Google search restricted to a recent time window (Serper.dev).

    Parameters:
    -----------
    query : str
        The search query string.
    num_results : int, optional (default=3)
        Maximum number of search results to return.
    timeframe : str, optional (default="d")
        Serper "qdr" window: "d" = past day, "w" = past week,
        "m" = past month, "y" = past year.

    Returns:
    --------
    str
        Formatted string of recent search results, or an error message.
    """
    print(f"[DEBUG] google_search_recent called with query='{query}', timeframe={timeframe}")

    try:
        key = os.getenv("SERPER_API_KEY")
        if not key:
            return "Error: SERPER_API_KEY missing in environment variables."

        # "tbs=qdr:<x>" is Google's time-range filter, forwarded by Serper.
        body = {"q": query, "num": num_results, "tbs": f"qdr:{timeframe}"}
        response = requests.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": key, "Content-Type": "application/json"},
            json=body,
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        hits = data.get("organic")
        if not hits:
            return f"No recent results found for query: '{query}'"

        lines = []
        for hit in hits[:num_results]:
            lines.append(
                f"Title: {hit.get('title')}\n"
                f"Link: {hit.get('link')}\n"
                f"Snippet: {hit.get('snippet', '')}\n"
            )

        return f"Recent results ({timeframe}):\n\n" + "\n".join(lines)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Error performing Google search: {e}"
common/mcp/tools/news_tools.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from typing import Optional
6
+ import datetime
7
+
8
+ # ---------------------------------------------------------
9
+ # Load environment variables
10
+ # ---------------------------------------------------------
11
+ load_dotenv()
12
+
13
+ # ============================================================
14
+ # 🔹 NEWS TOOLSET (NewsAPI.org)
15
+ # ============================================================
16
+
17
@function_tool
def get_top_headlines(country: str = "us", num_results: int = 5) -> str:
    """
    Fetch the latest top headlines for a country using NewsAPI.org.

    Parameters:
    -----------
    country : str, optional (default="us")
        Two-letter country code (e.g., "us", "gb", "in").
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines (title, source, published date, URL), or an
        error message when the API key is missing or nothing is found.
    """
    print(f"[DEBUG] get_top_headlines called for country={country}, num_results={num_results}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={"country": country, "pageSize": num_results, "apiKey": api_key},
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()

        articles = payload.get("articles")
        if not articles:
            return f"No top headlines found for country: {country}"

        blocks = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in articles[:num_results]
        ]

        return f"Top Headlines ({country.upper()}):\n\n" + "\n".join(blocks)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
73
+
74
+
75
@function_tool
def search_news(query: str, num_results: int = 5, days_back: int = 7) -> str:
    """
    Search for recent news articles about a specific topic using NewsAPI.org.

    Parameters:
    -----------
    query : str
        Keyword or topic to search (e.g., "Tesla earnings", "AI healthcare").
    num_results : int, optional (default=5)
        Number of articles to fetch.
    days_back : int, optional (default=7)
        Number of days to look back for articles (1-30).

    Returns:
    --------
    str
        Formatted news articles with title, source, published date, and URL.
        If API key is missing or no results found, returns an error message.
    """
    print(f"[DEBUG] search_news called with query='{query}', num_results={num_results}, days_back={days_back}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        # Calculate date range. Use a timezone-aware UTC timestamp:
        # datetime.utcnow() is deprecated since Python 3.12 and returns a
        # naive datetime; the "Z" suffix in the format implies UTC anyway.
        today = datetime.datetime.now(datetime.timezone.utc)
        from_date = (today - datetime.timedelta(days=days_back)).strftime('%Y-%m-%dT%H:%M:%SZ')

        url = "https://newsapi.org/v2/everything"
        params = {
            "q": query,
            "pageSize": num_results,
            "apiKey": api_key,
            "sortBy": "publishedAt",
            "language": "en",
            "from": from_date
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        if not data.get("articles"):
            return f"No news found for query: '{query}'"

        formatted = []
        for article in data["articles"][:num_results]:
            formatted.append(
                f"📰 {article.get('title')}\n"
                f" Source: {article.get('source', {}).get('name')}\n"
                f" Published: {article.get('publishedAt', 'N/A')}\n"
                f" URL: {article.get('url')}\n"
            )

        return f"News Search Results for '{query}' (last {days_back} days):\n\n" + "\n".join(formatted)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
140
+
141
+
142
@function_tool
def get_news_by_category(category: str = "business", country: str = "us", num_results: int = 5) -> str:
    """
    Fetch top headlines by category using NewsAPI.org.

    Parameters:
    -----------
    category : str, optional (default="business")
        One of: "business", "entertainment", "general", "health",
        "science", "sports", "technology".
    country : str, optional (default="us")
        Two-letter country code.
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines for the specified category, or an error message.
    """
    print(f"[DEBUG] get_news_by_category called for category={category}, country={country}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        query_params = {
            "category": category,
            "country": country,
            "pageSize": num_results,
            "apiKey": api_key,
        }
        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params=query_params,
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()

        articles = payload.get("articles")
        if not articles:
            return f"No headlines found for category: {category}"

        blocks = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in articles[:num_results]
        ]

        return f"Top {category.capitalize()} Headlines ({country.upper()}):\n\n" + "\n".join(blocks)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
common/mcp/tools/rag_tool.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RAG Search Tool - Search the local healthcare knowledge base"""
2
+ import os
3
+ from pathlib import Path
4
+ from agents import function_tool, RunContextWrapper
5
+ from dotenv import load_dotenv
6
+ from rag.rag import Retriever
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass
class UserContext:
    """Per-request context handed to rag_search via RunContextWrapper."""
    # Unique user/session identifier.
    uid: str
    # Path to the vector store consumed by Retriever.
    db_path: str = ""
    # Path to the source document(s) backing the knowledge base.
    file_path: str = ""
    similarity_threshold: float = 0.4  # FAISS L2 distance threshold for RAG relevance
16
+
17
+
18
+ # ---------------------------------------------------------
19
+ # Load environment variables
20
+ # ---------------------------------------------------------
21
+ load_dotenv()
22
+
23
+ # ---------------------------------------------------------
24
+ # Initialize RAG Retriever
25
+ # ---------------------------------------------------------
26
+ # Get the healthcare-rag-chatbot directory path
27
+ # healthcare_dir = str(Path(__file__).parent.parent.parent)
28
+ # retriever = None
29
+
30
+ # ---------------------------------------------------------
31
+ # RAG Search Tool
32
+ # ---------------------------------------------------------
33
@function_tool
def rag_search(wrapper: RunContextWrapper[UserContext], query: str) -> str:
    """
    Search the local healthcare knowledge base for relevant information.

    Args:
        wrapper: Run context carrying the UserContext (db/file paths and
            the FAISS distance threshold used to judge relevance)
        query: The medical question or topic to search for

    Returns:
        Relevant information from the healthcare knowledge base, or the
        sentinel string "No relevant information found in the knowledge
        base." — callers treat that sentinel as the signal to fall back
        to web search.
    """
    print(f"[DEBUG] RAG_SEARCH called with query: '{query}'")

    # Get similarity threshold from user context
    similarity_threshold = wrapper.context.similarity_threshold
    print(f"[DEBUG] RAG_SEARCH: Using similarity threshold: {similarity_threshold}")

    try:
        # Initialize retriever with user context (a fresh instance per call;
        # paths come from the per-request UserContext)
        retriever = Retriever(
            db_path=wrapper.context.db_path,
            file_path=wrapper.context.file_path
        )

        # Get results with similarity scores
        results_with_scores = retriever.retrieve_with_scores(query, k=5)  # Increased from 4 to 5

        if not results_with_scores:
            print("[DEBUG] RAG_SEARCH: No results found in knowledge base")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Found {len(results_with_scores)} results")

        # Check if the best match meets the threshold
        # FAISS returns (document, distance) where lower distance = better match
        best_score = results_with_scores[0][1]
        print(f"[DEBUG] RAG_SEARCH: Best similarity score (distance): {best_score:.4f} (threshold: {similarity_threshold})")

        # If even the closest document is farther than the threshold, treat
        # the whole result set as irrelevant.
        if best_score > similarity_threshold:
            print(f"[DEBUG] RAG_SEARCH: Best match score {best_score:.4f} is above threshold {similarity_threshold}")
            print("[DEBUG] RAG_SEARCH: Results not relevant enough, triggering web search fallback")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Results are relevant (score: {best_score:.4f} <= {similarity_threshold})")

        # Log all scores for debugging
        all_scores = [f"{score:.4f}" for _, score in results_with_scores]
        print(f"[DEBUG] RAG_SEARCH: All scores: {', '.join(all_scores)}")

        # Format results - only include documents that meet the similarity
        # threshold (per-document filter on top of the best-score gate above)
        formatted_results = []
        for i, (doc, score) in enumerate(results_with_scores[:5], 1):  # Top 5 results
            if score <= similarity_threshold:
                content = doc.page_content.strip()
                formatted_results.append(f"Result {i} (score: {score:.4f}):\n{content}\n")

        # Defensive re-check: unreachable if results are sorted by distance
        # (the best-score gate already passed), kept for safety.
        if not formatted_results:
            print("[DEBUG] RAG_SEARCH: No results met the similarity threshold")
            print("[DEBUG] RAG_SEARCH: Triggering web search fallback")
            return "No relevant information found in the knowledge base."

        result_text = "\n".join(formatted_results)
        print(f"[DEBUG] RAG_SEARCH: Returning {len(formatted_results)} results, total length: {len(result_text)} characters")
        print(f"[DEBUG] RAG_SEARCH: First 300 chars: {result_text[:300]}...")

        return result_text

    except Exception as e:
        print(f"[DEBUG] RAG_SEARCH: Error occurred - {str(e)}")
        return f"Error retrieving from knowledge base: {str(e)}"
103
+
104
+
105
+
106
+ __all__ = ["rag_search", "retriever"]
common/mcp/tools/search_tools.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ddgs import DDGS
2
+ from agents import function_tool
3
+ from dotenv import load_dotenv
4
+ from pydantic import BaseModel, Field
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ from typing import Optional
8
+
9
+ # ---------------------------------------------------------
10
+ # Load environment variables
11
+ # ---------------------------------------------------------
12
+ load_dotenv()
13
+
14
+ # ---------------------- MODELS ---------------------------
15
class searchQuery(BaseModel):
    """Input schema for duckduckgo_search (validated by pydantic)."""
    query: str = Field(..., description="The search query string.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    search_type: str = Field(
        "text",
        description="Search type: 'text' (default) or 'news'. Use 'news' to get publication dates."
    )
    timelimit: str = Field(
        'd',
        description="Time limit for search results: 'd' (day), 'w' (week), 'm' (month), 'y' (year)."
    )
    region: str = Field("us-en", description="Region for search results (e.g., 'us-en').")
27
+
28
+
29
class searchResult(BaseModel):
    """Normalized search hit returned (as a dict) by duckduckgo_search."""
    title: str
    link: str
    snippet: str
    # Publication date — only populated for search_type="news" results.
    datetime: Optional[str] = None
34
+
35
+
36
+ # ---------------------- PAGE FETCH TOOL ---------------------------
37
@function_tool
def fetch_page_content(url: str, timeout: int = 3) -> Optional[str]:
    """Fetch and extract text content from a web page.

    Returns the cleaned visible text, or None on any fetch/parse failure
    (callers must handle the None case).
    """
    print(f"[DEBUG] fetch_page_content called with: {url} - timeout: {timeout}")
    try:
        # Desktop-browser User-Agent: many sites block default requests UA.
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove irrelevant elements (boilerplate chrome, scripts, styles)
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        # Extract text
        text = soup.get_text(separator='\n', strip=True)

        # Clean whitespace
        # NOTE(review): the classic BeautifulSoup recipe splits on a
        # double space ("  ") here; splitting on a single space puts every
        # word on its own line — confirm which is intended.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n'.join(chunk for chunk in chunks if chunk)

        return text
    except Exception as e:
        # Best-effort tool: log and return None rather than raising.
        print(f"[WARNING] Failed to fetch content from {url}: {str(e)}")
        return None
70
+
71
+
72
+ # ---------------------- SEARCH TOOL ---------------------------
73
@function_tool
def duckduckgo_search(params: searchQuery) -> list[dict]:
    """Run a DuckDuckGo query and return snippet-level results only.

    No page content is fetched here; use fetch_page_content for full text.
    """
    print(f"[DEBUG] duckduckgo_search called with: {params}")

    results: list[dict] = []
    with DDGS() as ddgs:
        shared_kwargs = dict(
            max_results=params.max_results,
            timelimit=params.timelimit,
            region=params.region,
        )
        if params.search_type == "news":
            # News hits carry a publication date under "date" and the URL
            # under "url".
            for hit in ddgs.news(params.query, **shared_kwargs):
                entry = searchResult(
                    title=hit.get("title", ""),
                    link=hit.get("url", ""),
                    snippet=hit.get("body", ""),
                    datetime=hit.get("date", ""),
                )
                results.append(entry.model_dump())
        else:
            # Plain text hits use "href" for the URL and have no date.
            for hit in ddgs.text(params.query, **shared_kwargs):
                entry = searchResult(
                    title=hit.get("title", ""),
                    link=hit.get("href", ""),
                    snippet=hit.get("body", ""),
                )
                results.append(entry.model_dump())

    print(f"[DEBUG] duckduckgo_search returning {len(results)} results")
    return results
115
+
common/mcp/tools/time_tools.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from agents import function_tool
3
+ # from ..common.utility.logger import log_call
4
+
5
@function_tool
# @log_call
def current_datetime(format: str = "natural") -> str:
    """
    Returns the current date and time as a formatted string.

    Args:
        format (str): Format style for the datetime. Options:
            - "natural" (default): "Saturday, December 7, 2025 at 3:59 PM"
            - "natural_short": "Dec 7, 2025 at 3:59 PM"
            - "natural_full": "Saturday, December 7, 2025 at 3:59:30 PM CST"
            - Custom strftime format string (e.g., "%Y-%m-%d %H:%M:%S")

    Returns:
        str: Current date and time in the specified format
    """
    # Use a timezone-aware local time: on a naive datetime.now() the %Z
    # directive formats as an empty string, so "natural_full" would never
    # show the timezone abbreviation promised in the docstring.
    now = datetime.now().astimezone()

    # Natural format options
    if format == "natural":
        return now.strftime("%A, %B %d, %Y at %I:%M %p")
    elif format == "natural_short":
        return now.strftime("%b %d, %Y at %I:%M %p")
    elif format == "natural_full":
        return now.strftime("%A, %B %d, %Y at %I:%M:%S %p %Z")
    else:
        # Any other value is treated as a custom strftime format string.
        return now.strftime(format)
common/mcp/tools/weather_tools.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import datetime
5
+ from dotenv import load_dotenv
6
+ from typing import Optional
7
+
8
+ from ddgs import DDGS
9
+ from agents import function_tool
10
+
11
+ # ---------------------------------------------------------
12
+ # Load environment variables
13
+ # ---------------------------------------------------------
14
+ load_dotenv()
15
+
16
@function_tool
def get_weather_forecast(city: str, date: Optional[str] = None) -> str:
    """
    PRIMARY TOOL: Fetch weather using OpenWeatherMap API.

    Queries the 5-day/3-hour forecast endpoint for *city* and returns a
    plain-text report. When *date* (YYYY-MM-DD) is given, only entries for
    that day are included; if the day is outside the 5-day window an
    explanatory message is returned instead.
    """
    print(f"[DEBUG] Primary API get_weather_forecast called for city={city}")

    api_key = os.getenv("OPENWEATHER_API_KEY")
    if not api_key:
        return "Error: OPENWEATHER_API_KEY missing. Please use the fallback search tool."

    endpoint = "https://api.openweathermap.org/data/2.5/forecast"
    try:
        response = requests.get(
            endpoint,
            params={"q": city, "appid": api_key, "units": "metric"},
            timeout=5
        )
        payload = response.json()
    except Exception as exc:
        return f"Error calling weather API: {str(exc)}"

    # OpenWeatherMap signals success via a "cod" field (string or int).
    if str(payload.get("cod")) != "200":
        return f"Error from API: {payload.get('message', 'Unknown error')}"

    # Collect one line per forecast entry, filtered to the requested day.
    report_lines = []
    for entry in payload.get("list", []):
        day = entry["dt_txt"].split(" ")[0]
        if date and day != date:
            continue
        weather = entry['weather'][0]['description'].capitalize()
        main = entry['main']
        report_lines.append(
            f"{day}: {weather}, Temp: {main['temp']}°C, "
            f"Humidity: {main['humidity']}%, Wind: {entry['wind']['speed']} m/s"
        )

    # A date was requested but nothing matched: out of the 5-day range.
    if date and not report_lines:
        return f"API valid, but date {date} is out of range (5-day limit). Try the search fallback tool."

    final_report = "\n".join(report_lines)
    return f"API Forecast for {city}:\n{final_report}"
67
+
68
+ # ---------------------------------------------------------
69
+ # Tool 2: Web Search Fallback (Secondary)
70
+ # ---------------------------------------------------------
71
+
72
@function_tool
def search_weather_fallback_ddgs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Search-based fallback that produces an API-like structured forecast.

    Args:
        city: City name to look up.
        date: Optional date in "YYYY-MM-DD" form; defaults to today. A
            non-ISO value is used verbatim in the search query.

    Returns:
        str: An API-like forecast line for the city, or an error message.
    """
    # Explicit class import: the module-level `import datetime` binds the
    # *module* (which has no `strptime`); the original code only worked
    # because a later module-level `from datetime import datetime` happened
    # to rebind the name. Importing locally removes that ordering hazard.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (DDGS) called for city={city}, date={date}")

    try:
        # --- Build a natural-language date for the search query ---
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                # Not ISO formatted; use the caller's value as-is.
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- Perform Search ---
        results = list(DDGS().text(query, max_results=3))
        print(f"[DEBUG] Number of search results: {len(results)}")

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate snippet text for regex extraction ---
        full_text = " ".join(r.get("body", "") for r in results)

        # --- Extract values with tolerant regexes (first match wins) ---
        temp_match = re.findall(r'(-?\d+)\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_match[0] if temp_match else "?"

        humidity_match = re.findall(r'(\d+)\s*%', full_text)
        humidity = humidity_match[0] if humidity_match else "?"

        wind_match = re.findall(r'(\d+)\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_match[0] if wind_match else "?"

        # --- Condition: best-effort guess from the first result title ---
        condition_raw = results[0].get("title", "Unknown").split("-")[0].strip()
        condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Construct API-like Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )
        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
139
+
140
+
141
+ import requests
142
+ from bs4 import BeautifulSoup
143
+ import re
144
+ from typing import Optional
145
+ from agents import function_tool
146
+ from datetime import datetime
147
+
148
@function_tool
def search_weather_fallback_bs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Web-scraping fallback using BeautifulSoup.
    Produces an API-like structured forecast.

    Args:
        city: City name to look up.
        date: Optional date in "YYYY-MM-DD" form; defaults to today.

    Returns:
        str: An API-like forecast line for the city, or an error message.
    """
    # Fix: the original re-imported requests/bs4/re/datetime inside the
    # function although they are already imported at module level. Only the
    # datetime *class* import is kept local, because the module top-level
    # `import datetime` binds the module object, not the class.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (BeautifulSoup) called for city={city}, date={date}")

    try:
        # --- Build Query ---
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- DuckDuckGo HTML search (no API key required) ---
        search_url = f"https://duckduckgo.com/html/?q={query.replace(' ', '+')}"
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(search_url, headers=headers, timeout=5)
        if response.status_code != 200:
            return f"Error fetching search results: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for result in soup.select(".result__body"):
            title_tag = result.select_one(".result__title a")
            snippet_tag = result.select_one(".result__snippet")
            if title_tag and snippet_tag:
                results.append({
                    "title": title_tag.get_text(strip=True),
                    "body": snippet_tag.get_text(strip=True)
                })

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate Text ---
        full_text = " ".join([r["body"] for r in results])

        # --- Extract Temperature / Humidity / Wind (first match wins) ---
        temp_matches = re.findall(r'(-?\d{1,2})\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_matches[0] if temp_matches else "?"

        humidity_matches = re.findall(r'(\d{1,3})\s*%', full_text)
        humidity = humidity_matches[0] if humidity_matches else "?"

        wind_matches = re.findall(r'(\d{1,3})\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_matches[0] if wind_matches else "?"

        # --- Extract Condition: scan snippets first, fall back to first title ---
        condition = "Unknown"
        for r in results:
            m = re.search(r'(clear|sunny|cloudy|rain|snow|storm|fog|mist)', r["body"], re.I)
            if m:
                condition = m.group(1).capitalize()
                break
        if condition == "Unknown":
            # Fallback
            condition_raw = results[0]["title"].split("-")[0].strip()
            condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Build Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )
        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
common/mcp/tools/yf_tools.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import yfinance as yf
4
+ from dotenv import load_dotenv
5
+ from agents import function_tool
6
+ from datetime import datetime, timedelta
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+
12
+ # ============================================================
13
+ # 🔹 YAHOO FINANCE TOOLSET
14
+ # ============================================================
15
@function_tool
def get_summary(symbol: str, period: str = "1d", interval: str = "1h") -> str:
    """
    Fetch the latest summary information and intraday price data for a given ticker.
    Ensures recent data is retrieved by calculating start/end dates dynamically.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1d")
        Time range for price data. Examples: "1d", "5d", "1mo", "3mo".
    interval : str, optional (default="1h")
        Granularity of the data. Examples: "1m", "5m", "1h", "1d".

    Returns:
    --------
    str
        A formatted string containing the name, current price and change,
        open/high/low, volume, and the period/interval used — or an error
        message if no data could be fetched.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix ("d"/"mo"/"y") into an approximate
        # number of days; unrecognised formats default to ~1 month.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30  # default 1 month
        start_date = end_date - timedelta(days=days)

        # Fix: yfinance treats `end` as EXCLUSIVE, so passing today's date
        # silently dropped the current session. Pass tomorrow so today's
        # (intraday) bars are included.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d"),
            interval=interval
        )

        if data.empty:
            return f"No data found for symbol '{symbol}'."

        # Summarise the most recent bar.
        latest = data.iloc[-1]
        current_price = round(latest["Close"], 2)
        open_price = round(latest["Open"], 2)
        change = round(current_price - open_price, 2)
        pct_change = round((change / open_price) * 100, 2)

        info = ticker.info
        long_name = info.get("longName", symbol)
        currency = info.get("currency", "USD")

        formatted = [
            f"📈 {long_name} ({symbol})",
            f"Current Price: {current_price} {currency}",
            f"Change: {change} ({pct_change}%)",
            f"Open: {open_price} | High: {round(latest['High'], 2)} | Low: {round(latest['Low'], 2)}",
            f"Volume: {int(latest['Volume'])}",
            f"Period: {period} | Interval: {interval}",
        ]
        return "\n".join(formatted)

    except Exception as e:
        return f"Error fetching data for '{symbol}': {e}"
87
+
88
@function_tool
def get_market_sentiment(symbol: str, period: str = "1mo") -> str:
    """
    Analyze recent price changes and provide a simple market sentiment.
    Uses dynamic start/end dates to ensure recent data.

    This tool computes the percentage change over the specified period and
    classifies the sentiment as:
    - Bullish (if price increased >2%)
    - Bearish (if price decreased >2%)
    - Neutral (otherwise)

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        Time range to analyze. Examples: "7d", "1mo", "3mo".

    Returns:
    --------
    str
        A human-readable sentiment string including percentage change.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix into an approximate day count.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30
        start_date = end_date - timedelta(days=days)

        # Fix: `end` is exclusive in yfinance — add a day so today's close
        # is part of the window being analysed.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No data for {symbol}."

        recent_change = data["Close"].iloc[-1] - data["Close"].iloc[0]
        pct_change = (recent_change / data["Close"].iloc[0]) * 100

        # Classify: >+2% bullish, <-2% bearish, otherwise neutral.
        sentiment = "Neutral"
        if pct_change > 2:
            sentiment = "Bullish"
        elif pct_change < -2:
            sentiment = "Bearish"

        return f"{symbol} market sentiment ({period}): {sentiment} ({pct_change:.2f}% change)"

    except Exception as e:
        return f"Error fetching market sentiment for '{symbol}': {e}"
148
+
149
@function_tool
def get_history(symbol: str, period: str = "1mo") -> str:
    """
    Fetch historical price data for a given ticker.
    Ensures recent data is retrieved dynamically using start/end dates.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        The length of historical data to retrieve. Examples: "1d", "5d", "1mo", "3mo", "1y", "5y".

    Returns:
    --------
    str
        A formatted string showing the last 5 rows of historical prices
        (Open, High, Low, Close, Volume), or an error message.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix into an approximate day count.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30
        start_date = end_date - timedelta(days=days)

        # Fix: `end` is exclusive in yfinance — add a day so the latest
        # session appears in the returned history.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No historical data found for '{symbol}'."
        return f"Historical data for {symbol} ({period}):\n{data.tail(5).to_string()}"

    except Exception as e:
        return f"Error fetching historical data for '{symbol}': {e}"
common/rag/rag.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+
8
+ DB_NAME = 'healthcare_db'
9
+ DIRECTORY_NAME = "healthcare"
10
+
11
class Retriever:
    """FAISS-backed retriever over the healthcare PDF corpus.

    Builds (or loads from disk) a FAISS index of the PDFs found under
    ``<file_path>/healthcare`` and exposes simple retrieval helpers. The
    index is persisted under ``<db_path>/healthcare_db``.
    """

    def __init__(self,
                 file_path: str = os.path.join(os.getcwd(), "data"),
                 db_path: str = os.path.join(os.getcwd(), "db")):
        self.directory_path = os.path.join(file_path, DIRECTORY_NAME)
        self.db_path = os.path.join(db_path, DB_NAME)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=200,
            length_function=len,
            is_separator_regex=False,
        )
        self.retriever = None  # lazily initialised by load_knowledge_base()

    def load_knowledge_base(self):
        """Load the persisted FAISS index, or build it from the PDFs if absent."""
        if os.path.exists(self.db_path):
            self.retriever = FAISS.load_local(
                self.db_path,
                self.embeddings,
                # Safe here: the index is produced locally by this class.
                allow_dangerous_deserialization=True
            ).as_retriever()
        else:
            self.retriever = self._create_knowledge_base()

    def _create_knowledge_base(self):
        """Build the FAISS index from the PDF corpus, persist it, return a retriever."""
        documents = self._load_documents()
        chunks = self._split_documents(documents)
        vectorstore = FAISS.from_documents(chunks, self.embeddings)
        vectorstore.save_local(self.db_path)
        return vectorstore.as_retriever()

    def _load_documents(self):
        """Load every PDF under the corpus directory (recursively)."""
        loader = DirectoryLoader(
            self.directory_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True
        )
        return loader.load()

    def _split_documents(self, documents):
        """Split loaded documents into overlapping chunks for embedding."""
        return self.text_splitter.split_documents(documents)

    def retrieve(self, query, k=4):
        """Retrieve the top-*k* documents without scores (backward compatible).

        Fix: the original ignored *k* entirely (``retriever.invoke`` takes no
        ``k``); it is now forwarded to the underlying vector store. The
        default of 4 matches the retriever's previous behaviour.
        """
        if not self.retriever:
            self.load_knowledge_base()
        return self.retriever.vectorstore.similarity_search(query, k=k)

    def retrieve_with_scores(self, query, k=4):
        """Retrieve documents with similarity scores.

        Note: FAISS returns L2 distance, so lower scores are better.
        """
        if not self.retriever:
            self.load_knowledge_base()
        vectorstore = self.retriever.vectorstore
        return vectorstore.similarity_search_with_score(query, k=k)

    def update_knowledge_base(self):
        """Rebuild the index from the PDFs and refresh the active retriever.

        Fix: the original rebuilt the index on disk but never reassigned
        ``self.retriever``, so stale results were served until a reload.
        """
        self.retriever = self._create_knowledge_base()

    def delete_knowledge_base(self):
        """Remove the persisted index directory, if present."""
        if os.path.exists(self.db_path):
            shutil.rmtree(self.db_path)
92
+
93
+ # No cleanup needed for VectorStoreRetriever
94
+
common/utility/__init__.py ADDED
File without changes
common/utility/embedding_factory.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Union
3
+ # from azure.identity import DefaultAzureCredential
4
+ from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
5
+ from langchain_ollama import OllamaEmbeddings
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+
8
+
9
class EmbeddingFactory:
    """
    A static utility class to create and return LLM Embedding instances based on the input type.

    Supported types: 'azure' (currently disabled), 'openai', 'ollama', 'hf'.
    """

    @staticmethod
    def get_llm(llm_type: str) -> "Union[AzureOpenAIEmbeddings, OpenAIEmbeddings, OllamaEmbeddings, HuggingFaceEmbeddings]":
        """
        Returns an embedding model instance based on the specified type.

        Parameters:
            llm_type (str): The embedding backend to use. Valid values are
                'azure', 'openai', 'ollama', or 'hf' (case-insensitive).

        Returns:
            The embedding model instance for the requested backend.

        Raises:
            NotImplementedError: For 'azure' — the implementation is
                commented out pending credential setup.
            ValueError: For any unrecognised ``llm_type``.
        """
        backend = llm_type.lower()
        if backend == "azure":
            # Fix: the original branch was a bare `pass` and silently
            # returned None, deferring the failure to the first use of the
            # missing client. Fail loudly instead.
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # return AzureOpenAIEmbeddings(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment="text-embedding-3-small",
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError("Azure embeddings are not configured in this build.")
        elif backend == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model="text-embedding-3-large"
            )
        elif backend == "ollama":  # must have ollama running locally with the following model
            return OllamaEmbeddings(model="gemma:2b")
        elif backend == "hf":  # must have key update in env:HF_TOKEN
            return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        else:
            # Fix: the original message only mentioned 'azure' and 'openai'
            # although 'ollama' and 'hf' are also supported.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai', 'ollama', or 'hf'.")
common/utility/llm_factory.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Any
4
+ from langchain_openai.chat_models import ChatOpenAI, AzureChatOpenAI
5
+ from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
6
+ # from azure.identity import DefaultAzureCredential
7
+ from huggingface_hub import login
8
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
9
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
10
+ from langchain_groq import ChatGroq
11
+ # from langchain_openai import OpenAIEmbeddings
12
+
13
class LLMFactory:
    """
    Factory class to provide LLM and embedding model instances for different providers.

    All methods are static; the class acts purely as a namespace for provider
    dispatch. Credentials are taken from ``kwargs['api_key']`` first, then from
    environment variables (OPENAI_API_KEY, HF_TOKEN, GROQ_API_KEY).
    """

    @staticmethod
    def get_llm(provider: str, **kwargs) -> Any:
        """
        Returns a chat/completion LLM instance based on the provider.
        Supported providers: openai, azureopenai, huggingface, ollama, groq

        Args:
            provider: Provider identifier (compared case-sensitively).
            **kwargs: Optional overrides — ``api_key`` and ``model_name``.

        Raises:
            ValueError: If ``provider`` is not one of the supported values.
                Note: the 'azureopenai' branch below is commented out, so that
                value currently also raises ValueError.
        """
        if provider == "openai":
            # OpenAI Chat Model
            return ChatOpenAI(
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY")),
                model_name=kwargs.get("model_name", "gpt-4")
            )

        # elif provider == "azureopenai":
        #     # Azure OpenAI Chat Model using Azure Identity for token
        #     credential = DefaultAzureCredential()
        #     token = credential.get_token("https://cognitiveservices.azure.com/.default").token
        #     if not token:
        #         raise ValueError("Token is required for AzureChatOpenAI.")
        #     return AzureChatOpenAI(
        #         azure_endpoint=kwargs["endpoint"],
        #         azure_deployment=kwargs.get("deployment_name", "gpt-4"),
        #         api_version=kwargs["api_version"],
        #         api_key=token
        #     )

        # pip install langchain langchain-huggingface huggingface_hub
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            # NOTE(review): recent langchain-huggingface releases construct
            # ChatHuggingFace(llm=<HuggingFaceEndpoint>) rather than taking
            # repo_id/task/model_kwargs directly — confirm this call against
            # the pinned package version before relying on this branch.
            return ChatHuggingFace(
                repo_id=kwargs.get("model_name", "mistralai/Mistral-Nemo-Instruct-2407"),  # Or any other chat-friendly model
                task="text-generation",
                model_kwargs={
                    "temperature": 0.7,
                    "max_new_tokens": 256
                }
            )

        elif provider == "ollama":
            # Ollama local model (requires a local Ollama server with the model pulled)
            return ChatOllama(
                model=kwargs.get("model_name", "gemma:2b"),
                temperature=0
            )

        elif provider == "groq":
            # Groq LLM
            return ChatGroq(
                model=kwargs.get("model_name", "Gemma2-9b-It"),
                max_tokens=512,
                api_key=kwargs.get("api_key", os.environ.get("GROQ_API_KEY"))
            )

        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @staticmethod
    def get_embedding_model(provider: str, **kwargs) -> Any:
        """
        Returns an embedding model instance based on the provider.
        Supported providers: openai, huggingface

        Also handles 'ollama' (local server) and rejects 'groq' explicitly,
        since Groq exposes no embedding endpoint here.

        Args:
            provider: Provider identifier.
            **kwargs: Optional overrides — ``api_key`` and ``model_name``.

        Raises:
            ValueError: For 'groq' (no embedding support) or any unknown provider.
        """
        if provider == "openai":
            return OpenAIEmbeddings(
                model=kwargs.get("model_name", "text-embedding-3-large"),
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY"))
            )
        # if provider == "azureopenai":
        #     # Get the Azure Credential
        #     credential = DefaultAzureCredential()
        #     token=credential.get_token("https://cognitiveservices.azure.com/.default").token

        #     if not token:
        #         raise ValueError("Token is required for AzureOpenAIEmbeddings.")
        #     return AzureOpenAIEmbeddings(
        #         azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
        #         azure_deployment=kwargs.get("azure_deployment", "text-embedding-3-large"),
        #         api_version=os.environ["AZURE_OPENAI_API_VERSION"],
        #         api_key=token
        #     )
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            return HuggingFaceEmbeddings(
                model_name=kwargs.get("model_name", "all-MiniLM-L6-v2")
            )
        elif provider == "groq":
            raise ValueError(f"No embedding support from the provider: {provider}")
        elif provider == "ollama":
            # Requires a local Ollama server with the model pulled.
            return OllamaEmbeddings(model=kwargs.get("model_name", "gemma:2b"))
        else:
            raise ValueError(f"Unsupported embedding provider: {provider}")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the OpenAI cookbook token counter.

        Args:
            messages: Iterable of dicts whose values are strings (e.g.
                role/content chat messages).

        Returns:
            int: Estimated prompt token count for a gpt-3.5-turbo-style chat.
        """
        # Always uses the gpt-3.5-turbo encoding, regardless of which model
        # get_llm() actually returned — the count is therefore approximate
        # for other providers/models.
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        tokens_per_message = 3  # <|start|>, role, <|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
+ return num_tokens
common/utility/llm_factory2.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Union
4
+ # from azure.identity import DefaultAzureCredential
5
+ from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI
6
+
7
+
8
class LLMFactory:
    """
    A static utility class to create and return LLM instances based on the input type.
    """

    @staticmethod
    def get_llm(llm_type: str) -> "Union[AzureChatOpenAI, ChatOpenAI]":
        """
        Returns an LLM instance based on the specified type.

        Parameters:
            llm_type (str): The type of LLM to return. Valid values are
                'azure', 'openai', or 'openai_chat' (case-insensitive).

        Returns:
            Union[AzureChatOpenAI, ChatOpenAI]: The LLM instance.

        Raises:
            NotImplementedError: For 'azure' (implementation commented out).
            ValueError: For any unrecognised ``llm_type``.
        """
        kind = llm_type.lower()
        if kind == "azure":
            # Fix: the original branch was a bare `pass` and silently
            # returned None; fail loudly instead of deferring the error.
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # return AzureChatOpenAI(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError("Azure chat models are not configured in this build.")
        elif kind in ("openai", "openai_chat"):
            # Fix: 'openai' and 'openai_chat' were two byte-identical
            # branches; they are merged into a single one.
            return ChatOpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name="gpt-4"
            )
        else:
            # Fix: message now lists every accepted value.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai', or 'openai_chat'.")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the Open AI cookbook token counter.

        Each message is sandwiched as <|start|>role{message}<|end|>, so a
        fixed per-message overhead is added on top of the encoded content.
        """
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

        tokens_per_message = 3  # token1:<|start|>, token2:system(or user or assistant), token3:<|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for value in message.values():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
+ return num_tokens
common/utility/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import datetime
3
+
4
def log_call(func):
    """
    A decorator that logs when a function is called and when it finishes.

    The wrapped function's return value is passed through unchanged; any
    exception is logged and re-raised.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        def _now():
            # Fix: the original captured one timestamp before the call and
            # reused it on the finish/error lines, so long-running calls
            # logged a misleading completion time. Take a fresh one per line.
            return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        arg_list = ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )
        print(f"[{_now()}] 🚀 Calling: {func.__name__}({arg_list})")
        try:
            result = func(*args, **kwargs)
            print(f"[{_now()}] ✅ Finished: {func.__name__}")
            return result
        except Exception as e:
            print(f"[{_now()}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
pyproject.toml CHANGED
@@ -67,6 +67,7 @@ dependencies = [
67
  "logfire",
68
  "serpapi",
69
  "smithery>=0.4.4",
 
70
 
71
  # =======================
72
  # WEB SCRAPING
@@ -100,6 +101,7 @@ dependencies = [
100
  # =======================
101
  "scikit-learn>=1.7.2",
102
  "huggingface_hub<=1.1.4",
 
103
 
104
  # =======================
105
  # IPYNB SUPPORT
 
67
  "logfire",
68
  "serpapi",
69
  "smithery>=0.4.4",
70
+ "sendgrid",
71
 
72
  # =======================
73
  # WEB SCRAPING
 
101
  # =======================
102
  "scikit-learn>=1.7.2",
103
  "huggingface_hub<=1.1.4",
104
+ "datasets>=4.4.1",
105
 
106
  # =======================
107
  # IPYNB SUPPORT
run.py CHANGED
@@ -1,11 +1,215 @@
1
- import os
2
- import subprocess
3
- import sys
4
-
5
- # Use module execution to guarantee Streamlit runs inside the current interpreter
6
- subprocess.run([
7
- sys.executable, "-m", "streamlit",
8
- "run",
9
- os.path.join("ui", "app.py"),
10
- "--server.runOnSave", "true"
11
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Universal App Launcher for AgenticAI Projects
4
+
5
+ Usage:
6
+ python run.py <app_name> [--port PORT] [--help]
7
+
8
+ Examples:
9
+ python run.py healthcare
10
+ python run.py deep-research --port 8502
11
+ python run.py stock-advisor
12
+ python run.py --list
13
+ """
14
+
15
+ import sys
16
+ import os
17
+ import subprocess
18
+ import argparse
19
+ from pathlib import Path
20
+ from typing import Dict, Optional
21
+
22
+
23
+ # App registry - maps app names to their paths and entry points
24
+ APP_REGISTRY: Dict[str, Dict[str, str]] = {
25
+ "healthcare": {
26
+ "path": "src/healthcare-assistant",
27
+ "entry": "app.py",
28
+ "description": "Healthcare Assistant - Medical information with RAG and web search"
29
+ },
30
+ "deep-research": {
31
+ "path": "src/deep-research",
32
+ "entry": "app.py",
33
+ "description": "Deep Research AI - Comprehensive research assistant"
34
+ },
35
+ "stock-advisor": {
36
+ "path": "src/stock-advisor",
37
+ "entry": "app.py",
38
+ "description": "Stock Advisor - Financial analysis and stock recommendations"
39
+ },
40
+ "travel-agent": {
41
+ "path": "src/travel-agent",
42
+ "entry": "app.py",
43
+ "description": "Travel Agent - Trip planning and travel recommendations"
44
+ },
45
+ "trip-planner": {
46
+ "path": "src/trip-planner",
47
+ "entry": "app.py",
48
+ "description": "Trip Planner - Detailed trip itinerary planning"
49
+ },
50
+ "chatbot": {
51
+ "path": "src/chatbot",
52
+ "entry": "app.py",
53
+ "description": "General Chatbot - Multi-purpose conversational AI"
54
+ },
55
+ "accessibility": {
56
+ "path": "src/accessibility",
57
+ "entry": "app.py",
58
+ "description": "Accessibility Tools - Assistive technology applications"
59
+ }
60
+ }
61
+
62
+
63
+ def print_banner():
64
+ """Print a nice banner."""
65
+ print("=" * 70)
66
+ print("🚀 AgenticAI Projects Launcher".center(70))
67
+ print("=" * 70)
68
+ print()
69
+
70
+
71
+ def list_apps():
72
+ """List all available apps."""
73
+ print_banner()
74
+ print("Available Applications:\n")
75
+
76
+ max_name_len = max(len(name) for name in APP_REGISTRY.keys())
77
+
78
+ for name, config in sorted(APP_REGISTRY.items()):
79
+ print(f" {name.ljust(max_name_len + 2)} - {config['description']}")
80
+
81
+ print("\n" + "=" * 70)
82
+ print("\nUsage: python run.py <app_name> [--port PORT]")
83
+ print("Example: python run.py healthcare --port 8501\n")
84
+
85
+
86
+ def validate_app(app_name: str) -> Optional[Dict[str, str]]:
87
+ """
88
+ Validate that the app exists and its files are present.
89
+
90
+ Args:
91
+ app_name: Name of the app to validate
92
+
93
+ Returns:
94
+ App configuration dict if valid, None otherwise
95
+ """
96
+ if app_name not in APP_REGISTRY:
97
+ print(f"❌ Error: Unknown app '{app_name}'")
98
+ print(f"\nAvailable apps: {', '.join(sorted(APP_REGISTRY.keys()))}")
99
+ print("\nRun 'python run.py --list' to see all available apps.")
100
+ return None
101
+
102
+ config = APP_REGISTRY[app_name]
103
+ project_root = Path(__file__).parent
104
+ app_path = project_root / config["path"] / config["entry"]
105
+
106
+ if not app_path.exists():
107
+ print(f"❌ Error: App file not found at {app_path}")
108
+ return None
109
+
110
+ return config
111
+
112
+
113
+ def launch_app(app_name: str, port: Optional[int] = None):
114
+ """
115
+ Launch a Streamlit app.
116
+
117
+ Args:
118
+ app_name: Name of the app to launch
119
+ port: Optional port number (default: 8501)
120
+ """
121
+ config = validate_app(app_name)
122
+ if not config:
123
+ sys.exit(1)
124
+
125
+ project_root = Path(__file__).parent
126
+ app_dir = project_root / config["path"]
127
+ app_file = config["entry"]
128
+
129
+ print_banner()
130
+ print(f"📱 Launching: {config['description']}")
131
+ print(f"📂 Location: {config['path']}")
132
+ print(f"🌐 Entry Point: {app_file}")
133
+
134
+ # Build streamlit command
135
+ cmd = ["streamlit", "run", app_file]
136
+
137
+ # Add port if specified
138
+ if port:
139
+ cmd.extend(["--server.port", str(port)])
140
+ print(f"🔌 Port: {port}")
141
+ else:
142
+ print(f"🔌 Port: 8501 (default)")
143
+
144
+ print("\n" + "=" * 70)
145
+ print("\n🎯 Starting application...\n")
146
+
147
+ try:
148
+ # Change to app directory and run
149
+ os.chdir(app_dir)
150
+ subprocess.run(cmd)
151
+ except KeyboardInterrupt:
152
+ print("\n\n👋 Application stopped by user")
153
+ except FileNotFoundError:
154
+ print("\n❌ Error: Streamlit not found. Please install it:")
155
+ print(" pip install streamlit")
156
+ sys.exit(1)
157
+ except Exception as e:
158
+ print(f"\n❌ Error launching app: {e}")
159
+ sys.exit(1)
160
+
161
+
162
+ def main():
163
+ """Main entry point."""
164
+ parser = argparse.ArgumentParser(
165
+ description="Universal launcher for AgenticAI project applications",
166
+ formatter_class=argparse.RawDescriptionHelpFormatter,
167
+ epilog="""
168
+ Examples:
169
+ python run.py healthcare # Launch healthcare chatbot
170
+ python run.py deep-research --port 8502 # Launch on custom port
171
+ python run.py --list # List all available apps
172
+
173
+ Available Apps:
174
+ """ + "\n ".join(f"{name}: {config['description']}"
175
+ for name, config in sorted(APP_REGISTRY.items()))
176
+ )
177
+
178
+ parser.add_argument(
179
+ "app_name",
180
+ nargs="?",
181
+ help="Name of the app to launch"
182
+ )
183
+
184
+ parser.add_argument(
185
+ "--port",
186
+ type=int,
187
+ help="Port number for Streamlit server (default: 8501)"
188
+ )
189
+
190
+ parser.add_argument(
191
+ "--list",
192
+ action="store_true",
193
+ help="List all available apps"
194
+ )
195
+
196
+ args = parser.parse_args()
197
+
198
+ # Handle --list flag
199
+ if args.list:
200
+ list_apps()
201
+ return
202
+
203
+ # Require app name if not listing
204
+ if not args.app_name:
205
+ parser.print_help()
206
+ print("\n")
207
+ list_apps()
208
+ return
209
+
210
+ # Launch the app
211
+ launch_app(args.app_name, args.port)
212
+
213
+
214
+ if __name__ == "__main__":
215
+ main()
src/deep-research/.env.name ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY=""
2
+ GROQ_API_KEY=""
3
+ GOOGLE_API_KEY=""
4
+ #https://serper.dev/api-keys
5
+ SERPER_API_KEY=""
6
+ #https://newsapi.org/v2/everything
7
+ NEWS_API_KEY=""
8
+ #https://app.sendgrid.com/ - bm80177
9
+ SENDGRID_API_KEY=""
src/deep-research/Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1 \
4
+ DEBIAN_FRONTEND=noninteractive \
5
+ PYTHONPATH=/app:/app/common:$PYTHONPATH
6
+
7
+ WORKDIR /app
8
+
9
+ # System deps
10
+ RUN apt-get update && apt-get install -y \
11
+ git build-essential curl \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Install uv
15
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
16
+ ENV PATH="/root/.local/bin:$PATH"
17
+
18
+ # Copy project metadata
19
+ COPY pyproject.toml .
20
+ COPY uv.lock .
21
+
22
+ # Copy required folders
23
+ COPY common/ ./common/
24
+ COPY src/deep-research/ ./src/deep-research/
25
+
26
+ # Install dependencies using uv, then export and install with pip to system
27
+ RUN uv sync --frozen --no-dev && \
28
+ uv pip install -e . --system
29
+
30
+ # Copy entry point
31
+ COPY run.py .
32
+
33
+ EXPOSE 7860
34
+
35
+ CMD ["python", "run.py", "deep-research", "--port", "7860"]
src/deep-research/README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Deep Researcher # Give your app a title
3
+ emoji: 🤖 # Pick an emoji
4
+ colorFrom: indigo # Theme start color
5
+ colorTo: blue # Theme end color
6
+ sdk: docker # SDK type
7
+ sdk_version: "4.39.0" # Example Gradio version
8
+ app_file: ui/app.py # <-- points to your app.py inside ui/
9
+ pinned: false
10
+ ---
11
+
12
+ # AI Deep Researcher
13
+
14
+ **AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
15
+
16
+ To achieve this, the project integrates the following technologies and AI features:
17
+ - **OpenAI SDK**
18
+ - **OpenAI Agents**
19
+ - **OpenAI WebSearch Tool**
20
+ - **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
21
+ - **News API** (https://newsapi.org/v2/everything)
22
+ - **SendGrid** (for emailing report)
23
+ - **LLMs** - (OpenAI, Gemini, Groq)
24
+
25
+ ## How It Works
26
+ The system is a multi-agent solution, where each agent has a specific responsibility:
27
+
28
+ 1. **Planner Agent**
29
+ - Receives the user query and builds a structured query plan.
30
+
31
+ 2. **Guardrail Agent**
32
+ - Validates user input and ensures compliance.
33
+ - Stops the workflow if the input contains inappropriate or unparliamentary words.
34
+
35
+ 3. **Search Agent**
36
+ - Executes the query plan.
37
+ - Runs multiple web searches in parallel to gather data.
38
+
39
+ 4. **Writer Agent**
40
+ - Reads results from all search agents.
41
+ - Generates a well-formatted, consolidated report.
42
+
43
+ 5. **Email Agent (not functional at present)**
44
+ - Responsible for sending the report via email using SendGrid.
45
+
46
+ 6. **Orchestrator**
47
+ - The entry point of the system.
48
+ - Facilitates communication and workflow between all agents.
49
+
50
+ ## Project Folder Structure
51
+
52
+ ```
53
+ deep-research/
54
+ ├── ui/
55
+ │ ├── app.py # Main Streamlit application entry point
56
+ │ └── __pycache__/ # Python bytecode cache
57
+ ├── appagents/
58
+ │ ├── __init__.py # Package initialization
59
+ │ ├── orchestrator.py # Orchestrator agent - coordinates all agents
60
+ │ ├── planner_agent.py # Planner agent - builds structured query plans
61
+ │ ├── guardrail_agent.py # Guardrail agent - validates user input
62
+ │ ├── search_agent.py # Search agent - performs web searches
63
+ │ ├── writer_agent.py # Writer agent - generates consolidated reports
64
+ │ ├── email_agent.py # Email agent - sends reports via email (not functional)
65
+ │ └── __pycache__/ # Python bytecode cache
66
+ ├── core/
67
+ │ ├── __init__.py # Package initialization
68
+ │ ├── logger.py # Centralized logging configuration
69
+ │ └── __pycache__/ # Python bytecode cache
70
+ ├── tools/
71
+ │ ├── __init__.py # Package initialization
72
+ │ ├── google_tools.py # Google search utilities
73
+ │ ├── time_tools.py # Time-related utility functions
74
+ │ └── __pycache__/ # Python bytecode cache
75
+ ├── prompts/
76
+ │ ├── __init__.py # Package initialization (if present)
77
+ │ ├── planner_prompt.txt # Prompt for planner agent (if present)
78
+ │ ├── guardrail_prompt.txt # Prompt for guardrail agent (if present)
79
+ │ ├── search_prompt.txt # Prompt for search agent (if present)
80
+ │ └── writer_prompt.txt # Prompt for writer agent (if present)
81
+ ├── Dockerfile # Docker configuration for container deployment
82
+ ├── pyproject.toml # Project metadata and dependencies (copied from root)
83
+ ├── uv.lock # Locked dependency versions (copied from root)
84
+ ├── README.md # Project documentation
85
+ └── run.py # Script to run the application locally (if present)
86
+ ```
87
+
88
+ ## File Descriptions
89
+
90
+ ### UI Layer (`ui/`)
91
+ - **app.py** - Main Streamlit web application that provides the user interface. Handles:
92
+ - Text input for research queries
93
+ - Run/Download buttons (PDF, Markdown)
94
+ - Real-time streaming of results
95
+ - Display of final research reports
96
+ - Session state management
97
+ - Button enable/disable during streaming
98
+
99
+ ### Agents (`appagents/`)
100
+ - **orchestrator.py** - Central coordinator that:
101
+ - Manages the multi-agent workflow
102
+ - Handles communication between all agents
103
+ - Streams results back to the UI
104
+ - Implements the research pipeline
105
+
106
+ - **planner_agent.py** - Creates a structured plan for the query:
107
+ - Breaks down user query into actionable research steps
108
+ - Defines search queries and research angles
109
+
110
+ - **guardrail_agent.py** - Validates user input:
111
+ - Checks for inappropriate content
112
+ - Ensures compliance with policies
113
+ - Stops workflow if violations detected
114
+
115
+ - **search_agent.py** - Executes web searches:
116
+ - Performs parallel web searches
117
+ - Integrates with Google Search / Serper API
118
+ - Gathers raw research data
119
+
120
+ - **writer_agent.py** - Generates final report:
121
+ - Consolidates search results
122
+ - Formats findings into structured markdown
123
+ - Creates well-organized research summaries
124
+
125
+ - **email_agent.py** - Email delivery (not functional):
126
+ - Intended to send reports via SendGrid
127
+ - Currently not integrated in the workflow
128
+
129
+ ### Core Utilities (`core/`)
130
+ - **logger.py** - Centralized logging configuration:
131
+ - Provides consistent logging across agents
132
+ - Handles log levels and formatting
133
+
134
+ ### Tools (`tools/`)
135
+ - **google_tools.py** - Google/Serper API wrapper:
136
+ - Executes web searches
137
+ - Handles API authentication and response parsing
138
+
139
+ - **time_tools.py** - Utility functions:
140
+ - Time-related operations
141
+ - Timestamp management
142
+
143
+ ### Configuration Files
144
+ - **Dockerfile** - Container deployment:
145
+ - Builds Docker image with Python 3.12
146
+ - Installs dependencies using `uv`
147
+ - Sets up Streamlit server on port 7860
148
+ - Configures PYTHONPATH for module imports
149
+
150
+ - **pyproject.toml** - Project metadata:
151
+ - Package name: "agents"
152
+ - Python version requirement: 3.12
153
+ - Lists all dependencies (OpenAI, LangChain, Streamlit, etc.)
154
+
155
+ - **uv.lock** - Dependency lock file:
156
+ - Ensures reproducible builds
157
+ - Pins exact versions of all dependencies
158
+
159
+ ## Key Technologies
160
+
161
+ | Component | Technology | Purpose |
162
+ |-----------|-----------|---------|
163
+ | LLM Framework | OpenAI Agents | Multi-agent orchestration |
164
+ | Web Search | Serper API / Google Search | Research data gathering |
165
+ | Web UI | Streamlit | User interface and interaction |
166
+ | Document Export | ReportLab | PDF generation from markdown |
167
+ | Async Operations | AsyncIO | Parallel agent execution |
168
+ | Dependencies | UV | Fast Python package management |
169
+ | Containerization | Docker | Cloud deployment |
170
+
171
+ ## Running Locally
172
+
173
+ ```bash
174
+ # Install dependencies
175
+ uv sync
176
+
177
+ # Set environment variables defined in .env.name file
178
+ export OPENAI_API_KEY="your-key"
179
+ export SERPER_API_KEY="your-key"
180
+
181
+ # Run the Streamlit app
182
+ python run.py deep-research
183
+ ```
184
+
185
+ ## Deployment
186
+
187
+ The project is deployed on Hugging Face Spaces as a Docker container:
188
+ - **Space**: https://huggingface.co/spaces/mishrabp/deep-research
189
+ - **URL**: https://huggingface.co/spaces/mishrabp/deep-research
190
+ - **Trigger**: Automatic deployment on push to `main` branch
191
+ - **Configuration**: `.github/workflows/deep-research-app-hf.yml`
src/deep-research/app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import time
4
+ import html
5
+ from io import BytesIO
6
+ import os
7
+ import sys
8
+
9
+ # Add project root
10
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
11
+
12
+ from dotenv import load_dotenv
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph
14
+ from reportlab.lib.styles import getSampleStyleSheet
15
+ from appagents.orchestrator import Orchestrator
16
+ from agents import SQLiteSession
17
+
18
+ load_dotenv(override=True)
19
+
20
+ # --------------------
21
+ # Page config
22
+ # --------------------
23
+ st.set_page_config(page_title="Deep Research AI", layout="wide", page_icon="🧠")
24
+
25
+ # --------------------
26
+ # Premium CSS
27
+ # --------------------
28
+ st.markdown("""
29
+ <style>
30
+ /* Global Defaults */
31
+ .stApp {
32
+ background-color: #f8f9fa;
33
+ font-family: 'Inter', sans-serif;
34
+ }
35
+
36
+ /* Remove default Streamlit top padding but add space for Fixed Header - Revert: Just remove top padding */
37
+ .block-container {
38
+ padding-top: 1rem !important; /* Small buffer */
39
+ }
40
+
41
+ /* Sticky Header */
42
+ header[data-testid="stHeader"] { display: none; } /* Hide default streamlit header */
43
+
44
+ .header-container {
45
+ position: sticky;
46
+ top: 0;
47
+ z-index: 999;
48
+
49
+ background: linear-gradient(135deg, #0f2027 0%, #203a43 50%, #2c5364 100%);
50
+ color: #ffffff;
51
+ padding: 3rem 2rem;
52
+ display: flex;
53
+ justify-content: space-between;
54
+ align-items: center;
55
+ box-shadow: 0 4px 20px rgba(0,0,0,0.15);
56
+
57
+ margin-top: -4rem; /* Pull up aggressively to cover top gap */
58
+ margin-left: -5rem;
59
+ margin-right: -5rem;
60
+
61
+ border-bottom: none;
62
+ border-radius: 0 0 1rem 1rem;
63
+ }
64
+
65
+ .app-brand {
66
+ font-family: 'Inter', sans-serif;
67
+ font-size: 1.6rem;
68
+ font-weight: 700;
69
+ letter-spacing: -0.02em;
70
+ color: #ffffff;
71
+ display: flex;
72
+ gap: 0.75rem;
73
+ align-items: center;
74
+ }
75
+
76
+ /* Centered Search Area */
77
+ .search-wrapper {
78
+ max-width: 800px;
79
+ margin: 4rem auto 2rem auto;
80
+ text-align: center;
81
+ }
82
+
83
+ .search-headline {
84
+ font-size: 2.5rem;
85
+ font-weight: 800;
86
+ color: #111;
87
+ margin-bottom: 0.5rem;
88
+ letter-spacing: -0.03em;
89
+ }
90
+
91
+ .search-subtext {
92
+ font-size: 1.1rem;
93
+ color: #666;
94
+ margin-bottom: 2.5rem;
95
+ }
96
+
97
+ /* Input styling override */
98
+ .stTextArea textarea {
99
+ border-radius: 12px !important;
100
+ border: 1px solid #e0e0e0 !important;
101
+ padding: 1rem !important;
102
+ background: white !important;
103
+ box-shadow: 0 4px 12px rgba(0,0,0,0.03) !important;
104
+ font-size: 1.1rem !important;
105
+ }
106
+ .stTextArea textarea:focus {
107
+ border-color: #667eea !important;
108
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.1) !important;
109
+ }
110
+
111
+ /* Custom Button */
112
+ .stButton button {
113
+ background: black !important;
114
+ color: white !important;
115
+ border-radius: 30px !important;
116
+ padding: 0.5rem 2rem !important;
117
+ border: none !important;
118
+ box-shadow: 0 4px 10px rgba(0,0,0,0.2) !important;
119
+ transition: transform 0.1s ease;
120
+ }
121
+ .stButton button:hover {
122
+ transform: scale(1.02);
123
+ }
124
+
125
+ /* Report Paper Style */
126
+ .report-paper {
127
+ max-width: 850px;
128
+ margin: 2rem auto;
129
+ background: white;
130
+ padding: 4rem;
131
+ min-height: 800px;
132
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1), 0 20px 40px rgba(0,0,0,0.05);
133
+ color: #2c3e50;
134
+ border: 1px solid #f0f0f0;
135
+ }
136
+ </style>
137
+ """, unsafe_allow_html=True)
138
+
139
+ # --------------------
140
+ # Session State
141
+ # --------------------
142
+ if "session_id" not in st.session_state:
143
+ st.session_state.session_id = str(id(st))
144
+
145
+ if "final_report" not in st.session_state:
146
+ st.session_state.final_report = ""
147
+
148
+ if "is_researching" not in st.session_state:
149
+ st.session_state.is_researching = False
150
+
151
+ if "research_logs" not in st.session_state:
152
+ st.session_state.research_logs = []
153
+
154
+ # --------------------
155
+ # Helpers
156
+ # --------------------
157
+ def make_pdf_bytes(text: str) -> bytes:
158
+ buf = BytesIO()
159
+ doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
160
+ styles = getSampleStyleSheet()
161
+ story = []
162
+
163
+ for line in text.split("\n"):
164
+ stripped = line.strip()
165
+ if not stripped:
166
+ story.append(Paragraph(" ", styles["Normal"]))
167
+ continue
168
+
169
+ if stripped.startswith("# "):
170
+ story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
171
+ elif stripped.startswith("## "):
172
+ story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
173
+ elif stripped.startswith("- "):
174
+ story.append(Paragraph("• " + html.escape(stripped[2:]), styles["Normal"]))
175
+ else:
176
+ story.append(Paragraph(html.escape(stripped), styles["Normal"]))
177
+
178
+ doc.build(story)
179
+ buf.seek(0)
180
+ return buf.read()
181
+
182
+ # --------------------
183
+ # Logic
184
+ # --------------------
185
+ async def run_research(query: str):
186
+ session_id = st.session_state.session_id
187
+ session = SQLiteSession(f"session_{session_id}.db")
188
+ orchestrator = Orchestrator(session=session)
189
+
190
+ report_content = ""
191
+ status_container = st.status("🔍 Researching...", expanded=True)
192
+
193
+ try:
194
+ async for chunk in orchestrator.run(query):
195
+ # Filtering heuristic: Orchestrator yields status messages then the final report.
196
+ # Status messages are short and specific.
197
+ if (chunk.startswith("View trace") or
198
+ chunk.startswith("Searches") or
199
+ chunk.startswith("Report written") or
200
+ chunk.startswith("Starting")):
201
+
202
+ status_container.markdown(chunk)
203
+ else:
204
+ # Assume this is the report content (or the final error note)
205
+ report_content = chunk
206
+ status_container.markdown("Processing final output...")
207
+
208
+ st.session_state.final_report = report_content
209
+ st.session_state.is_researching = False
210
+ status_container.update(label="✅ Research Complete", state="complete", expanded=False)
211
+ st.rerun()
212
+
213
+ except Exception as e:
214
+ status_container.update(label="❌ Error", state="error")
215
+ st.error(f"Error: {e}")
216
+ st.session_state.is_researching = False
217
+
218
+ # --------------------
219
+ # Layout
220
+ # --------------------
221
+
222
+ # Custom Header
223
+ st.markdown("""
224
+ <div class="header-container">
225
+ <div class="app-brand">
226
+ <span>🧠</span> Deep Research <i>(OpenAI Agentic)</i>
227
+ </div>
228
+ <div>
229
+ <!-- Could add profile or other links here -->
230
+ </div>
231
+ </div>
232
+ """, unsafe_allow_html=True)
233
+
234
+ # Sidebar Settings
235
+ with st.sidebar:
236
+ st.header("⚙️ Configuration")
237
+ research_depth = st.select_slider("Research Depth", options=["Quick", "Standard", "Deep"], value="Standard")
238
+ report_format = st.selectbox("Report Format", ["Academic", "Business", "Creative"])
239
+ st.caption("Settings affect the tone and depth of the final report.")
240
+
241
+ st.divider()
242
+ if st.button("🗑️ Clear History"):
243
+ st.session_state.final_report = ""
244
+ st.rerun()
245
+
246
+ # Main Interface
247
+ if not st.session_state.final_report and not st.session_state.is_researching:
248
+ # Centered Input View
249
+ st.markdown("""
250
+ <div class="search-wrapper">
251
+ <div class="search-headline">What do you want to know?</div>
252
+ <div class="search-subtext">Deep Research will browse the web, analyze sources, and write a comprehensive report for you.</div>
253
+ </div>
254
+ """, unsafe_allow_html=True)
255
+
256
+ col_c1, col_c2, col_c3 = st.columns([1, 2, 1])
257
+ with col_c2:
258
+ query = st.text_area("Research Topic", height=60, placeholder="e.g. The future of quantum computing in drug discovery...", label_visibility="collapsed")
259
+
260
+ col_b1, col_b2, col_b3 = st.columns([1, 1, 1])
261
+ with col_b2:
262
+ if st.button("Start Research", use_container_width=True):
263
+ if query.strip():
264
+ st.session_state.is_researching = True
265
+ st.session_state.current_query = query
266
+ st.rerun()
267
+
268
+ elif st.session_state.is_researching:
269
+ # Researching View
270
+ st.markdown("""
271
+ <div class="search-wrapper">
272
+ <div class="search-headline">Compiling Report...</div>
273
+ </div>
274
+ """, unsafe_allow_html=True)
275
+
276
+ # Trigger async run
277
+ asyncio.run(run_research(st.session_state.current_query))
278
+
279
+ else:
280
+ # Result View - Title removed to let Sticky Header be the main branding,
281
+ # and Report itself be the focus.
282
+
283
+ # Action Toolbar
284
+ col_a1, col_a2, col_a3, col_a4 = st.columns([2, 1, 1, 2])
285
+ with col_a2:
286
+ pdf_bytes = make_pdf_bytes(st.session_state.final_report)
287
+ st.download_button("📄 Download PDF", pdf_bytes, "report.pdf", mime="application/pdf", use_container_width=True)
288
+ with col_a3:
289
+ if st.button("🔄 New Search", use_container_width=True):
290
+ st.session_state.final_report = ""
291
+ st.rerun()
292
+
293
+ # Final Report Render
294
+ # We use a container with a class to apply the 'sheet' look via global CSS if possible,
295
+ # or just use standard Markdown rendering which looks best.
296
+
297
+ with st.container():
298
+ st.markdown(st.session_state.final_report)
299
+
src/deep-research/appagents/__init__.py ADDED
File without changes
src/deep-research/appagents/email_agent.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict
3
+
4
+ import sendgrid
5
+ from sendgrid.helpers.mail import Email, Mail, Content, To
6
+ from agents import Agent, function_tool
7
+ from core.logger import log_call
8
+
9
+
10
+ @function_tool
11
+ @log_call
12
+ def send_email(subject: str, html_body: str) -> Dict[str, str]:
13
+ """ Send an email with the given subject and HTML body """
14
+ sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
15
+ from_email = Email("bm80177@gmail.com") # put your verified sender here
16
+ to_email = To("bibhup_mishra@yahoo.com") # put your recipient here
17
+ content = Content("text/html", html_body)
18
+ mail = Mail(from_email, to_email, subject, content).get()
19
+ response = sg.client.mail.send.post(request_body=mail)
20
+ print("Email response", response.status_code)
21
+ return {"status": "success"}
22
+
23
+ INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
24
+ You will be provided with a detailed report. You should use your tool to send one email, providing the
25
+ report converted into clean, well presented HTML with an appropriate subject line."""
26
+
27
+ email_agent = Agent(
28
+ name="Email agent",
29
+ instructions=INSTRUCTIONS,
30
+ tools=[send_email],
31
+ model="gpt-4o-mini",
32
+ )
src/deep-research/appagents/guardrail_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel
3
+ from agents import (
4
+ Agent,
5
+ Runner,
6
+ input_guardrail,
7
+ GuardrailFunctionOutput,
8
+ )
9
+ from tools.time_tools import TimeTools
10
+ from openai import AsyncOpenAI
11
+
12
+
13
+ # ✅ Step 1: Define structured output schema
14
+ class UnparliamentaryCheckOutput(BaseModel):
15
+ has_unparliamentary_language: bool
16
+ explanation: str
17
+
18
+
19
+ # ✅ Step 2: Define the LLM guardrail agent
20
+ guardrail_agent = Agent(
21
+ name="Unparliamentary language check",
22
+ instructions=(
23
+ "Analyze the user input and determine if it contains any unparliamentary, "
24
+ "offensive, or disrespectful language. "
25
+ "If it does, set has_unparliamentary_language=true and explain briefly why. "
26
+ "Otherwise, set it to false."
27
+ ),
28
+ output_type=UnparliamentaryCheckOutput,
29
+ model="gpt-4o-mini",
30
+ )
31
+
32
+
33
+ # ✅ Step 3: Use the input guardrail decorator
34
+ @input_guardrail
35
+ async def guardrail_against_unparliamentary(ctx, agent, message: str):
36
+ """Guardrail function that blocks messages with unparliamentary words."""
37
+ result = await Runner.run(guardrail_agent, message, context=ctx.context)
38
+ has_unparliamentary_language = result.final_output.has_unparliamentary_language
39
+
40
+ return GuardrailFunctionOutput(
41
+ output_info={
42
+ "found_unparliamentary_word": result.final_output.model_dump()
43
+ },
44
+ tripwire_triggered=has_unparliamentary_language,
45
+ )
src/deep-research/appagents/orchestrator.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Runner, trace, gen_trace_id, SQLiteSession
2
+ from appagents.search_agent import search_agent
3
+ from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
4
+ from appagents.writer_agent import writer_agent, ReportData
5
+ from appagents.email_agent import email_agent
6
+ from agents.exceptions import InputGuardrailTripwireTriggered
7
+ from core.logger import log_call
8
+ import asyncio
9
+
10
+ class Orchestrator:
11
+
12
+ def __init__(self, session: SQLiteSession | None = None):
13
+ self.session = session or SQLiteSession()
14
+
15
+ @log_call
16
+ async def run(self, query: str):
17
+ """ Run the deep research process, yielding the status updates and the final report"""
18
+ trace_id = gen_trace_id()
19
+ with trace("Deep Research Orchestrator", trace_id=trace_id):
20
+ print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
21
+ yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
22
+ print("Starting research...")
23
+ search_plan = await self.plan_searches(query)
24
+
25
+ if not search_plan or not getattr(search_plan, "searches", []):
26
+ note = getattr(search_plan, "note", "")
27
+ if "unparliamentary" in note.lower():
28
+ print("⚠️ Guardrail triggered – unparliamentary language detected.")
29
+ yield note
30
+ else:
31
+ yield note or "No search results found, ending research."
32
+ return
33
+
34
+ yield "Searches planned, starting to search..."
35
+ search_results = await self.perform_searches(search_plan)
36
+ yield "Searches complete, writing report..."
37
+ report = await self.write_report(query, search_results)
38
+ yield "Report written, sending email..."
39
+ # await self.send_email(report)
40
+ # yield "Email sent, research complete"
41
+ yield report.markdown_report
42
+
43
+ @log_call
44
+ async def plan_searches(self, query: str) -> WebSearchPlan:
45
+ """Plan the searches to perform for the query."""
46
+ print("Planning searches...")
47
+
48
+ try:
49
+ result = await Runner.run(
50
+ planner_agent, # use self. unless global
51
+ f"Query: {query}",
52
+ session=self.session,
53
+ )
54
+
55
+ print(f"Will perform {len(result.final_output.searches)} searches")
56
+ return result.final_output_as(WebSearchPlan)
57
+
58
+ except InputGuardrailTripwireTriggered as e:
59
+ explanation = getattr(e, "result", {}).get("output_info", {}).get(
60
+ "found_unparliamentary_word", {}
61
+ ).get("explanation", "")
62
+ print("⚠️ Guardrail triggered – unparliamentary language detected.")
63
+ return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")
64
+
65
+ except Exception as e:
66
+ print(f"❌ Error during planning: {e}")
67
+ return WebSearchPlan(searches=[], note="An error occurred while planning searches.")
68
+
69
+ @log_call
70
+ async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
71
+ """ Perform the searches to perform for the query """
72
+ print("Searching...")
73
+ num_completed = 0
74
+ tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
75
+ results = []
76
+ for task in asyncio.as_completed(tasks):
77
+ result = await task
78
+ if result is not None:
79
+ results.append(result)
80
+ num_completed += 1
81
+ print(f"Searching... {num_completed}/{len(tasks)} completed")
82
+ print("Finished searching")
83
+ return results
84
+
85
+ @log_call
86
+ async def search(self, item: WebSearchItem) -> str | None:
87
+ """ Perform a search for the query """
88
+ input = f"Search term: {item.query}\nReason for searching: {item.reason}"
89
+ try:
90
+ result = await Runner.run(
91
+ search_agent,
92
+ input,
93
+ )
94
+ return str(result.final_output)
95
+ except Exception:
96
+ return None
97
+
98
+ @log_call
99
+ async def write_report(self, query: str, search_results: list[str]) -> ReportData:
100
+ """ Write the report for the query """
101
+ print("Thinking about report...")
102
+ input = f"Original query: {query}\nSummarized search results: {search_results}"
103
+ result = await Runner.run(
104
+ writer_agent,
105
+ input,
106
+ )
107
+
108
+ print("Finished writing report")
109
+ return result.final_output_as(ReportData)
110
+
111
+ @log_call
112
+ async def send_email(self, report: ReportData) -> None:
113
+ print("Writing email...")
114
+ result = await Runner.run(
115
+ email_agent,
116
+ report.markdown_report,
117
+ )
118
+ print("Email sent")
119
+ return report
src/deep-research/appagents/planner_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel, Field
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from openai import AsyncOpenAI
5
+ from tools.time_tools import TimeTools
6
+ from appagents.guardrail_agent import guardrail_against_unparliamentary
7
+
8
# Number of distinct search queries the planner must produce per request.
HOW_MANY_SEARCHES = 10

# System prompt for the planner agent. HOW_MANY_SEARCHES is interpolated at
# import time, so changing the constant afterwards has no effect on this string.
INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."
14
+
15
class WebSearchItem(BaseModel):
    """One planned web search: the query, why it matters, and a timestamp."""

    # Why this particular search helps answer the user's query.
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    # The literal search-engine query string.
    query: str = Field(description="The search term to use for the web search.")
    # Populated by the model via the current-datetime tool (see INSTRUCTIONS).
    current_date_time: str = Field(description="Current date and time.")
20
+
21
class WebSearchPlan(BaseModel):
    """Structured output of the planner agent: the set of searches to run.

    Fix: callers construct blocked/error plans as
    ``WebSearchPlan(searches=[], note=...)``, but no ``note`` field was
    declared, so pydantic silently discarded the keyword argument. The
    field is now declared with a ``None`` default (backward-compatible).
    """

    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")
    # Optional human-readable status, e.g. why planning was blocked or failed.
    note: str | None = Field(default=None, description="Optional note explaining why no searches were planned.")
23
+
24
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

# Groq exposed through its OpenAI-compatible endpoint.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Plain model name; resolved by the agents SDK against the default OpenAI client.
openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.
# (Presumably why the OpenAI model is chosen here over gemini/groq — it must
# combine the datetime tool with structured WebSearchPlan output.)

planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],  # lets the model fetch "now"
    output_type=WebSearchPlan,           # structured JSON output
    input_guardrails=[guardrail_against_unparliamentary],
)
src/deep-research/appagents/search_agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
3
+ from openai import AsyncOpenAI
4
+
5
+ from agents.model_settings import ModelSettings
6
+ from tools.google_tools import GoogleTools
7
+
8
# Earlier prompt variants, kept for reference:
# 1) Short summary (2-3 paragraphs, <300 words):
# INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
# produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300 \
# words. Capture the main points. Write succintly, no need to have complete sentences or good \
# grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
# essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# 2) Detailed per-result synthesis with preserved links:
# INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web and produce a detailed synthesis of the results. \
# The output must be structured into sections, one for each search result provided by the tool. \
# For each result, you MUST include the full link/URL and the title. \
# Your response should capture the main points and relevant details from all sources. \
# Do not add any personal commentary, introductions, or conclusions. \
# Format the entire output as a single, detailed block of text in markdown format, ensuring ALL source links are visible and preserved."

# Active prompt: 3-5 paragraph, <500-word summary per search term.
INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
26
+
27
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)
31
+
32
+ # search_agent = Agent(
33
+ # name="Search agent",
34
+ # instructions=INSTRUCTIONS,
35
+ # tools=[WebSearchTool(search_context_size="low")],
36
+ # # tools=[GoogleTools.search],
37
+ # model="gpt-4o-mini",
38
+ # model_settings=ModelSettings(tool_choice="required"),
39
+ # )
40
+
41
# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Start the MCP search server over stdio and return the tools it advertises.

    Fixes:
    - ``MCPServerStdio`` was referenced without ever being imported, so
      calling this function raised ``NameError``; it is now imported from
      the agents SDK's MCP support.
    - The redundant function-local ``import os`` was removed (``os`` is
      already imported at module level).

    NOTE(review): the server shuts down when the ``async with`` block exits,
    so the returned tool handles may not be callable afterwards — confirm
    the intended usage pattern with callers.
    """
    from agents.mcp import MCPServerStdio  # local import keeps module import cheap

    # Absolute path ensures the script is found even from a notebook.
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start the MCP server and list the tools it advertises.
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools
67
+
68
# # Note: Gemini does not like
# search_agent = Agent(
#     name="Search agent",
#     instructions=INSTRUCTIONS,
#     # tools=[WebSearchTool(search_context_size="low")],
#     tools=[GoogleTools.search],
#     model=gemini_model,
#     model_settings=ModelSettings(tool_choice="required"),
# )


# Active agent: Gemini model + the Serper-backed Google search tool. The
# OpenAI-hosted WebSearchTool alternative is left commented for reference.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),  # always call the search tool
)
87
+
src/deep-research/appagents/writer_agent.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel, Field
3
+ from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
4
+ from openai import AsyncOpenAI
5
+
6
# System prompt for the report writer: outline first, then a long-form
# markdown report (target 5-10 pages / 1000+ words).
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)
14
+
15
+
16
class ReportData(BaseModel):
    """Structured output of the writer agent."""

    # Two-to-three sentence executive summary of the findings.
    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")

    # The full long-form report, in markdown.
    markdown_report: str = Field(description="The final report")

    # Suggested follow-up research topics.
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")
22
+
23
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# Earlier variant using an OpenAI-hosted model, kept for reference:
# writer_agent = Agent(
#     name="WriterAgent",
#     instructions=INSTRUCTIONS,
#     model="gpt-5-mini",
#     output_type=ReportData,
# )

# Active writer agent: Gemini model producing structured ReportData output.
writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
src/deep-research/core/__init__.py ADDED
File without changes
src/deep-research/core/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import datetime
3
+
4
def log_call(func):
    """
    Decorator that logs each call to *func* with a timestamp and its
    arguments, and logs (then re-raises) any exception the call raises.

    Fix: this decorator is applied to ``async def`` methods elsewhere in the
    project, but the original sync-only wrapper returned the coroutine
    without awaiting it, so exceptions raised inside the coroutine escaped
    the ``try``/``except`` and were never logged. Coroutine functions now
    get an async wrapper that awaits the call.
    """
    import inspect

    def _render_args(args, kwargs):
        # One comma-separated repr list covering positionals and keywords.
        return ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )

    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_render_args(args, kwargs)})")
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_render_args(args, kwargs)})")
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
src/deep-research/prompts/__init__.py ADDED
File without changes
src/deep-research/tools/__init__.py ADDED
File without changes
src/deep-research/tools/google_tools.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from core.logger import log_call
6
+
7
+ # Load environment variables once
8
+ load_dotenv()
9
+
10
+
11
+ # ============================================================
12
+ # 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
13
+ # ============================================================
14
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search). It acts as a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results, or a human-readable error
            message on failure (the tool contract is always a string).
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
            }

            # Fix: timeout added — without one, a stalled connection hangs
            # the agent run indefinitely. Timeouts raise RequestException,
            # which is already handled below.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            # Only the first num_results entries are formatted.
            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
80
+
81
+
82
+ # ============================================================
83
+ # 🔹 OPENAI & OTHER MODEL TOOLS
84
+ # ============================================================
85
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    @log_call  # added for consistency with GoogleTools.search call logging
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Parameters:
        -----------
        prompt : str
            User-provided prompt for the model.
        model : str, optional (default="gpt-4o-mini")
            Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
        --------
        str
            Model's response content as text.
            If an error occurs (network/API), returns an error message.

        Example:
        --------
        query_openai("Explain AI in finance")

        Output:
        "AI in finance refers to the use of machine learning and natural language
        processing techniques to automate trading, risk assessment, and customer service..."
        """
        try:
            from openai import OpenAI  # delayed import keeps module import cheap
            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.choices[0].message.content
        except Exception as e:
            # Tool contract is always a string, never an exception.
            return f"Error querying OpenAI API: {e}"
src/deep-research/tools/time_tools.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from agents import function_tool
3
+ from core.logger import log_call
4
+
5
class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Returns the current date and time as a formatted string.

        Args:
            format (str): Optional datetime format (default: "YYYY-MM-DD HH:MM:SS")

        Returns:
            str: Current date and time in the specified format
        """
        # Format "now" directly with the requested pattern in one expression.
        return datetime.now().strftime(format)
uv.lock CHANGED
@@ -14,6 +14,7 @@ dependencies = [
14
  { name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
15
  { name = "beautifulsoup4" },
16
  { name = "chromadb" },
 
17
  { name = "ddgs" },
18
  { name = "duckduckgo-search" },
19
  { name = "faiss-cpu" },
@@ -52,6 +53,7 @@ dependencies = [
52
  { name = "reportlab" },
53
  { name = "requests" },
54
  { name = "scikit-learn" },
 
55
  { name = "sentence-transformers" },
56
  { name = "serpapi" },
57
  { name = "smithery" },
@@ -79,6 +81,7 @@ requires-dist = [
79
  { name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
80
  { name = "beautifulsoup4", specifier = ">=4.12.3" },
81
  { name = "chromadb", specifier = "==1.3.5" },
 
82
  { name = "ddgs", specifier = ">=9.9.2" },
83
  { name = "duckduckgo-search" },
84
  { name = "faiss-cpu", specifier = ">=1.13.0" },
@@ -117,6 +120,7 @@ requires-dist = [
117
  { name = "reportlab", specifier = ">=4.4.5" },
118
  { name = "requests", specifier = ">=2.32.3" },
119
  { name = "scikit-learn", specifier = ">=1.7.2" },
 
120
  { name = "sentence-transformers", specifier = ">=5.1.2" },
121
  { name = "serpapi" },
122
  { name = "smithery", specifier = ">=0.4.4" },
@@ -719,6 +723,31 @@ wheels = [
719
  { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
720
  ]
721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  [[package]]
723
  name = "ddgs"
724
  version = "9.9.2"
@@ -757,6 +786,15 @@ wheels = [
757
  { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
758
  ]
759
 
 
 
 
 
 
 
 
 
 
760
  [[package]]
761
  name = "distro"
762
  version = "1.9.0"
@@ -917,6 +955,11 @@ wheels = [
917
  { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
918
  ]
919
 
 
 
 
 
 
920
  [[package]]
921
  name = "gitdb"
922
  version = "4.0.12"
@@ -2039,6 +2082,22 @@ wheels = [
2039
  { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
2040
  ]
2041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2042
  [[package]]
2043
  name = "multitasking"
2044
  version = "0.0.12"
@@ -3083,6 +3142,15 @@ wheels = [
3083
  { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
3084
  ]
3085
 
 
 
 
 
 
 
 
 
 
3086
  [[package]]
3087
  name = "python-multipart"
3088
  version = "0.0.20"
@@ -3364,6 +3432,20 @@ wheels = [
3364
  { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
3365
  ]
3366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3367
  [[package]]
3368
  name = "sentence-transformers"
3369
  version = "5.1.2"
@@ -4019,6 +4101,18 @@ wheels = [
4019
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
4020
  ]
4021
 
 
 
 
 
 
 
 
 
 
 
 
 
4022
  [[package]]
4023
  name = "wikipedia"
4024
  version = "1.4.0"
 
14
  { name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
15
  { name = "beautifulsoup4" },
16
  { name = "chromadb" },
17
+ { name = "datasets" },
18
  { name = "ddgs" },
19
  { name = "duckduckgo-search" },
20
  { name = "faiss-cpu" },
 
53
  { name = "reportlab" },
54
  { name = "requests" },
55
  { name = "scikit-learn" },
56
+ { name = "sendgrid" },
57
  { name = "sentence-transformers" },
58
  { name = "serpapi" },
59
  { name = "smithery" },
 
81
  { name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
82
  { name = "beautifulsoup4", specifier = ">=4.12.3" },
83
  { name = "chromadb", specifier = "==1.3.5" },
84
+ { name = "datasets", specifier = ">=4.4.1" },
85
  { name = "ddgs", specifier = ">=9.9.2" },
86
  { name = "duckduckgo-search" },
87
  { name = "faiss-cpu", specifier = ">=1.13.0" },
 
120
  { name = "reportlab", specifier = ">=4.4.5" },
121
  { name = "requests", specifier = ">=2.32.3" },
122
  { name = "scikit-learn", specifier = ">=1.7.2" },
123
+ { name = "sendgrid" },
124
  { name = "sentence-transformers", specifier = ">=5.1.2" },
125
  { name = "serpapi" },
126
  { name = "smithery", specifier = ">=0.4.4" },
 
723
  { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
724
  ]
725
 
726
+ [[package]]
727
+ name = "datasets"
728
+ version = "4.4.1"
729
+ source = { registry = "https://pypi.org/simple" }
730
+ dependencies = [
731
+ { name = "dill" },
732
+ { name = "filelock" },
733
+ { name = "fsspec", extra = ["http"] },
734
+ { name = "httpx" },
735
+ { name = "huggingface-hub" },
736
+ { name = "multiprocess" },
737
+ { name = "numpy" },
738
+ { name = "packaging" },
739
+ { name = "pandas" },
740
+ { name = "pyarrow" },
741
+ { name = "pyyaml" },
742
+ { name = "requests" },
743
+ { name = "tqdm" },
744
+ { name = "xxhash" },
745
+ ]
746
+ sdist = { url = "https://files.pythonhosted.org/packages/93/bf/0dae295d6d1ba0b1a200a9dd216838464b5bbd05da01407cb1330b377445/datasets-4.4.1.tar.gz", hash = "sha256:80322699aa8c0bbbdb7caa87906da689c3c2e29523cff698775c67f28fdab1fc", size = 585341, upload-time = "2025-11-05T16:00:38.162Z" }
747
+ wheels = [
748
+ { url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
749
+ ]
750
+
751
  [[package]]
752
  name = "ddgs"
753
  version = "9.9.2"
 
786
  { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
787
  ]
788
 
789
+ [[package]]
790
+ name = "dill"
791
+ version = "0.4.0"
792
+ source = { registry = "https://pypi.org/simple" }
793
+ sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
794
+ wheels = [
795
+ { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
796
+ ]
797
+
798
  [[package]]
799
  name = "distro"
800
  version = "1.9.0"
 
955
  { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
956
  ]
957
 
958
+ [package.optional-dependencies]
959
+ http = [
960
+ { name = "aiohttp" },
961
+ ]
962
+
963
  [[package]]
964
  name = "gitdb"
965
  version = "4.0.12"
 
2082
  { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
2083
  ]
2084
 
2085
+ [[package]]
2086
+ name = "multiprocess"
2087
+ version = "0.70.18"
2088
+ source = { registry = "https://pypi.org/simple" }
2089
+ dependencies = [
2090
+ { name = "dill" },
2091
+ ]
2092
+ sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
2093
+ wheels = [
2094
+ { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
2095
+ { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
2096
+ { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
2097
+ { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
2098
+ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
2099
+ ]
2100
+
2101
  [[package]]
2102
  name = "multitasking"
2103
  version = "0.0.12"
 
3142
  { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
3143
  ]
3144
 
3145
+ [[package]]
3146
+ name = "python-http-client"
3147
+ version = "3.3.7"
3148
+ source = { registry = "https://pypi.org/simple" }
3149
+ sdist = { url = "https://files.pythonhosted.org/packages/56/fa/284e52a8c6dcbe25671f02d217bf2f85660db940088faf18ae7a05e97313/python_http_client-3.3.7.tar.gz", hash = "sha256:bf841ee45262747e00dec7ee9971dfb8c7d83083f5713596488d67739170cea0", size = 9377, upload-time = "2022-03-09T20:23:56.386Z" }
3150
+ wheels = [
3151
+ { url = "https://files.pythonhosted.org/packages/29/31/9b360138f4e4035ee9dac4fe1132b6437bd05751aaf1db2a2d83dc45db5f/python_http_client-3.3.7-py3-none-any.whl", hash = "sha256:ad371d2bbedc6ea15c26179c6222a78bc9308d272435ddf1d5c84f068f249a36", size = 8352, upload-time = "2022-03-09T20:23:54.862Z" },
3152
+ ]
3153
+
3154
  [[package]]
3155
  name = "python-multipart"
3156
  version = "0.0.20"
 
3432
  { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
3433
  ]
3434
 
3435
+ [[package]]
3436
+ name = "sendgrid"
3437
+ version = "6.12.5"
3438
+ source = { registry = "https://pypi.org/simple" }
3439
+ dependencies = [
3440
+ { name = "cryptography" },
3441
+ { name = "python-http-client" },
3442
+ { name = "werkzeug" },
3443
+ ]
3444
+ sdist = { url = "https://files.pythonhosted.org/packages/da/fa/f718b2b953f99c1f0085811598ac7e31ccbd4229a81ec2a5290be868187a/sendgrid-6.12.5.tar.gz", hash = "sha256:ea9aae30cd55c332e266bccd11185159482edfc07c149b6cd15cf08869fabdb7", size = 50310, upload-time = "2025-09-19T06:23:09.229Z" }
3445
+ wheels = [
3446
+ { url = "https://files.pythonhosted.org/packages/bd/55/b3c3880a77082e8f7374954e0074aafafaa9bc78bdf9c8f5a92c2e7afc6a/sendgrid-6.12.5-py3-none-any.whl", hash = "sha256:96f92cc91634bf552fdb766b904bbb53968018da7ae41fdac4d1090dc0311ca8", size = 102173, upload-time = "2025-09-19T06:23:07.93Z" },
3447
+ ]
3448
+
3449
  [[package]]
3450
  name = "sentence-transformers"
3451
  version = "5.1.2"
 
4101
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
4102
  ]
4103
 
4104
+ [[package]]
4105
+ name = "werkzeug"
4106
+ version = "3.1.4"
4107
+ source = { registry = "https://pypi.org/simple" }
4108
+ dependencies = [
4109
+ { name = "markupsafe" },
4110
+ ]
4111
+ sdist = { url = "https://files.pythonhosted.org/packages/45/ea/b0f8eeb287f8df9066e56e831c7824ac6bab645dd6c7a8f4b2d767944f9b/werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e", size = 864687, upload-time = "2025-11-29T02:15:22.841Z" }
4112
+ wheels = [
4113
+ { url = "https://files.pythonhosted.org/packages/2f/f9/9e082990c2585c744734f85bec79b5dae5df9c974ffee58fe421652c8e91/werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905", size = 224960, upload-time = "2025-11-29T02:15:21.13Z" },
4114
+ ]
4115
+
4116
  [[package]]
4117
  name = "wikipedia"
4118
  version = "1.4.0"