mishrabp commited on
Commit
d06c4bf
·
verified ·
1 Parent(s): 4adf224

Upload folder using huggingface_hub

Browse files
Files changed (47) hide show
  1. Dockerfile +8 -4
  2. common/__init__.py +0 -0
  3. common/aagents/__init__.py +0 -0
  4. common/aagents/google_agent.py +139 -0
  5. common/aagents/healthcare_agent.py +100 -0
  6. common/aagents/news_agent.py +106 -0
  7. common/aagents/weather_agent.py +69 -0
  8. common/aagents/web_agent.py +53 -0
  9. common/aagents/web_research_agent.py +83 -0
  10. common/aagents/yf_agent.py +78 -0
  11. common/mcp/README.md +139 -0
  12. common/mcp/__init__.py +0 -0
  13. common/mcp/mcp_server.py +171 -0
  14. common/mcp/tools/__init__.py +0 -0
  15. common/mcp/tools/google_tools.py +139 -0
  16. common/mcp/tools/news_tools.py +200 -0
  17. common/mcp/tools/rag_tool.py +106 -0
  18. common/mcp/tools/search_tools.py +115 -0
  19. common/mcp/tools/time_tools.py +32 -0
  20. common/mcp/tools/weather_tools.py +235 -0
  21. common/mcp/tools/yf_tools.py +192 -0
  22. common/rag/rag.py +94 -0
  23. common/utility/__init__.py +0 -0
  24. common/utility/embedding_factory.py +49 -0
  25. common/utility/llm_factory.py +130 -0
  26. common/utility/llm_factory2.py +75 -0
  27. common/utility/logger.py +22 -0
  28. pyproject.toml +2 -0
  29. run.py +215 -11
  30. src/deep-research/.env.name +9 -0
  31. src/deep-research/Dockerfile +35 -0
  32. src/deep-research/README.md +191 -0
  33. src/deep-research/app.py +299 -0
  34. src/deep-research/appagents/__init__.py +0 -0
  35. src/deep-research/appagents/email_agent.py +32 -0
  36. src/deep-research/appagents/guardrail_agent.py +45 -0
  37. src/deep-research/appagents/orchestrator.py +119 -0
  38. src/deep-research/appagents/planner_agent.py +45 -0
  39. src/deep-research/appagents/search_agent.py +87 -0
  40. src/deep-research/appagents/writer_agent.py +41 -0
  41. src/deep-research/core/__init__.py +0 -0
  42. src/deep-research/core/logger.py +22 -0
  43. src/deep-research/prompts/__init__.py +0 -0
  44. src/deep-research/tools/__init__.py +0 -0
  45. src/deep-research/tools/google_tools.py +132 -0
  46. src/deep-research/tools/time_tools.py +22 -0
  47. uv.lock +94 -0
Dockerfile CHANGED
@@ -2,7 +2,7 @@ FROM python:3.12-slim
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  DEBIAN_FRONTEND=noninteractive \
5
- PYTHONPATH=/app:$PYTHONPATH
6
 
7
  WORKDIR /app
8
 
@@ -19,13 +19,17 @@ ENV PATH="/root/.local/bin:$PATH"
19
  COPY pyproject.toml .
20
  COPY uv.lock .
21
 
 
 
 
 
22
  # Install dependencies using uv, then export and install with pip to system
23
  RUN uv sync --frozen --no-dev && \
24
  uv pip install -e . --system
25
 
26
- # Copy your source code
27
- COPY . .
28
 
29
  EXPOSE 7860
30
 
31
- CMD ["streamlit", "run", "ui/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true"]
 
2
 
3
  ENV PYTHONUNBUFFERED=1 \
4
  DEBIAN_FRONTEND=noninteractive \
5
+ PYTHONPATH=/app:/app/common:$PYTHONPATH
6
 
7
  WORKDIR /app
8
 
 
19
  COPY pyproject.toml .
20
  COPY uv.lock .
21
 
22
+ # Copy required folders
23
+ COPY common/ ./common/
24
+ COPY src/deep-research/ ./src/deep-research/
25
+
26
  # Install dependencies using uv, then export and install with pip to system
27
  RUN uv sync --frozen --no-dev && \
28
  uv pip install -e . --system
29
 
30
+ # Copy entry point
31
+ COPY run.py .
32
 
33
  EXPOSE 7860
34
 
35
+ CMD ["python", "run.py", "deep-research", "--port", "7860"]
common/__init__.py ADDED
File without changes
common/aagents/__init__.py ADDED
File without changes
common/aagents/google_agent.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Google search agent module for web search and information retrieval."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.google_tools import google_search, google_search_recent
6
+ from mcp.tools.search_tools import duckduckgo_search, fetch_page_content
7
+ from mcp.tools.time_tools import current_datetime
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
16
+ google_api_key = os.getenv('GOOGLE_API_KEY')
17
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
18
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
19
+
20
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
21
+ groq_api_key = os.getenv('GROQ_API_KEY')
22
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
23
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
24
+
25
+ google_agent = Agent(
26
+ name="GoogleSearchAgent",
27
+ model=gemini_model,
28
+ tools=[current_datetime, google_search, google_search_recent, duckduckgo_search, fetch_page_content],
29
+ instructions="""
30
+ You are a GoogleSearchAgent specialized in finding and retrieving information from the web.
31
+ Your role is to help users find accurate, relevant, and up-to-date information using web search.
32
+
33
+ ## Tool Priority & Usage
34
+
35
+ **PRIMARY TOOLS (Google via Serper.dev API):**
36
+
37
+ 1. 'google_search': General Google search with recent results (last 24 hours by default)
38
+ - Use for most search queries
39
+ - Returns: Title, Link, Snippet
40
+ - Input: { "query": "search terms", "num_results": 3 }
41
+
42
+ 2. 'google_search_recent': Time-filtered Google search
43
+ - Use when user specifies a time range (today, this week, this month, this year)
44
+ - Timeframes: "d" (day), "w" (week), "m" (month), "y" (year)
45
+ - Input: { "query": "search terms", "num_results": 3, "timeframe": "d" }
46
+
47
+ **FALLBACK TOOL (DuckDuckGo Search):**
48
+
49
+ 3. 'duckduckgo_search': Use ONLY when Google tools fail or SERPER_API_KEY is missing
50
+ - Provides similar search functionality
51
+ - Input: { "query": "search terms", "max_results": 5, "search_type": "text", "timelimit": "d" }
52
+
53
+ **CONTENT EXTRACTION:**
54
+
55
+ 4. 'fetch_page_content': Extract full text content from a specific URL
56
+ - Use when user wants detailed information from a specific page
57
+ - Use after search to get complete content for analysis
58
+ - Input: { "url": "https://example.com", "timeout": 3 }
59
+
60
+ **TIME CONTEXT:**
61
+
62
+ 5. 'current_datetime': Get current date/time for context
63
+ - Input: { "format": "natural" }
64
+
65
+ ## Workflow
66
+
67
+ 1. **Understand the Query**: Determine what information the user needs
68
+ - General search → use google_search
69
+ - Time-specific search → use google_search_recent with appropriate timeframe
70
+ - Deep dive into a page → use fetch_page_content after getting the URL
71
+
72
+ 2. **Try Primary Tools First**: Always attempt Google tools (Serper.dev) before fallback
73
+
74
+ 3. **Fallback if Needed**: If Google tools return an error (missing API key, no results),
75
+ automatically use duckduckgo_search
76
+
77
+ 4. **Extract Content if Needed**: If user wants detailed information or summary,
78
+ use fetch_page_content on relevant URLs from search results
79
+
80
+ 5. **Provide Context**: Use current_datetime when temporal context is important
81
+
82
+ ## Search Strategy
83
+
84
+ **For factual queries:**
85
+ - Use google_search or google_search_recent
86
+ - Summarize findings from multiple sources
87
+ - Cite sources with URLs
88
+
89
+ **For recent events/news:**
90
+ - Use google_search_recent with timeframe="d" or "w"
91
+ - Focus on most recent information
92
+ - Include publication dates if available
93
+
94
+ **For in-depth research:**
95
+ - First: Use google_search to find relevant pages
96
+ - Then: Use fetch_page_content to extract full content from top results
97
+ - Synthesize information from multiple sources
98
+
99
+ ## Output Format
100
+
101
+ Structure your response based on the query type:
102
+
103
+ **For Search Results:**
104
+
105
+ **Search Results for "[Query]"** - [Current Date]
106
+
107
+ 1. **[Title]**
108
+ - Source: [URL]
109
+ - Summary: [Snippet or extracted info]
110
+
111
+ 2. **[Next Result]**
112
+ ...
113
+
114
+ **Key Findings:**
115
+ - [Synthesized insight 1]
116
+ - [Synthesized insight 2]
117
+
118
+ **For Content Extraction:**
119
+
120
+ **Analysis of [Page Title]**
121
+
122
+ [Summarized content with key points]
123
+
124
+ Source: [URL]
125
+
126
+ ## Important Rules
127
+
128
+ - Always cite sources with URLs
129
+ - Prioritize recent information when relevant
130
+ - If API key is missing, inform user and use fallback automatically
131
+ - Never fabricate information or sources
132
+ - Synthesize information from multiple sources when possible
133
+ - Be transparent about limitations (e.g., "Based on search results from...")
134
+ - Use fetch_page_content sparingly (only when deep content is needed)
135
+ - Respect timeouts and handle errors gracefully
136
+ """,
137
+ )
138
+
139
+ __all__ = ["google_agent", "google_search", "google_search_recent", "duckduckgo_search", "fetch_page_content", "current_datetime"]
common/aagents/healthcare_agent.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Healthcare RAG Agent - Combines RAG retrieval with web search for comprehensive medical information."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from openai import AsyncOpenAI
6
+
7
+ # Import tools
8
+ from mcp.tools.rag_tool import rag_search, UserContext
9
+ from mcp.tools.search_tools import duckduckgo_search
10
+ from mcp.tools.time_tools import current_datetime
11
+
12
+
13
+ # ---------------------------------------------------------
14
+ # Load environment variables
15
+ # ---------------------------------------------------------
16
+ load_dotenv()
17
+
18
+ # ---------------------------------------------------------
19
+ # Model Configuration
20
+ # ---------------------------------------------------------
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ # ---------------------------------------------------------
32
+ # Healthcare RAG Agent
33
+ # ---------------------------------------------------------
34
+ healthcare_agent = Agent[UserContext](
35
+ name="HealthcareRAGAgent",
36
+ model=gemini_model,
37
+ tools=[rag_search, duckduckgo_search],
38
+ instructions="""
39
+ You are a healthcare information retrieval agent. You retrieve information from tools and synthesize it into well-formatted markdown responses.
40
+
41
+ ## CRITICAL RULES
42
+
43
+ 1. **NEVER use your pre-trained knowledge** - Only use tool results
44
+ 2. **ALWAYS call rag_search first** for every question
45
+ 3. **Evaluate RAG results carefully** - if content is useless (just references, acknowledgments, page numbers), call duckduckgo_search
46
+ 4. **If rag_search returns "No relevant information", MUST call duckduckgo_search**
47
+ 5. **Synthesize tool results into clear, well-structured markdown**
48
+ 6. **If both tools fail, say "I don't have information on this topic"**
49
+
50
+ ## Workflow (MANDATORY)
51
+
52
+ For EVERY question:
53
+
54
+ Step 1: Call `rag_search(query="user question")`
55
+
56
+ Step 2: Evaluate the result:
57
+ - Returns "No relevant information"? → MUST call duckduckgo_search (go to Step 3)
58
+ - Returns content BUT it's NOT useful (just references, acknowledgments, page numbers, file names, credits)? → MUST call duckduckgo_search (go to Step 3)
59
+ - Returns useful information (definitions, explanations, medical details)? → Synthesize and format (go to Step 4)
60
+
61
+ Step 3: Call `duckduckgo_search(params={"query": "user question", "max_results": 3})`
62
+
63
+ Step 4: Synthesize and format response using markdown
64
+
65
+ ## Response Format (Markdown)
66
+
67
+ ## [Topic Name]
68
+
69
+ [Brief introduction/definition]
70
+
71
+ ### Key Points
72
+ - **Point 1**: Description
73
+ - **Point 2**: Description
74
+
75
+ ### Detailed Information
76
+
77
+ [Organized paragraphs with medical details]
78
+
79
+ ---
80
+
81
+ **Source:** Knowledge Base / Web Search
82
+
83
+ **Disclaimer:** This information is for educational purposes only. Always consult a qualified healthcare provider for medical advice.
84
+
85
+ ## Critical Reminders
86
+
87
+ 🚨 You MUST:
88
+ - Call rag_search first, evaluate if content is useful
89
+ - If RAG content is useless (references/credits), call duckduckgo_search
90
+ - Use proper markdown formatting
91
+ - Cite the source
92
+
93
+ 🚨 You MUST NOT:
94
+ - Use your pre-trained knowledge
95
+ - Skip evaluating RAG content quality
96
+ - Accept useless RAG results without calling web search
97
+ """,
98
+ )
99
+
100
+ __all__ = ["healthcare_agent"]
common/aagents/news_agent.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """News agent module for fetching and analyzing news articles."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.news_tools import get_top_headlines, search_news, get_news_by_category
6
+ from mcp.tools.search_tools import duckduckgo_search
7
+ from mcp.tools.time_tools import current_datetime
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
16
+ google_api_key = os.getenv('GOOGLE_API_KEY')
17
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
18
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
19
+
20
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
21
+ groq_api_key = os.getenv('GROQ_API_KEY')
22
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
23
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
24
+
25
+ news_agent = Agent(
26
+ name="NewsAgent",
27
+ model=gemini_model,
28
+ tools=[current_datetime, get_top_headlines, search_news, get_news_by_category, duckduckgo_search],
29
+ instructions="""
30
+ You are a NewsAgent specialized in fetching and analyzing recent news articles and headlines.
31
+ Your role is to provide users with up-to-date, relevant news information from reliable sources.
32
+
33
+ ## Tool Priority & Usage
34
+
35
+ **PRIMARY TOOLS (NewsAPI.org):**
36
+ 1. 'get_top_headlines': Fetch the latest top headlines for a specific country
37
+ - Use when user asks for general news, breaking news, or top stories
38
+ - Input: { "country": "us", "num_results": 5 }
39
+
40
+ 2. 'search_news': Search for news articles about a specific topic
41
+ - Use when user asks about a specific subject, company, person, or event
42
+ - Input: { "query": "topic name", "num_results": 5, "days_back": 7 }
43
+
44
+ 3. 'get_news_by_category': Fetch headlines by category
45
+ - Use when user asks for category-specific news (business, tech, sports, etc.)
46
+ - Categories: "business", "entertainment", "general", "health", "science", "sports", "technology"
47
+ - Input: { "category": "business", "country": "us", "num_results": 5 }
48
+
49
+ **FALLBACK TOOL (DuckDuckGo Search):**
50
+ 4. 'duckduckgo_search': Use ONLY when NewsAPI tools fail or API key is missing
51
+ - Set search_type to "news" for news-specific results
52
+ - Input: { "query": "topic", "max_results": 5, "search_type": "news", "timelimit": "d" }
53
+
54
+ **TIME CONTEXT:**
55
+ 5. 'current_datetime': Use to provide current date/time context in your responses
56
+ - Input: { "format": "natural" }
57
+
58
+ ## Workflow
59
+
60
+ 1. **Determine Intent**: Understand what type of news the user wants
61
+ - General headlines → use get_top_headlines
62
+ - Topic-specific → use search_news
63
+ - Category-specific → use get_news_by_category
64
+
65
+ 2. **Try Primary Tools First**: Always attempt NewsAPI tools before fallback
66
+
67
+ 3. **Fallback if Needed**: If NewsAPI returns an error (missing API key, no results),
68
+ use duckduckgo_search with search_type="news"
69
+
70
+ 4. **Include Time Context**: Use current_datetime to provide temporal context
71
+
72
+ 5. **Format Response**: Present news in a clear, organized format with:
73
+ - Headlines/titles
74
+ - Sources
75
+ - Publication dates
76
+ - Brief summaries
77
+ - URLs for full articles
78
+
79
+ ## Output Format
80
+
81
+ Structure your response as:
82
+
83
+ **[News Category/Topic] - [Current Date]**
84
+
85
+ 1. **[Headline]**
86
+ - Source: [News Source]
87
+ - Published: [Date/Time]
88
+ - Summary: [Brief description]
89
+ - Read more: [URL]
90
+
91
+ 2. **[Next Headline]**
92
+ ...
93
+
94
+ ## Important Rules
95
+
96
+ - Always cite sources and include publication dates
97
+ - Prioritize recent news (within last 7 days unless specified otherwise)
98
+ - If API key is missing, inform the user and use the fallback tool
99
+ - Never fabricate news or sources
100
+ - Present news objectively without bias
101
+ - Include URLs so users can read full articles
102
+ - Use current_datetime to ensure temporal accuracy
103
+ """,
104
+ )
105
+
106
+ __all__ = ["news_agent", "get_top_headlines", "search_news", "get_news_by_category", "duckduckgo_search", "current_datetime"]
common/aagents/weather_agent.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.weather_tools import get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs
7
+ from mcp.tools.time_tools import current_datetime
8
+ from agents import Agent, OpenAIChatCompletionsModel
9
+ from openai import AsyncOpenAI
10
+
11
+ # ---------------------------------------------------------
12
+ # Load environment variables
13
+ # ---------------------------------------------------------
14
+ load_dotenv()
15
+
16
+ ################################
17
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
18
+ # So we use list[dict] as output_type instead of list[searchResult].
19
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
20
+ ################################
21
+
22
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
23
+ google_api_key = os.getenv('GOOGLE_API_KEY')
24
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
25
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-flash-latest", openai_client=gemini_client)
26
+
27
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
28
+ groq_api_key = os.getenv('GROQ_API_KEY')
29
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
30
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
31
+
32
+ weather_agent = Agent(
33
+ name="WeatherAgent",
34
+ model=gemini_model, #"gpt-4o-mini",
35
+ # description="An agent that can perform web searches using DuckDuckGo.",
36
+ tools=[current_datetime, get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs],
37
+ instructions="""
38
+ You are a Weather Forecast agent who forecasts weather information ONLY.
39
+ You can use the 'current_datetime' tool to determine the current date as reference for the weather forecast.
40
+ When given a query, you use the 'get_weather_forecast' tool to retrieve weather data.
41
+ If the API key is missing or the API fails to get the forecast, you use the 'search_weather_fallback_ddgs' or 'search_weather_fallback_bs' as fallback tools to perform a web search for weather information.
42
+ Tool: get_weather_forecast Input:
43
+ A JSON object with the following structure:
44
+ { "city": "The city name to get the weather for.",
45
+ "date": "Optional date in YYYY-MM-DD format to get the forecast for a specific day. If not provided, return the current weather."
46
+ }
47
+
48
+ Output the weather information MUST be in a JSON well-formatted form as below:
49
+ {
50
+ "city": "City name",
51
+ "forecasts": [
52
+ {
53
+ "date": "Date of the forecast in YYYY-MM-DD format",
54
+ "weather": {
55
+
56
+ "description": "Weather description",
57
+ "temperature": "Temperature in Fahrenheit. Report both the high and low temperatures.",
58
+ "humidity": "Humidity percentage",
59
+ "wind_speed": "Wind speed in Miles per Hour (MPH)"
60
+ }
61
+ }.
62
+ ]
63
+ """,
64
+ # output_type=AgentOutputSchema(list[searchResult], strict_json_schema=False),
65
+ # output_type=list[dict], # safer than list[searchResult],
66
+ # output_type=list[searchResult],
67
+ )
68
+
69
+ __all__ = ["weather_agent", "get_weather_forecast", "search_weather_fallback_ddgs", "search_weather_fallback_bs"]
common/aagents/web_agent.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import AgentOutputSchema, function_tool, Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult
7
+ from agents import Agent, OpenAIChatCompletionsModel
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ ################################
16
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
17
+ # So we use list[dict] as output_type instead of list[searchResult].
18
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
19
+ ################################
20
+
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ web_agent = Agent(
32
+ name="WebAgent",
33
+ model="gpt-4o-mini",
34
+ # description="An agent that can perform web searches using DuckDuckGo.",
35
+ tools=[duckduckgo_search],
36
+ instructions="""
37
+ You are a WebAgent that can perform web searches to find information on the internet.
38
+ When given a query, use the 'duckduckgo_search' tool to retrieve relevant search results.
39
+ Tool: duckduckgo_search Input:
40
+ A JSON object with the following structure:
41
+ { "query": "The search query string.",
42
+ "max_results": "The maximum number of search results to return (default is 5).",
43
+ "search_type": "The type of search to perform. Options: 'text' (default) or 'news'. Use 'news' to get publication dates.",
44
+ "timelimit": "Time limit for search results. Options: 'd' (day), 'w' (week), 'm' (month), 'y' (year).",
45
+ "region": "Region for search results (e.g., 'us-en', 'uk-en'). Default is 'wt-wt' (world)."
46
+ }
47
+ """,
48
+ # output_type=AgentOutputSchema(list[searchResult], strict_json_schema=False),
49
+ # output_type=list[dict], # safer than list[searchResult],
50
+ output_type=list[searchResult],
51
+ )
52
+
53
+ __all__ = ["web_agent", "duckduckgo_search", "searchQuery", "searchResult"]
common/aagents/web_research_agent.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent module for internet queries."""
2
+ import os
3
+ from agents import AgentOutputSchema, function_tool, Agent
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, Field
6
+ from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult, fetch_page_content
7
+ from agents import Agent, OpenAIChatCompletionsModel
8
+ from openai import AsyncOpenAI
9
+
10
+ # ---------------------------------------------------------
11
+ # Load environment variables
12
+ # ---------------------------------------------------------
13
+ load_dotenv()
14
+
15
+ ################################
16
+ # Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
17
+ # So we use list[dict] as output_type instead of list[searchResult].
18
+ # Then in the calling code, we can convert dicts back to searchResult models if needed.
19
+ ################################
20
+
21
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
22
+ google_api_key = os.getenv('GOOGLE_API_KEY')
23
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
24
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
25
+
26
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
27
+ groq_api_key = os.getenv('GROQ_API_KEY')
28
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
29
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
30
+
31
+ web_research_agent = Agent(
32
+ name="WebResearchAgent",
33
+ model="gpt-4o-mini",
34
+ # description="An agent that can perform web searches using DuckDuckGo.",
35
+ tools=[duckduckgo_search, fetch_page_content],
36
+ instructions="""
37
+ You are WebResearchAgent — an advanced internet research assistant with two core abilities:
38
+
39
+ 1) Use the tool `duckduckgo_search` to discover relevant webpages for the user’s query.
40
+ 2) Use the tool `fetch_page_content` to retrieve full text content from any webpage returned by the search tool.
41
+
42
+ ===========================
43
+ AGENT RESPONSIBILITIES
44
+ ===========================
45
+
46
+ • Always begin by invoking `duckduckgo_search` to gather an initial set of webpages relevant to the user's question.
47
+
48
+ • After receiving the search results, you MUST fetch the full content for *all result URLs* by invoking
49
+ `fetch_page_content` once per URL.
50
+
51
+ • These fetch calls should be made **in parallel**:
52
+ - Do NOT wait for one fetch call to finish before issuing the next.
53
+ - Issue all fetch calls immediately after you receive the search results.
54
+
55
+ • You MUST NOT wait more than 3 seconds for any individual page to respond.
56
+ If content is missing or a fetch fails, continue with what you have.
57
+
58
+ ===========================
59
+ ANALYSIS & FINAL ANSWER
60
+ ===========================
61
+
62
+ • After search and fetch operations complete, analyze:
63
+ – the snippets from the search results
64
+ – the full content from `fetch_page_content` (for pages that responded)
65
+
66
+ • Synthesize the collected information and provide a clear, factual, concise answer.
67
+
68
+ • Your final output MUST be a structured, easy-to-read Markdown summary.
69
+
70
+ ===========================
71
+ IMPORTANT RULES
72
+ ===========================
73
+
74
+ • Never fabricate URLs or content not returned by the tools.
75
+ • Never claim to have visited pages without using `fetch_page_content`.
76
+ • Use the tools exactly as required — search first, fetch after.
77
+ • The final response should answer the user’s query using the combined evidence.
78
+ • MUST provide references to the research.
79
+ """
80
+ ,
81
+ )
82
+
83
+ __all__ = ["web_research_agent", "duckduckgo_search", "fetch_page_content", "searchQuery", "searchResult"]
common/aagents/yf_agent.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Yahoo Finance agent module for financial analysis and market research."""
2
+ import os
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from dotenv import load_dotenv
5
+ from mcp.tools.yf_tools import get_summary, get_market_sentiment, get_history
6
+ from mcp.tools.time_tools import current_datetime
7
+ from openai import AsyncOpenAI
8
+
9
+ # ---------------------------------------------------------
10
+ # Load environment variables
11
+ # ---------------------------------------------------------
12
+ load_dotenv()
13
+
14
+ GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
15
+ google_api_key = os.getenv('GOOGLE_API_KEY')
16
+ gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
17
+ gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
18
+
19
+ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
20
+ groq_api_key = os.getenv('GROQ_API_KEY')
21
+ groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
22
+ groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
23
+
24
+ yf_agent = Agent(
25
+ name="YahooFinanceAgent",
26
+ model=gemini_model,
27
+ tools=[current_datetime, get_summary, get_market_sentiment, get_history],
28
+ instructions="""
29
+ You are a specialized **Financial Analysis Agent** 💰, expert in market research, financial data retrieval, and market analysis.
30
+ Your primary role is to provide *actionable*, *data-driven*, and *concise* financial reports based on the available tools.
31
+
32
+ ## Core Directives & Priorities
33
+
34
+ 1. **Time Sensitivity:** Always use the 'current_datetime' tool to ensure all analysis is contextually relevant to the current date and time.
35
+ Financial data is extremely time-sensitive.
36
+
37
+ 2. **Financial Data Integrity:** Use the Yahoo Finance tools for specific stock/index data:
38
+ - 'get_summary': Get latest summary information and intraday price data for a ticker
39
+ - 'get_market_sentiment': Analyze recent price changes and provide market sentiment (Bullish/Bearish/Neutral)
40
+ - 'get_history': Fetch historical price data for a given ticker
41
+
42
+ Be precise about the date range and data source.
43
+
44
+ 3. **Synthesis and Analysis:** Do not just list data. You must **synthesize** financial data (prices, volume, sentiment)
45
+ to provide a complete analytical perspective (e.g., "Stock X is up 5% today driven by strong market momentum").
46
+
47
+ 4. **Professional Clarity:** Present information in a clear, professional, and structured format.
48
+ Use numerical data and financial terminology correctly.
49
+
50
+ 5. **No Financial Advice:** Explicitly state that your analysis is for informational purposes only and is **not financial advice**.
51
+
52
+ 6. **Tool Mandatory:** For any request involving a stock, index, or current market conditions, you **must** use
53
+ the appropriate tool(s) to verify data. **Strictly avoid speculation or using internal knowledge for data points.**
54
+
55
+ ## Tool Usage Examples
56
+
57
+ Tool: current_datetime
58
+ Input: { "format": "natural" }
59
+
60
+ Tool: get_summary
61
+ Input: { "symbol": "AAPL", "period": "1d", "interval": "1h" }
62
+
63
+ Tool: get_market_sentiment
64
+ Input: { "symbol": "AAPL", "period": "1mo" }
65
+
66
+ Tool: get_history
67
+ Input: { "symbol": "AAPL", "period": "1mo" }
68
+
69
+ ## Output Format Guidelines
70
+
71
+ * Use **bold** for key financial metrics (e.g., Stock Symbol, Price, Volume).
72
+ * Cite the tools used to obtain the data (e.g., "Data sourced from Yahoo Finance as of [Date]").
73
+ * If a symbol or data point cannot be found, clearly state "Data for [X] is unavailable or invalid."
74
+ * Always include a disclaimer: "This analysis is for informational purposes only and is not financial advice."
75
+ """,
76
+ )
77
+
78
+ __all__ = ["yf_agent", "get_summary", "get_market_sentiment", "get_history", "current_datetime"]
common/mcp/README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MCP Tools Server
2
+
3
+ A Model Context Protocol (MCP) server that exposes all tools from the `tools/` folder via stdio transport.
4
+
5
+ ## Features
6
+
7
+ - **Dynamic Tool Discovery**: Automatically discovers and registers all tools from the tools folder
8
+ - **Stdio Transport**: Compatible with Claude Desktop and other MCP clients
9
+ - **Comprehensive Tool Coverage**: Exposes 14 tools across 6 categories:
10
+ - Google Search (google_tools)
11
+ - News API (news_tools)
12
+ - DuckDuckGo Search (search_tools)
13
+ - Time Utilities (time_tools)
14
+ - Weather Forecast (weather_tools)
15
+ - Yahoo Finance (yf_tools)
16
+
17
+ ## Installation
18
+
19
+ 1. Install required dependencies:
20
+ ```bash
21
+ pip install mcp requests beautifulsoup4 ddgs yfinance python-dotenv pydantic
22
+ ```
23
+
24
+ 2. Set up environment variables in `.env`:
25
+ ```bash
26
+ # Google Search (Serper.dev)
27
+ SERPER_API_KEY=your_serper_api_key
28
+
29
+ # News API
30
+ NEWS_API_KEY=your_news_api_key
31
+
32
+ # Weather API
33
+ OPENWEATHER_API_KEY=your_openweather_api_key
34
+
35
+ # Google AI (for agents)
36
+ GOOGLE_API_KEY=your_google_api_key
37
+
38
+ # Groq (for agents)
39
+ GROQ_API_KEY=your_groq_api_key
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ### Running the Server
45
+
46
+ ```bash
47
+ cd common/mcp
48
+ python mcp_server.py
49
+ ```
50
+
51
+ The server will:
52
+ 1. Discover all tools from the `tools/` folder
53
+ 2. Print registered tools to stderr
54
+ 3. Start listening on stdio for MCP protocol messages
55
+
56
+ ### Integrating with Claude Desktop
57
+
58
+ Add to your Claude Desktop config (`claude_desktop_config.json`):
59
+
60
+ ```json
61
+ {
62
+ "mcpServers": {
63
+ "tools-server": {
64
+ "command": "python",
65
+ "args": ["/absolute/path/to/agenticaiprojects/common/mcp/mcp_server.py"],
66
+ "env": {
67
+ "SERPER_API_KEY": "your_key",
68
+ "NEWS_API_KEY": "your_key",
69
+ "OPENWEATHER_API_KEY": "your_key"
70
+ }
71
+ }
72
+ }
73
+ }
74
+ ```
75
+
76
+ ### Available Tools
77
+
78
+ The server exposes the following tools:
79
+
80
+ **Google Search:**
81
+ - `google_tools.google_search` - General Google search
82
+ - `google_tools.google_search_recent` - Time-filtered Google search
83
+
84
+ **News:**
85
+ - `news_tools.get_top_headlines` - Top headlines by country
86
+ - `news_tools.search_news` - Search news by topic
87
+ - `news_tools.get_news_by_category` - News by category
88
+
89
+ **Search & Content:**
90
+ - `search_tools.duckduckgo_search` - DuckDuckGo search
91
+ - `search_tools.fetch_page_content` - Extract page content
92
+
93
+ **Time:**
94
+ - `time_tools.current_datetime` - Get current date/time
95
+
96
+ **Weather:**
97
+ - `weather_tools.get_weather_forecast` - Weather forecast via API
98
+ - `weather_tools.search_weather_fallback_ddgs` - Weather via DuckDuckGo
99
+ - `weather_tools.search_weather_fallback_bs` - Weather via web scraping
100
+
101
+ **Finance:**
102
+ - `yf_tools.get_summary` - Stock summary
103
+ - `yf_tools.get_market_sentiment` - Market sentiment analysis
104
+ - `yf_tools.get_history` - Historical stock data
105
+
106
+ ## Development
107
+
108
+ ### Adding New Tools
109
+
110
+ 1. Create a new file in `tools/` folder (e.g., `my_tools.py`)
111
+ 2. Decorate functions with `@function_tool`
112
+ 3. The server will automatically discover and register them on next restart
113
+
114
+ ### Testing
115
+
116
+ ```bash
117
+ # Test the server
118
+ cd common/mcp
119
+ python mcp_server.py
120
+
121
+ # In another terminal, you can send MCP protocol messages via stdin
122
+ # Or use an MCP client library to test
123
+ ```
124
+
125
+ ## Troubleshooting
126
+
127
+ **Tools not discovered:**
128
+ - Check that functions are decorated with `@function_tool`
129
+ - Verify the module is in the `tools/` folder
130
+ - Check stderr output for registration messages
131
+
132
+ **API errors:**
133
+ - Verify environment variables are set correctly
134
+ - Check API key validity
135
+ - Review tool-specific error messages in stderr
136
+
137
+ ## License
138
+
139
+ Part of the agenticaiprojects repository.
common/mcp/__init__.py ADDED
File without changes
common/mcp/mcp_server.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
MCP Server with stdio transport that exposes all tools from the tools folder.
"""
import asyncio
import sys
import os
import inspect
import importlib
from pathlib import Path
from typing import Any, Callable

# Add parent directory to path for imports
# NOTE(review): inserting common/ at the FRONT of sys.path lets the local
# "mcp" package shadow the installed MCP SDK package of the same name, which
# the imports just below need ("from mcp.server import ..."). Confirm the
# resolution order actually works here; appending instead of inserting at
# index 0 may be safer.
sys.path.insert(0, str(Path(__file__).parent.parent))

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# Initialize MCP server
# Single module-level Server instance; the decorated handlers below
# (list_tools / call_tool) register themselves onto it.
app = Server("tools-server")

# Dictionary to store all discovered tools
# Maps "module_name.function_name" -> callable; populated by discover_tools().
TOOLS_REGISTRY: dict[str, Callable] = {}
25
+
26
def discover_tools():
    """
    Dynamically discover tool functions from the tools folder.

    Populates TOOLS_REGISTRY with entries keyed "module_name.function_name".
    Only public functions *defined in* each tool module are registered, so
    names merely imported into a module (e.g. load_dotenv, requests helpers)
    are not exposed as tools.

    NOTE(review): if @function_tool wraps functions into non-function objects,
    inspect.isfunction will not see them — confirm against the agents SDK.
    """
    tools_dir = Path(__file__).parent / "tools"
    tool_modules = [
        "google_tools",
        "news_tools",
        "search_tools",
        "time_tools",
        "weather_tools",
        "yf_tools",
    ]

    print(f"[MCP Server] Discovering tools from: {tools_dir}", file=sys.stderr)

    # Import through a local "tools" package instead of "mcp.tools": the name
    # "mcp" collides with the installed MCP SDK package, so "mcp.tools.*"
    # resolves against whichever "mcp" wins on sys.path and is fragile.
    pkg_dir = str(Path(__file__).parent)
    if pkg_dir not in sys.path:
        sys.path.insert(0, pkg_dir)

    for module_name in tool_modules:
        try:
            module = importlib.import_module(f"tools.{module_name}")

            for name, obj in inspect.getmembers(module, inspect.isfunction):
                # Skip private helpers.
                if name.startswith('_'):
                    continue
                # Skip functions imported from elsewhere; register only
                # functions this module itself defines.
                if getattr(obj, "__module__", None) != module.__name__:
                    continue

                tool_name = f"{module_name}.{name}"
                TOOLS_REGISTRY[tool_name] = obj
                print(f"[MCP Server] Registered tool: {tool_name}", file=sys.stderr)

        except Exception as e:
            print(f"[MCP Server] Error loading module {module_name}: {e}", file=sys.stderr)

    print(f"[MCP Server] Total tools registered: {len(TOOLS_REGISTRY)}", file=sys.stderr)
65
+
66
+
67
@app.list_tools()
async def list_tools() -> list[Tool]:
    """
    List all available tools.

    Builds an MCP Tool definition for each registered callable by reflecting
    on its signature: parameter annotations become JSON-schema primitive
    types and parameters without a default value are marked required.
    """
    tools = []

    # Map Python annotations to JSON-schema type names; anything unmapped
    # (or unannotated) falls back to "string".
    type_map = {int: "integer", bool: "boolean", float: "number", str: "string"}

    for tool_name, tool_func in TOOLS_REGISTRY.items():
        # Extract function signature and docstring
        sig = inspect.signature(tool_func)
        doc = inspect.getdoc(tool_func) or "No description available"

        # Build input schema from function parameters
        properties = {}
        required = []

        for param_name, param in sig.parameters.items():
            param_type = "string"  # default when annotation missing/unmapped
            # Compare against the sentinel with `is`: Parameter.empty is a
            # singleton marker, not a value with meaningful equality.
            if param.annotation is not inspect.Parameter.empty:
                param_type = type_map.get(param.annotation, "string")

            properties[param_name] = {
                "type": param_type,
                "description": f"Parameter: {param_name}"
            }

            # A parameter with no default value is required.
            if param.default is inspect.Parameter.empty:
                required.append(param_name)

        # Create tool definition; description is the docstring's first line,
        # capped at 200 characters.
        tool = Tool(
            name=tool_name,
            description=doc.splitlines()[0][:200],
            inputSchema={
                "type": "object",
                "properties": properties,
                "required": required
            }
        )
        tools.append(tool)

    return tools
119
+
120
+
121
@app.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """
    Execute a registered tool by name and return its output as text content.

    Raises ValueError for an unknown tool name; execution errors are caught
    and returned to the client as an error-message text block.
    """
    print(f"[MCP Server] Calling tool: {name} with args: {arguments}", file=sys.stderr)

    tool_func = TOOLS_REGISTRY.get(name)
    if tool_func is None:
        raise ValueError(f"Tool not found: {name}")

    try:
        # Await coroutine tools; invoke plain functions directly.
        if inspect.iscoroutinefunction(tool_func):
            outcome = await tool_func(**arguments)
        else:
            outcome = tool_func(**arguments)

        # MCP text content requires a string payload.
        text = outcome if isinstance(outcome, str) else str(outcome)
        return [TextContent(type="text", text=text)]

    except Exception as e:
        error_msg = f"Error executing tool {name}: {str(e)}"
        print(f"[MCP Server] {error_msg}", file=sys.stderr)
        return [TextContent(type="text", text=error_msg)]
150
+
151
+
152
async def main():
    """
    Main entry point for the MCP server.

    Discovers and registers all tools, then serves the MCP protocol over
    stdio until the client closes the stream.
    """
    # Discover all tools before starting the server
    discover_tools()

    print(f"[MCP Server] Starting MCP server with {len(TOOLS_REGISTRY)} tools", file=sys.stderr)

    # Run the server with stdio transport (stdout carries protocol traffic,
    # which is why all diagnostics above go to stderr)
    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )


if __name__ == "__main__":
    asyncio.run(main())
common/mcp/tools/__init__.py ADDED
File without changes
common/mcp/tools/google_tools.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from typing import Optional
6
+
7
+ # ---------------------------------------------------------
8
+ # Load environment variables
9
+ # ---------------------------------------------------------
10
+ load_dotenv()
11
+
12
+ # ============================================================
13
+ # 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
14
+ # ============================================================
15
+
16
@function_tool
def google_search(query: str, num_results: int = 3) -> str:
    """
    Perform a general Google search using Serper.dev API.

    Parameters:
    -----------
    query : str
        The search query string, e.g., "latest Tesla stock news".
    num_results : int, optional (default=3)
        Maximum number of search results to return.

    Returns:
    --------
    str
        Formatted string of top search results, each including:
        - Title of the page
        - URL link
        - Snippet / description
        If no results are found or API key is missing, returns an error message.

    Example:
    --------
    google_search("AI in finance", num_results=2)
    """
    print(f"[DEBUG] google_search called with query='{query}', num_results={num_results}")

    try:
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        url = "https://google.serper.dev/search"
        headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
        # No "tbs" time filter here: this is the *general* search tool, as the
        # docstring states. The previous "tbs": "qdr:d" silently restricted
        # every query to the last 24 hours; time-restricted queries belong to
        # google_search_recent, which exposes the timeframe explicitly.
        payload = {"q": query, "num": num_results}

        response = requests.post(url, headers=headers, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()

        if "organic" not in data or not data["organic"]:
            return f"No results found for query: '{query}'"

        formatted_results = [
            f"Title: {item.get('title')}\n"
            f"Link: {item.get('link')}\n"
            f"Snippet: {item.get('snippet', '')}\n"
            for item in data["organic"][:num_results]
        ]
        return "\n".join(formatted_results)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error during Google search: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error performing Google search: {e}")
        return f"Error performing Google search: {e}"
82
+
83
+
84
@function_tool
def google_search_recent(query: str, num_results: int = 3, timeframe: str = "d") -> str:
    """
    Perform a Google search restricted to a recent time window (Serper.dev).

    Parameters:
    -----------
    query : str
        The search query string.
    num_results : int, optional (default=3)
        Maximum number of search results to return.
    timeframe : str, optional (default="d")
        Serper "qdr" window: "d" = past day, "w" = past week,
        "m" = past month, "y" = past year.

    Returns:
    --------
    str
        Formatted string of recent search results, or an error message.
    """
    print(f"[DEBUG] google_search_recent called with query='{query}', timeframe={timeframe}")

    try:
        key = os.getenv("SERPER_API_KEY")
        if not key:
            return "Error: SERPER_API_KEY missing in environment variables."

        # "tbs=qdr:<x>" is Google's time-range filter, forwarded by Serper.
        body = {"q": query, "num": num_results, "tbs": f"qdr:{timeframe}"}
        response = requests.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": key, "Content-Type": "application/json"},
            json=body,
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        hits = data.get("organic")
        if not hits:
            return f"No recent results found for query: '{query}'"

        lines = []
        for hit in hits[:num_results]:
            lines.append(
                f"Title: {hit.get('title')}\n"
                f"Link: {hit.get('link')}\n"
                f"Snippet: {hit.get('snippet', '')}\n"
            )

        return f"Recent results ({timeframe}):\n\n" + "\n".join(lines)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Error performing Google search: {e}"
common/mcp/tools/news_tools.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from typing import Optional
6
+ import datetime
7
+
8
+ # ---------------------------------------------------------
9
+ # Load environment variables
10
+ # ---------------------------------------------------------
11
+ load_dotenv()
12
+
13
+ # ============================================================
14
+ # 🔹 NEWS TOOLSET (NewsAPI.org)
15
+ # ============================================================
16
+
17
@function_tool
def get_top_headlines(country: str = "us", num_results: int = 5) -> str:
    """
    Fetch the latest top headlines for a country using NewsAPI.org.

    Parameters:
    -----------
    country : str, optional (default="us")
        Two-letter country code (e.g., "us", "gb", "in").
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines (title, source, published date, URL), or an
        error message when the API key is missing or nothing is found.
    """
    print(f"[DEBUG] get_top_headlines called for country={country}, num_results={num_results}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={"country": country, "pageSize": num_results, "apiKey": api_key},
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()

        articles = payload.get("articles")
        if not articles:
            return f"No top headlines found for country: {country}"

        blocks = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in articles[:num_results]
        ]

        return f"Top Headlines ({country.upper()}):\n\n" + "\n".join(blocks)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
73
+
74
+
75
@function_tool
def search_news(query: str, num_results: int = 5, days_back: int = 7) -> str:
    """
    Search for recent news articles about a specific topic using NewsAPI.org.

    Parameters:
    -----------
    query : str
        Keyword or topic to search (e.g., "Tesla earnings", "AI healthcare").
    num_results : int, optional (default=5)
        Number of articles to fetch.
    days_back : int, optional (default=7)
        Number of days to look back for articles (1-30).

    Returns:
    --------
    str
        Formatted news articles with title, source, published date, and URL.
        If API key is missing or no results found, returns an error message.
    """
    print(f"[DEBUG] search_news called with query='{query}', num_results={num_results}, days_back={days_back}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        # Calculate date range. Use a timezone-aware UTC timestamp:
        # datetime.utcnow() is deprecated since Python 3.12 and returns a
        # naive datetime; the "Z" suffix in the format implies UTC anyway.
        today = datetime.datetime.now(datetime.timezone.utc)
        from_date = (today - datetime.timedelta(days=days_back)).strftime('%Y-%m-%dT%H:%M:%SZ')

        url = "https://newsapi.org/v2/everything"
        params = {
            "q": query,
            "pageSize": num_results,
            "apiKey": api_key,
            "sortBy": "publishedAt",
            "language": "en",
            "from": from_date
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        if not data.get("articles"):
            return f"No news found for query: '{query}'"

        formatted = []
        for article in data["articles"][:num_results]:
            formatted.append(
                f"📰 {article.get('title')}\n"
                f" Source: {article.get('source', {}).get('name')}\n"
                f" Published: {article.get('publishedAt', 'N/A')}\n"
                f" URL: {article.get('url')}\n"
            )

        return f"News Search Results for '{query}' (last {days_back} days):\n\n" + "\n".join(formatted)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
140
+
141
+
142
@function_tool
def get_news_by_category(category: str = "business", country: str = "us", num_results: int = 5) -> str:
    """
    Fetch top headlines by category using NewsAPI.org.

    Parameters:
    -----------
    category : str, optional (default="business")
        One of: "business", "entertainment", "general", "health",
        "science", "sports", "technology".
    country : str, optional (default="us")
        Two-letter country code.
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Formatted headlines for the specified category, or an error message.
    """
    print(f"[DEBUG] get_news_by_category called for category={category}, country={country}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        query_params = {
            "category": category,
            "country": country,
            "pageSize": num_results,
            "apiKey": api_key,
        }
        response = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params=query_params,
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()

        articles = payload.get("articles")
        if not articles:
            return f"No headlines found for category: {category}"

        blocks = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in articles[:num_results]
        ]

        return f"Top {category.capitalize()} Headlines ({country.upper()}):\n\n" + "\n".join(blocks)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
common/mcp/tools/rag_tool.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RAG Search Tool - Search the local healthcare knowledge base"""
2
+ import os
3
+ from pathlib import Path
4
+ from agents import function_tool, RunContextWrapper
5
+ from dotenv import load_dotenv
6
+ from rag.rag import Retriever
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass
class UserContext:
    """Per-request context handed to rag_search via RunContextWrapper."""
    # Unique user/session identifier.
    uid: str
    # Path to the vector store consumed by Retriever.
    db_path: str = ""
    # Path to the source document(s) backing the knowledge base.
    file_path: str = ""
    similarity_threshold: float = 0.4  # FAISS L2 distance threshold for RAG relevance
16
+
17
+
18
+ # ---------------------------------------------------------
19
+ # Load environment variables
20
+ # ---------------------------------------------------------
21
+ load_dotenv()
22
+
23
+ # ---------------------------------------------------------
24
+ # Initialize RAG Retriever
25
+ # ---------------------------------------------------------
26
+ # Get the healthcare-rag-chatbot directory path
27
+ # healthcare_dir = str(Path(__file__).parent.parent.parent)
28
+ # retriever = None
29
+
30
+ # ---------------------------------------------------------
31
+ # RAG Search Tool
32
+ # ---------------------------------------------------------
33
@function_tool
def rag_search(wrapper: RunContextWrapper[UserContext], query: str) -> str:
    """
    Search the local healthcare knowledge base for relevant information.

    Args:
        wrapper: Run context carrying the UserContext (db/file paths and
            the FAISS distance threshold used to judge relevance)
        query: The medical question or topic to search for

    Returns:
        Relevant information from the healthcare knowledge base, or the
        sentinel string "No relevant information found in the knowledge
        base." — callers treat that sentinel as the signal to fall back
        to web search.
    """
    print(f"[DEBUG] RAG_SEARCH called with query: '{query}'")

    # Get similarity threshold from user context
    similarity_threshold = wrapper.context.similarity_threshold
    print(f"[DEBUG] RAG_SEARCH: Using similarity threshold: {similarity_threshold}")

    try:
        # Initialize retriever with user context (a fresh instance per call;
        # paths come from the per-request UserContext)
        retriever = Retriever(
            db_path=wrapper.context.db_path,
            file_path=wrapper.context.file_path
        )

        # Get results with similarity scores
        results_with_scores = retriever.retrieve_with_scores(query, k=5)  # Increased from 4 to 5

        if not results_with_scores:
            print("[DEBUG] RAG_SEARCH: No results found in knowledge base")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Found {len(results_with_scores)} results")

        # Check if the best match meets the threshold
        # FAISS returns (document, distance) where lower distance = better match
        best_score = results_with_scores[0][1]
        print(f"[DEBUG] RAG_SEARCH: Best similarity score (distance): {best_score:.4f} (threshold: {similarity_threshold})")

        # If even the closest document is farther than the threshold, treat
        # the whole result set as irrelevant.
        if best_score > similarity_threshold:
            print(f"[DEBUG] RAG_SEARCH: Best match score {best_score:.4f} is above threshold {similarity_threshold}")
            print("[DEBUG] RAG_SEARCH: Results not relevant enough, triggering web search fallback")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Results are relevant (score: {best_score:.4f} <= {similarity_threshold})")

        # Log all scores for debugging
        all_scores = [f"{score:.4f}" for _, score in results_with_scores]
        print(f"[DEBUG] RAG_SEARCH: All scores: {', '.join(all_scores)}")

        # Format results - only include documents that meet the similarity
        # threshold (per-document filter on top of the best-score gate above)
        formatted_results = []
        for i, (doc, score) in enumerate(results_with_scores[:5], 1):  # Top 5 results
            if score <= similarity_threshold:
                content = doc.page_content.strip()
                formatted_results.append(f"Result {i} (score: {score:.4f}):\n{content}\n")

        # Defensive re-check: unreachable if results are sorted by distance
        # (the best-score gate already passed), kept for safety.
        if not formatted_results:
            print("[DEBUG] RAG_SEARCH: No results met the similarity threshold")
            print("[DEBUG] RAG_SEARCH: Triggering web search fallback")
            return "No relevant information found in the knowledge base."

        result_text = "\n".join(formatted_results)
        print(f"[DEBUG] RAG_SEARCH: Returning {len(formatted_results)} results, total length: {len(result_text)} characters")
        print(f"[DEBUG] RAG_SEARCH: First 300 chars: {result_text[:300]}...")

        return result_text

    except Exception as e:
        print(f"[DEBUG] RAG_SEARCH: Error occurred - {str(e)}")
        return f"Error retrieving from knowledge base: {str(e)}"
103
+
104
+
105
+
106
+ __all__ = ["rag_search", "retriever"]
common/mcp/tools/search_tools.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ddgs import DDGS
2
+ from agents import function_tool
3
+ from dotenv import load_dotenv
4
+ from pydantic import BaseModel, Field
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ from typing import Optional
8
+
9
+ # ---------------------------------------------------------
10
+ # Load environment variables
11
+ # ---------------------------------------------------------
12
+ load_dotenv()
13
+
14
+ # ---------------------- MODELS ---------------------------
15
class searchQuery(BaseModel):
    """Input schema for duckduckgo_search (validated by pydantic)."""
    query: str = Field(..., description="The search query string.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    search_type: str = Field(
        "text",
        description="Search type: 'text' (default) or 'news'. Use 'news' to get publication dates."
    )
    timelimit: str = Field(
        'd',
        description="Time limit for search results: 'd' (day), 'w' (week), 'm' (month), 'y' (year)."
    )
    region: str = Field("us-en", description="Region for search results (e.g., 'us-en').")
27
+
28
+
29
class searchResult(BaseModel):
    """Normalized search hit returned (as a dict) by duckduckgo_search."""
    title: str
    link: str
    snippet: str
    # Publication date — only populated for search_type="news" results.
    datetime: Optional[str] = None
34
+
35
+
36
+ # ---------------------- PAGE FETCH TOOL ---------------------------
37
@function_tool
def fetch_page_content(url: str, timeout: int = 3) -> Optional[str]:
    """Fetch and extract text content from a web page.

    Returns the cleaned visible text, or None on any fetch/parse failure
    (callers must handle the None case).
    """
    print(f"[DEBUG] fetch_page_content called with: {url} - timeout: {timeout}")
    try:
        # Desktop-browser User-Agent: many sites block default requests UA.
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove irrelevant elements (boilerplate chrome, scripts, styles)
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        # Extract text
        text = soup.get_text(separator='\n', strip=True)

        # Clean whitespace
        # NOTE(review): the classic BeautifulSoup recipe splits on a
        # double space ("  ") here; splitting on a single space puts every
        # word on its own line — confirm which is intended.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n'.join(chunk for chunk in chunks if chunk)

        return text
    except Exception as e:
        # Best-effort tool: log and return None rather than raising.
        print(f"[WARNING] Failed to fetch content from {url}: {str(e)}")
        return None
70
+
71
+
72
+ # ---------------------- SEARCH TOOL ---------------------------
73
@function_tool
def duckduckgo_search(params: searchQuery) -> list[dict]:
    """Run a DuckDuckGo query and return snippet-level results only.

    No page content is fetched here; use fetch_page_content for full text.
    """
    print(f"[DEBUG] duckduckgo_search called with: {params}")

    results: list[dict] = []
    with DDGS() as ddgs:
        shared_kwargs = dict(
            max_results=params.max_results,
            timelimit=params.timelimit,
            region=params.region,
        )
        if params.search_type == "news":
            # News hits carry a publication date under "date" and the URL
            # under "url".
            for hit in ddgs.news(params.query, **shared_kwargs):
                entry = searchResult(
                    title=hit.get("title", ""),
                    link=hit.get("url", ""),
                    snippet=hit.get("body", ""),
                    datetime=hit.get("date", ""),
                )
                results.append(entry.model_dump())
        else:
            # Plain text hits use "href" for the URL and have no date.
            for hit in ddgs.text(params.query, **shared_kwargs):
                entry = searchResult(
                    title=hit.get("title", ""),
                    link=hit.get("href", ""),
                    snippet=hit.get("body", ""),
                )
                results.append(entry.model_dump())

    print(f"[DEBUG] duckduckgo_search returning {len(results)} results")
    return results
115
+
common/mcp/tools/time_tools.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from agents import function_tool
3
+ # from ..common.utility.logger import log_call
4
+
5
@function_tool
# @log_call
def current_datetime(format: str = "natural") -> str:
    """
    Returns the current date and time as a formatted string.

    Args:
        format (str): Format style for the datetime. Options:
            - "natural" (default): "Saturday, December 7, 2025 at 3:59 PM"
            - "natural_short": "Dec 7, 2025 at 3:59 PM"
            - "natural_full": "Saturday, December 7, 2025 at 3:59:30 PM CST"
            - Custom strftime format string (e.g., "%Y-%m-%d %H:%M:%S")

    Returns:
        str: Current date and time in the specified format
    """
    # Use a timezone-aware local time: on a naive datetime.now() the %Z
    # directive formats as an empty string, so "natural_full" would never
    # show the timezone abbreviation promised in the docstring.
    now = datetime.now().astimezone()

    # Natural format options
    if format == "natural":
        return now.strftime("%A, %B %d, %Y at %I:%M %p")
    elif format == "natural_short":
        return now.strftime("%b %d, %Y at %I:%M %p")
    elif format == "natural_full":
        return now.strftime("%A, %B %d, %Y at %I:%M:%S %p %Z")
    else:
        # Any other value is treated as a custom strftime format string.
        return now.strftime(format)
common/mcp/tools/weather_tools.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import datetime
5
+ from dotenv import load_dotenv
6
+ from typing import Optional
7
+
8
+ from ddgs import DDGS
9
+ from agents import function_tool
10
+
11
+ # ---------------------------------------------------------
12
+ # Load environment variables
13
+ # ---------------------------------------------------------
14
+ load_dotenv()
15
+
16
@function_tool
def get_weather_forecast(city: str, date: Optional[str] = None) -> str:
    """
    PRIMARY TOOL: Fetch weather using OpenWeatherMap API.

    Queries the 5-day/3-hour forecast endpoint for *city* and returns a
    plain-text report. When *date* (YYYY-MM-DD) is given, only entries for
    that day are included; if the day is outside the 5-day window an
    explanatory message is returned instead.
    """
    print(f"[DEBUG] Primary API get_weather_forecast called for city={city}")

    api_key = os.getenv("OPENWEATHER_API_KEY")
    if not api_key:
        return "Error: OPENWEATHER_API_KEY missing. Please use the fallback search tool."

    endpoint = "https://api.openweathermap.org/data/2.5/forecast"
    try:
        response = requests.get(
            endpoint,
            params={"q": city, "appid": api_key, "units": "metric"},
            timeout=5
        )
        payload = response.json()
    except Exception as exc:
        return f"Error calling weather API: {str(exc)}"

    # OpenWeatherMap signals success via a "cod" field (string or int).
    if str(payload.get("cod")) != "200":
        return f"Error from API: {payload.get('message', 'Unknown error')}"

    # Collect one line per forecast entry, filtered to the requested day.
    report_lines = []
    for entry in payload.get("list", []):
        day = entry["dt_txt"].split(" ")[0]
        if date and day != date:
            continue
        weather = entry['weather'][0]['description'].capitalize()
        main = entry['main']
        report_lines.append(
            f"{day}: {weather}, Temp: {main['temp']}°C, "
            f"Humidity: {main['humidity']}%, Wind: {entry['wind']['speed']} m/s"
        )

    # A date was requested but nothing matched: out of the 5-day range.
    if date and not report_lines:
        return f"API valid, but date {date} is out of range (5-day limit). Try the search fallback tool."

    final_report = "\n".join(report_lines)
    return f"API Forecast for {city}:\n{final_report}"
67
+
68
+ # ---------------------------------------------------------
69
+ # Tool 2: Web Search Fallback (Secondary)
70
+ # ---------------------------------------------------------
71
+
72
@function_tool
def search_weather_fallback_ddgs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Search-based fallback that produces an API-like structured forecast.

    Args:
        city: City name to look up.
        date: Optional date in "YYYY-MM-DD" form; defaults to today. A
            non-ISO value is used verbatim in the search query.

    Returns:
        str: An API-like forecast line for the city, or an error message.
    """
    # Explicit class import: the module-level `import datetime` binds the
    # *module* (which has no `strptime`); the original code only worked
    # because a later module-level `from datetime import datetime` happened
    # to rebind the name. Importing locally removes that ordering hazard.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (DDGS) called for city={city}, date={date}")

    try:
        # --- Build a natural-language date for the search query ---
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                # Not ISO formatted; use the caller's value as-is.
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- Perform Search ---
        results = list(DDGS().text(query, max_results=3))
        print(f"[DEBUG] Number of search results: {len(results)}")

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate snippet text for regex extraction ---
        full_text = " ".join(r.get("body", "") for r in results)

        # --- Extract values with tolerant regexes (first match wins) ---
        temp_match = re.findall(r'(-?\d+)\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_match[0] if temp_match else "?"

        humidity_match = re.findall(r'(\d+)\s*%', full_text)
        humidity = humidity_match[0] if humidity_match else "?"

        wind_match = re.findall(r'(\d+)\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_match[0] if wind_match else "?"

        # --- Condition: best-effort guess from the first result title ---
        condition_raw = results[0].get("title", "Unknown").split("-")[0].strip()
        condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Construct API-like Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )
        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
139
+
140
+
141
+ import requests
142
+ from bs4 import BeautifulSoup
143
+ import re
144
+ from typing import Optional
145
+ from agents import function_tool
146
+ from datetime import datetime
147
+
148
@function_tool
def search_weather_fallback_bs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Web-scraping fallback using BeautifulSoup.
    Produces an API-like structured forecast.

    Args:
        city: City name to look up.
        date: Optional date in "YYYY-MM-DD" form; defaults to today.

    Returns:
        str: An API-like forecast line for the city, or an error message.
    """
    # Fix: the original re-imported requests/bs4/re/datetime inside the
    # function although they are already imported at module level. Only the
    # datetime *class* import is kept local, because the module top-level
    # `import datetime` binds the module object, not the class.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (BeautifulSoup) called for city={city}, date={date}")

    try:
        # --- Build Query ---
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- DuckDuckGo HTML search (no API key required) ---
        search_url = f"https://duckduckgo.com/html/?q={query.replace(' ', '+')}"
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(search_url, headers=headers, timeout=5)
        if response.status_code != 200:
            return f"Error fetching search results: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for result in soup.select(".result__body"):
            title_tag = result.select_one(".result__title a")
            snippet_tag = result.select_one(".result__snippet")
            if title_tag and snippet_tag:
                results.append({
                    "title": title_tag.get_text(strip=True),
                    "body": snippet_tag.get_text(strip=True)
                })

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate Text ---
        full_text = " ".join([r["body"] for r in results])

        # --- Extract Temperature / Humidity / Wind (first match wins) ---
        temp_matches = re.findall(r'(-?\d{1,2})\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_matches[0] if temp_matches else "?"

        humidity_matches = re.findall(r'(\d{1,3})\s*%', full_text)
        humidity = humidity_matches[0] if humidity_matches else "?"

        wind_matches = re.findall(r'(\d{1,3})\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_matches[0] if wind_matches else "?"

        # --- Extract Condition: scan snippets first, fall back to first title ---
        condition = "Unknown"
        for r in results:
            m = re.search(r'(clear|sunny|cloudy|rain|snow|storm|fog|mist)', r["body"], re.I)
            if m:
                condition = m.group(1).capitalize()
                break
        if condition == "Unknown":
            # Fallback
            condition_raw = results[0]["title"].split("-")[0].strip()
            condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Build Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )
        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
common/mcp/tools/yf_tools.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import yfinance as yf
4
+ from dotenv import load_dotenv
5
+ from agents import function_tool
6
+ from datetime import datetime, timedelta
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+
12
+ # ============================================================
13
+ # 🔹 YAHOO FINANCE TOOLSET
14
+ # ============================================================
15
@function_tool
def get_summary(symbol: str, period: str = "1d", interval: str = "1h") -> str:
    """
    Fetch the latest summary information and intraday price data for a given ticker.
    Ensures recent data is retrieved by calculating start/end dates dynamically.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1d")
        Time range for price data. Examples: "1d", "5d", "1mo", "3mo".
    interval : str, optional (default="1h")
        Granularity of the data. Examples: "1m", "5m", "1h", "1d".

    Returns:
    --------
    str
        A formatted string containing the name, current price and change,
        open/high/low, volume, and the period/interval used — or an error
        message if no data could be fetched.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix ("d"/"mo"/"y") into an approximate
        # number of days; unrecognised formats default to ~1 month.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30  # default 1 month
        start_date = end_date - timedelta(days=days)

        # Fix: yfinance treats `end` as EXCLUSIVE, so passing today's date
        # silently dropped the current session. Pass tomorrow so today's
        # (intraday) bars are included.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d"),
            interval=interval
        )

        if data.empty:
            return f"No data found for symbol '{symbol}'."

        # Summarise the most recent bar.
        latest = data.iloc[-1]
        current_price = round(latest["Close"], 2)
        open_price = round(latest["Open"], 2)
        change = round(current_price - open_price, 2)
        pct_change = round((change / open_price) * 100, 2)

        info = ticker.info
        long_name = info.get("longName", symbol)
        currency = info.get("currency", "USD")

        formatted = [
            f"📈 {long_name} ({symbol})",
            f"Current Price: {current_price} {currency}",
            f"Change: {change} ({pct_change}%)",
            f"Open: {open_price} | High: {round(latest['High'], 2)} | Low: {round(latest['Low'], 2)}",
            f"Volume: {int(latest['Volume'])}",
            f"Period: {period} | Interval: {interval}",
        ]
        return "\n".join(formatted)

    except Exception as e:
        return f"Error fetching data for '{symbol}': {e}"
87
+
88
@function_tool
def get_market_sentiment(symbol: str, period: str = "1mo") -> str:
    """
    Analyze recent price changes and provide a simple market sentiment.
    Uses dynamic start/end dates to ensure recent data.

    This tool computes the percentage change over the specified period and
    classifies the sentiment as:
    - Bullish (if price increased >2%)
    - Bearish (if price decreased >2%)
    - Neutral (otherwise)

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        Time range to analyze. Examples: "7d", "1mo", "3mo".

    Returns:
    --------
    str
        A human-readable sentiment string including percentage change.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix into an approximate day count.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30
        start_date = end_date - timedelta(days=days)

        # Fix: `end` is exclusive in yfinance — add a day so today's close
        # is part of the window being analysed.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No data for {symbol}."

        recent_change = data["Close"].iloc[-1] - data["Close"].iloc[0]
        pct_change = (recent_change / data["Close"].iloc[0]) * 100

        # Classify: >+2% bullish, <-2% bearish, otherwise neutral.
        sentiment = "Neutral"
        if pct_change > 2:
            sentiment = "Bullish"
        elif pct_change < -2:
            sentiment = "Bearish"

        return f"{symbol} market sentiment ({period}): {sentiment} ({pct_change:.2f}% change)"

    except Exception as e:
        return f"Error fetching market sentiment for '{symbol}': {e}"
148
+
149
@function_tool
def get_history(symbol: str, period: str = "1mo") -> str:
    """
    Fetch historical price data for a given ticker.
    Ensures recent data is retrieved dynamically using start/end dates.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1mo")
        The length of historical data to retrieve. Examples: "1d", "5d", "1mo", "3mo", "1y", "5y".

    Returns:
    --------
    str
        A formatted string showing the last 5 rows of historical prices
        (Open, High, Low, Close, Volume), or an error message.
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period suffix into an approximate day count.
        end_date = datetime.today()
        if period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        elif period.endswith("d"):
            days = int(period[:-1])
        else:
            days = 30
        start_date = end_date - timedelta(days=days)

        # Fix: `end` is exclusive in yfinance — add a day so the latest
        # session appears in the returned history.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No historical data found for '{symbol}'."
        return f"Historical data for {symbol} ({period}):\n{data.tail(5).to_string()}"

    except Exception as e:
        return f"Error fetching historical data for '{symbol}': {e}"
common/rag/rag.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+
8
+ DB_NAME = 'healthcare_db'
9
+ DIRECTORY_NAME = "healthcare"
10
+
11
class Retriever:
    """FAISS-backed retriever over the healthcare PDF corpus.

    Builds (or loads from disk) a FAISS index of the PDFs found under
    ``<file_path>/healthcare`` and exposes simple retrieval helpers. The
    index is persisted under ``<db_path>/healthcare_db``.
    """

    def __init__(self,
                 file_path: str = os.path.join(os.getcwd(), "data"),
                 db_path: str = os.path.join(os.getcwd(), "db")):
        self.directory_path = os.path.join(file_path, DIRECTORY_NAME)
        self.db_path = os.path.join(db_path, DB_NAME)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=200,
            length_function=len,
            is_separator_regex=False,
        )
        self.retriever = None  # lazily initialised by load_knowledge_base()

    def load_knowledge_base(self):
        """Load the persisted FAISS index, or build it from the PDFs if absent."""
        if os.path.exists(self.db_path):
            self.retriever = FAISS.load_local(
                self.db_path,
                self.embeddings,
                # Safe here: the index is produced locally by this class.
                allow_dangerous_deserialization=True
            ).as_retriever()
        else:
            self.retriever = self._create_knowledge_base()

    def _create_knowledge_base(self):
        """Build the FAISS index from the PDF corpus, persist it, return a retriever."""
        documents = self._load_documents()
        chunks = self._split_documents(documents)
        vectorstore = FAISS.from_documents(chunks, self.embeddings)
        vectorstore.save_local(self.db_path)
        return vectorstore.as_retriever()

    def _load_documents(self):
        """Load every PDF under the corpus directory (recursively)."""
        loader = DirectoryLoader(
            self.directory_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True
        )
        return loader.load()

    def _split_documents(self, documents):
        """Split loaded documents into overlapping chunks for embedding."""
        return self.text_splitter.split_documents(documents)

    def retrieve(self, query, k=4):
        """Retrieve the top-*k* documents without scores (backward compatible).

        Fix: the original ignored *k* entirely (``retriever.invoke`` takes no
        ``k``); it is now forwarded to the underlying vector store. The
        default of 4 matches the retriever's previous behaviour.
        """
        if not self.retriever:
            self.load_knowledge_base()
        return self.retriever.vectorstore.similarity_search(query, k=k)

    def retrieve_with_scores(self, query, k=4):
        """Retrieve documents with similarity scores.

        Note: FAISS returns L2 distance, so lower scores are better.
        """
        if not self.retriever:
            self.load_knowledge_base()
        vectorstore = self.retriever.vectorstore
        return vectorstore.similarity_search_with_score(query, k=k)

    def update_knowledge_base(self):
        """Rebuild the index from the PDFs and refresh the active retriever.

        Fix: the original rebuilt the index on disk but never reassigned
        ``self.retriever``, so stale results were served until a reload.
        """
        self.retriever = self._create_knowledge_base()

    def delete_knowledge_base(self):
        """Remove the persisted index directory, if present."""
        if os.path.exists(self.db_path):
            shutil.rmtree(self.db_path)
92
+
93
+ # No cleanup needed for VectorStoreRetriever
94
+
common/utility/__init__.py ADDED
File without changes
common/utility/embedding_factory.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Union
3
+ # from azure.identity import DefaultAzureCredential
4
+ from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
5
+ from langchain_ollama import OllamaEmbeddings
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+
8
+
9
class EmbeddingFactory:
    """
    A static utility class to create and return LLM Embedding instances based on the input type.

    Supported types: 'azure' (currently disabled), 'openai', 'ollama', 'hf'.
    """

    @staticmethod
    def get_llm(llm_type: str) -> "Union[AzureOpenAIEmbeddings, OpenAIEmbeddings, OllamaEmbeddings, HuggingFaceEmbeddings]":
        """
        Returns an embedding model instance based on the specified type.

        Parameters:
            llm_type (str): The embedding backend to use. Valid values are
                'azure', 'openai', 'ollama', or 'hf' (case-insensitive).

        Returns:
            The embedding model instance for the requested backend.

        Raises:
            NotImplementedError: For 'azure' — the implementation is
                commented out pending credential setup.
            ValueError: For any unrecognised ``llm_type``.
        """
        backend = llm_type.lower()
        if backend == "azure":
            # Fix: the original branch was a bare `pass` and silently
            # returned None, deferring the failure to the first use of the
            # missing client. Fail loudly instead.
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # return AzureOpenAIEmbeddings(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment="text-embedding-3-small",
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError("Azure embeddings are not configured in this build.")
        elif backend == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model="text-embedding-3-large"
            )
        elif backend == "ollama":  # must have ollama running locally with the following model
            return OllamaEmbeddings(model="gemma:2b")
        elif backend == "hf":  # must have key update in env:HF_TOKEN
            return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        else:
            # Fix: the original message only mentioned 'azure' and 'openai'
            # although 'ollama' and 'hf' are also supported.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai', 'ollama', or 'hf'.")
common/utility/llm_factory.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Any
4
+ from langchain_openai.chat_models import ChatOpenAI, AzureChatOpenAI
5
+ from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
6
+ # from azure.identity import DefaultAzureCredential
7
+ from huggingface_hub import login
8
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
9
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
10
+ from langchain_groq import ChatGroq
11
+ # from langchain_openai import OpenAIEmbeddings
12
+
13
class LLMFactory:
    """
    Factory class to provide LLM and embedding model instances for different providers.

    All methods are static; the class acts purely as a namespace for provider
    dispatch. Credentials are taken from ``kwargs['api_key']`` first, then from
    environment variables (OPENAI_API_KEY, HF_TOKEN, GROQ_API_KEY).
    """

    @staticmethod
    def get_llm(provider: str, **kwargs) -> Any:
        """
        Returns a chat/completion LLM instance based on the provider.
        Supported providers: openai, azureopenai, huggingface, ollama, groq

        Args:
            provider: Provider identifier (compared case-sensitively).
            **kwargs: Optional overrides — ``api_key`` and ``model_name``.

        Raises:
            ValueError: If ``provider`` is not one of the supported values.
                Note: the 'azureopenai' branch below is commented out, so that
                value currently also raises ValueError.
        """
        if provider == "openai":
            # OpenAI Chat Model
            return ChatOpenAI(
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY")),
                model_name=kwargs.get("model_name", "gpt-4")
            )

        # elif provider == "azureopenai":
        #     # Azure OpenAI Chat Model using Azure Identity for token
        #     credential = DefaultAzureCredential()
        #     token = credential.get_token("https://cognitiveservices.azure.com/.default").token
        #     if not token:
        #         raise ValueError("Token is required for AzureChatOpenAI.")
        #     return AzureChatOpenAI(
        #         azure_endpoint=kwargs["endpoint"],
        #         azure_deployment=kwargs.get("deployment_name", "gpt-4"),
        #         api_version=kwargs["api_version"],
        #         api_key=token
        #     )

        # pip install langchain langchain-huggingface huggingface_hub
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            # NOTE(review): recent langchain-huggingface releases construct
            # ChatHuggingFace(llm=<HuggingFaceEndpoint>) rather than taking
            # repo_id/task/model_kwargs directly — confirm this call against
            # the pinned package version before relying on this branch.
            return ChatHuggingFace(
                repo_id=kwargs.get("model_name", "mistralai/Mistral-Nemo-Instruct-2407"),  # Or any other chat-friendly model
                task="text-generation",
                model_kwargs={
                    "temperature": 0.7,
                    "max_new_tokens": 256
                }
            )

        elif provider == "ollama":
            # Ollama local model (requires a local Ollama server with the model pulled)
            return ChatOllama(
                model=kwargs.get("model_name", "gemma:2b"),
                temperature=0
            )

        elif provider == "groq":
            # Groq LLM
            return ChatGroq(
                model=kwargs.get("model_name", "Gemma2-9b-It"),
                max_tokens=512,
                api_key=kwargs.get("api_key", os.environ.get("GROQ_API_KEY"))
            )

        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @staticmethod
    def get_embedding_model(provider: str, **kwargs) -> Any:
        """
        Returns an embedding model instance based on the provider.
        Supported providers: openai, huggingface

        Also handles 'ollama' (local server) and rejects 'groq' explicitly,
        since Groq exposes no embedding endpoint here.

        Args:
            provider: Provider identifier.
            **kwargs: Optional overrides — ``api_key`` and ``model_name``.

        Raises:
            ValueError: For 'groq' (no embedding support) or any unknown provider.
        """
        if provider == "openai":
            return OpenAIEmbeddings(
                model=kwargs.get("model_name", "text-embedding-3-large"),
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY"))
            )
        # if provider == "azureopenai":
        #     # Get the Azure Credential
        #     credential = DefaultAzureCredential()
        #     token=credential.get_token("https://cognitiveservices.azure.com/.default").token

        #     if not token:
        #         raise ValueError("Token is required for AzureOpenAIEmbeddings.")
        #     return AzureOpenAIEmbeddings(
        #         azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
        #         azure_deployment=kwargs.get("azure_deployment", "text-embedding-3-large"),
        #         api_version=os.environ["AZURE_OPENAI_API_VERSION"],
        #         api_key=token
        #     )
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            return HuggingFaceEmbeddings(
                model_name=kwargs.get("model_name", "all-MiniLM-L6-v2")
            )
        elif provider == "groq":
            raise ValueError(f"No embedding support from the provider: {provider}")
        elif provider == "ollama":
            # Requires a local Ollama server with the model pulled.
            return OllamaEmbeddings(model=kwargs.get("model_name", "gemma:2b"))
        else:
            raise ValueError(f"Unsupported embedding provider: {provider}")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the OpenAI cookbook token counter.

        Args:
            messages: Iterable of dicts whose values are strings (e.g.
                role/content chat messages).

        Returns:
            int: Estimated prompt token count for a gpt-3.5-turbo-style chat.
        """
        # Always uses the gpt-3.5-turbo encoding, regardless of which model
        # get_llm() actually returned — the count is therefore approximate
        # for other providers/models.
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        tokens_per_message = 3  # <|start|>, role, <|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
+ return num_tokens
common/utility/llm_factory2.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ from typing import Union
4
+ # from azure.identity import DefaultAzureCredential
5
+ from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI
6
+
7
+
8
class LLMFactory:
    """
    A static utility class to create and return LLM instances based on the input type.
    """

    @staticmethod
    def get_llm(llm_type: str) -> "Union[AzureChatOpenAI, ChatOpenAI]":
        """
        Returns an LLM instance based on the specified type.

        Parameters:
            llm_type (str): The type of LLM to return. Valid values are
                'azure', 'openai', or 'openai_chat' (case-insensitive).

        Returns:
            Union[AzureChatOpenAI, ChatOpenAI]: The LLM instance.

        Raises:
            NotImplementedError: For 'azure' (implementation commented out).
            ValueError: For any unrecognised ``llm_type``.
        """
        kind = llm_type.lower()
        if kind == "azure":
            # Fix: the original branch was a bare `pass` and silently
            # returned None; fail loudly instead of deferring the error.
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # return AzureChatOpenAI(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError("Azure chat models are not configured in this build.")
        elif kind in ("openai", "openai_chat"):
            # Fix: 'openai' and 'openai_chat' were two byte-identical
            # branches; they are merged into a single one.
            return ChatOpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name="gpt-4"
            )
        else:
            # Fix: message now lists every accepted value.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai', or 'openai_chat'.")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the Open AI cookbook token counter.

        Each message is sandwiched as <|start|>role{message}<|end|>, so a
        fixed per-message overhead is added on top of the encoded content.
        """
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

        tokens_per_message = 3  # token1:<|start|>, token2:system(or user or assistant), token3:<|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for value in message.values():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
+ return num_tokens
common/utility/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import datetime
3
+
4
def log_call(func):
    """
    A decorator that logs when a function is called and when it finishes.

    The wrapped function's return value is passed through unchanged; any
    exception is logged and re-raised.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        def _now():
            # Fix: the original captured one timestamp before the call and
            # reused it on the finish/error lines, so long-running calls
            # logged a misleading completion time. Take a fresh one per line.
            return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        arg_list = ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )
        print(f"[{_now()}] 🚀 Calling: {func.__name__}({arg_list})")
        try:
            result = func(*args, **kwargs)
            print(f"[{_now()}] ✅ Finished: {func.__name__}")
            return result
        except Exception as e:
            print(f"[{_now()}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
pyproject.toml CHANGED
@@ -67,6 +67,7 @@ dependencies = [
67
  "logfire",
68
  "serpapi",
69
  "smithery>=0.4.4",
 
70
 
71
  # =======================
72
  # WEB SCRAPING
@@ -100,6 +101,7 @@ dependencies = [
100
  # =======================
101
  "scikit-learn>=1.7.2",
102
  "huggingface_hub<=1.1.4",
 
103
 
104
  # =======================
105
  # IPYNB SUPPORT
 
67
  "logfire",
68
  "serpapi",
69
  "smithery>=0.4.4",
70
+ "sendgrid",
71
 
72
  # =======================
73
  # WEB SCRAPING
 
101
  # =======================
102
  "scikit-learn>=1.7.2",
103
  "huggingface_hub<=1.1.4",
104
+ "datasets>=4.4.1",
105
 
106
  # =======================
107
  # IPYNB SUPPORT
run.py CHANGED
@@ -1,11 +1,215 @@
1
- import os
2
- import subprocess
3
- import sys
4
-
5
- # Use module execution to guarantee Streamlit runs inside the current interpreter
6
- subprocess.run([
7
- sys.executable, "-m", "streamlit",
8
- "run",
9
- os.path.join("ui", "app.py"),
10
- "--server.runOnSave", "true"
11
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Universal App Launcher for AgenticAI Projects
4
+
5
+ Usage:
6
+ python run.py <app_name> [--port PORT] [--help]
7
+
8
+ Examples:
9
+ python run.py healthcare
10
+ python run.py deep-research --port 8502
11
+ python run.py stock-advisor
12
+ python run.py --list
13
+ """
14
+
15
+ import sys
16
+ import os
17
+ import subprocess
18
+ import argparse
19
+ from pathlib import Path
20
+ from typing import Dict, Optional
21
+
22
+
23
+ # App registry - maps app names to their paths and entry points
24
+ APP_REGISTRY: Dict[str, Dict[str, str]] = {
25
+ "healthcare": {
26
+ "path": "src/healthcare-assistant",
27
+ "entry": "app.py",
28
+ "description": "Healthcare Assistant - Medical information with RAG and web search"
29
+ },
30
+ "deep-research": {
31
+ "path": "src/deep-research",
32
+ "entry": "app.py",
33
+ "description": "Deep Research AI - Comprehensive research assistant"
34
+ },
35
+ "stock-advisor": {
36
+ "path": "src/stock-advisor",
37
+ "entry": "app.py",
38
+ "description": "Stock Advisor - Financial analysis and stock recommendations"
39
+ },
40
+ "travel-agent": {
41
+ "path": "src/travel-agent",
42
+ "entry": "app.py",
43
+ "description": "Travel Agent - Trip planning and travel recommendations"
44
+ },
45
+ "trip-planner": {
46
+ "path": "src/trip-planner",
47
+ "entry": "app.py",
48
+ "description": "Trip Planner - Detailed trip itinerary planning"
49
+ },
50
+ "chatbot": {
51
+ "path": "src/chatbot",
52
+ "entry": "app.py",
53
+ "description": "General Chatbot - Multi-purpose conversational AI"
54
+ },
55
+ "accessibility": {
56
+ "path": "src/accessibility",
57
+ "entry": "app.py",
58
+ "description": "Accessibility Tools - Assistive technology applications"
59
+ }
60
+ }
61
+
62
+
63
+ def print_banner():
64
+ """Print a nice banner."""
65
+ print("=" * 70)
66
+ print("🚀 AgenticAI Projects Launcher".center(70))
67
+ print("=" * 70)
68
+ print()
69
+
70
+
71
+ def list_apps():
72
+ """List all available apps."""
73
+ print_banner()
74
+ print("Available Applications:\n")
75
+
76
+ max_name_len = max(len(name) for name in APP_REGISTRY.keys())
77
+
78
+ for name, config in sorted(APP_REGISTRY.items()):
79
+ print(f" {name.ljust(max_name_len + 2)} - {config['description']}")
80
+
81
+ print("\n" + "=" * 70)
82
+ print("\nUsage: python run.py <app_name> [--port PORT]")
83
+ print("Example: python run.py healthcare --port 8501\n")
84
+
85
+
86
+ def validate_app(app_name: str) -> Optional[Dict[str, str]]:
87
+ """
88
+ Validate that the app exists and its files are present.
89
+
90
+ Args:
91
+ app_name: Name of the app to validate
92
+
93
+ Returns:
94
+ App configuration dict if valid, None otherwise
95
+ """
96
+ if app_name not in APP_REGISTRY:
97
+ print(f"❌ Error: Unknown app '{app_name}'")
98
+ print(f"\nAvailable apps: {', '.join(sorted(APP_REGISTRY.keys()))}")
99
+ print("\nRun 'python run.py --list' to see all available apps.")
100
+ return None
101
+
102
+ config = APP_REGISTRY[app_name]
103
+ project_root = Path(__file__).parent
104
+ app_path = project_root / config["path"] / config["entry"]
105
+
106
+ if not app_path.exists():
107
+ print(f"❌ Error: App file not found at {app_path}")
108
+ return None
109
+
110
+ return config
111
+
112
+
113
+ def launch_app(app_name: str, port: Optional[int] = None):
114
+ """
115
+ Launch a Streamlit app.
116
+
117
+ Args:
118
+ app_name: Name of the app to launch
119
+ port: Optional port number (default: 8501)
120
+ """
121
+ config = validate_app(app_name)
122
+ if not config:
123
+ sys.exit(1)
124
+
125
+ project_root = Path(__file__).parent
126
+ app_dir = project_root / config["path"]
127
+ app_file = config["entry"]
128
+
129
+ print_banner()
130
+ print(f"📱 Launching: {config['description']}")
131
+ print(f"📂 Location: {config['path']}")
132
+ print(f"🌐 Entry Point: {app_file}")
133
+
134
+ # Build streamlit command
135
+ cmd = ["streamlit", "run", app_file]
136
+
137
+ # Add port if specified
138
+ if port:
139
+ cmd.extend(["--server.port", str(port)])
140
+ print(f"🔌 Port: {port}")
141
+ else:
142
+ print(f"🔌 Port: 8501 (default)")
143
+
144
+ print("\n" + "=" * 70)
145
+ print("\n🎯 Starting application...\n")
146
+
147
+ try:
148
+ # Change to app directory and run
149
+ os.chdir(app_dir)
150
+ subprocess.run(cmd)
151
+ except KeyboardInterrupt:
152
+ print("\n\n👋 Application stopped by user")
153
+ except FileNotFoundError:
154
+ print("\n❌ Error: Streamlit not found. Please install it:")
155
+ print(" pip install streamlit")
156
+ sys.exit(1)
157
+ except Exception as e:
158
+ print(f"\n❌ Error launching app: {e}")
159
+ sys.exit(1)
160
+
161
+
162
+ def main():
163
+ """Main entry point."""
164
+ parser = argparse.ArgumentParser(
165
+ description="Universal launcher for AgenticAI project applications",
166
+ formatter_class=argparse.RawDescriptionHelpFormatter,
167
+ epilog="""
168
+ Examples:
169
+ python run.py healthcare # Launch healthcare chatbot
170
+ python run.py deep-research --port 8502 # Launch on custom port
171
+ python run.py --list # List all available apps
172
+
173
+ Available Apps:
174
+ """ + "\n ".join(f"{name}: {config['description']}"
175
+ for name, config in sorted(APP_REGISTRY.items()))
176
+ )
177
+
178
+ parser.add_argument(
179
+ "app_name",
180
+ nargs="?",
181
+ help="Name of the app to launch"
182
+ )
183
+
184
+ parser.add_argument(
185
+ "--port",
186
+ type=int,
187
+ help="Port number for Streamlit server (default: 8501)"
188
+ )
189
+
190
+ parser.add_argument(
191
+ "--list",
192
+ action="store_true",
193
+ help="List all available apps"
194
+ )
195
+
196
+ args = parser.parse_args()
197
+
198
+ # Handle --list flag
199
+ if args.list:
200
+ list_apps()
201
+ return
202
+
203
+ # Require app name if not listing
204
+ if not args.app_name:
205
+ parser.print_help()
206
+ print("\n")
207
+ list_apps()
208
+ return
209
+
210
+ # Launch the app
211
+ launch_app(args.app_name, args.port)
212
+
213
+
214
+ if __name__ == "__main__":
215
+ main()
src/deep-research/.env.name ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY=""
2
+ GROQ_API_KEY=""
3
+ GOOGLE_API_KEY=""
4
+ #https://serper.dev/api-keys
5
+ SERPER_API_KEY=""
6
+ #https://newsapi.org/v2/everything
7
+ NEWS_API_KEY=""
8
+ #https://app.sendgrid.com/ - bm80177
9
+ SENDGRID_API_KEY=""
src/deep-research/Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1 \
4
+ DEBIAN_FRONTEND=noninteractive \
5
+ PYTHONPATH=/app:/app/common:$PYTHONPATH
6
+
7
+ WORKDIR /app
8
+
9
+ # System deps
10
+ RUN apt-get update && apt-get install -y \
11
+ git build-essential curl \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Install uv
15
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
16
+ ENV PATH="/root/.local/bin:$PATH"
17
+
18
+ # Copy project metadata
19
+ COPY pyproject.toml .
20
+ COPY uv.lock .
21
+
22
+ # Copy required folders
23
+ COPY common/ ./common/
24
+ COPY src/deep-research/ ./src/deep-research/
25
+
26
+ # Install dependencies using uv, then export and install with pip to system
27
+ RUN uv sync --frozen --no-dev && \
28
+ uv pip install -e . --system
29
+
30
+ # Copy entry point
31
+ COPY run.py .
32
+
33
+ EXPOSE 7860
34
+
35
+ CMD ["python", "run.py", "deep-research", "--port", "7860"]
src/deep-research/README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Deep Researcher # Give your app a title
3
+ emoji: 🤖 # Pick an emoji
4
+ colorFrom: indigo # Theme start color
5
+ colorTo: blue # Theme end color
6
+ sdk: docker # SDK type
7
+ sdk_version: "4.39.0" # Example Gradio version
8
+ app_file: ui/app.py # <-- points to your app.py inside ui/
9
+ pinned: false
10
+ ---
11
+
12
+ # AI Deep Researcher
13
+
14
+ **AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
15
+
16
+ To achieve this, the project integrates the following technologies and AI features:
17
+ - **OpenAI SDK**
18
+ - **OpenAI Agents**
19
+ - **OpenAI WebSearch Tool**
20
+ - **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
21
+ - **News API** (https://newsapi.org/v2/everything)
22
+ - **SendGrid** (for emailing report)
23
+ - **LLMs** - (OpenAI, Gemini, Groq)
24
+
25
+ ## How It Works
26
+ The system is a multi-agent solution, where each agent has a specific responsibility:
27
+
28
+ 1. **Planner Agent**
29
+ - Receives the user query and builds a structured query plan.
30
+
31
+ 2. **Guardrail Agent**
32
+ - Validates user input and ensures compliance.
33
+ - Stops the workflow if the input contains inappropriate or unparliamentary words.
34
+
35
+ 3. **Search Agent**
36
+ - Executes the query plan.
37
+ - Runs multiple web searches in parallel to gather data.
38
+
39
+ 4. **Writer Agent**
40
+ - Reads results from all search agents.
41
+ - Generates a well-formatted, consolidated report.
42
+
43
+ 5. **Email Agent (not functional at present)**
44
+ - Responsible for sending the report via email using SendGrid.
45
+
46
+ 6. **Orchestrator**
47
+ - The entry point of the system.
48
+ - Facilitates communication and workflow between all agents.
49
+
50
+ ## Project Folder Structure
51
+
52
+ ```
53
+ deep-research/
54
+ ├── ui/
55
+ │ ├── app.py # Main Streamlit application entry point
56
+ │ └── __pycache__/ # Python bytecode cache
57
+ ├── appagents/
58
+ │ ├── __init__.py # Package initialization
59
+ │ ├── orchestrator.py # Orchestrator agent - coordinates all agents
60
+ │ ├── planner_agent.py # Planner agent - builds structured query plans
61
+ │ ├── guardrail_agent.py # Guardrail agent - validates user input
62
+ │ ├── search_agent.py # Search agent - performs web searches
63
+ │ ├── writer_agent.py # Writer agent - generates consolidated reports
64
+ │ ├── email_agent.py # Email agent - sends reports via email (not functional)
65
+ │ └── __pycache__/ # Python bytecode cache
66
+ ├── core/
67
+ │ ├── __init__.py # Package initialization
68
+ │ ├── logger.py # Centralized logging configuration
69
+ │ └── __pycache__/ # Python bytecode cache
70
+ ├── tools/
71
+ │ ├── __init__.py # Package initialization
72
+ │ ├── google_tools.py # Google search utilities
73
+ │ ├── time_tools.py # Time-related utility functions
74
+ │ └── __pycache__/ # Python bytecode cache
75
+ ├── prompts/
76
+ │ ├── __init__.py # Package initialization (if present)
77
+ │ ├── planner_prompt.txt # Prompt for planner agent (if present)
78
+ │ ├── guardrail_prompt.txt # Prompt for guardrail agent (if present)
79
+ │ ├── search_prompt.txt # Prompt for search agent (if present)
80
+ │ └── writer_prompt.txt # Prompt for writer agent (if present)
81
+ ├── Dockerfile # Docker configuration for container deployment
82
+ ├── pyproject.toml # Project metadata and dependencies (copied from root)
83
+ ├── uv.lock # Locked dependency versions (copied from root)
84
+ ├── README.md # Project documentation
85
+ └── run.py # Script to run the application locally (if present)
86
+ ```
87
+
88
+ ## File Descriptions
89
+
90
+ ### UI Layer (`ui/`)
91
+ - **app.py** - Main Streamlit web application that provides the user interface. Handles:
92
+ - Text input for research queries
93
+ - Run/Download buttons (PDF, Markdown)
94
+ - Real-time streaming of results
95
+ - Display of final research reports
96
+ - Session state management
97
+ - Button enable/disable during streaming
98
+
99
+ ### Agents (`appagents/`)
100
+ - **orchestrator.py** - Central coordinator that:
101
+ - Manages the multi-agent workflow
102
+ - Handles communication between all agents
103
+ - Streams results back to the UI
104
+ - Implements the research pipeline
105
+
106
+ - **planner_agent.py** - Creates a structured plan for the query:
107
+ - Breaks down user query into actionable research steps
108
+ - Defines search queries and research angles
109
+
110
+ - **guardrail_agent.py** - Validates user input:
111
+ - Checks for inappropriate content
112
+ - Ensures compliance with policies
113
+ - Stops workflow if violations detected
114
+
115
+ - **search_agent.py** - Executes web searches:
116
+ - Performs parallel web searches
117
+ - Integrates with Google Search / Serper API
118
+ - Gathers raw research data
119
+
120
+ - **writer_agent.py** - Generates final report:
121
+ - Consolidates search results
122
+ - Formats findings into structured markdown
123
+ - Creates well-organized research summaries
124
+
125
+ - **email_agent.py** - Email delivery (not functional):
126
+ - Intended to send reports via SendGrid
127
+ - Currently not integrated in the workflow
128
+
129
+ ### Core Utilities (`core/`)
130
+ - **logger.py** - Centralized logging configuration:
131
+ - Provides consistent logging across agents
132
+ - Handles log levels and formatting
133
+
134
+ ### Tools (`tools/`)
135
+ - **google_tools.py** - Google/Serper API wrapper:
136
+ - Executes web searches
137
+ - Handles API authentication and response parsing
138
+
139
+ - **time_tools.py** - Utility functions:
140
+ - Time-related operations
141
+ - Timestamp management
142
+
143
+ ### Configuration Files
144
+ - **Dockerfile** - Container deployment:
145
+ - Builds Docker image with Python 3.12
146
+ - Installs dependencies using `uv`
147
+ - Sets up Streamlit server on port 7860
148
+ - Configures PYTHONPATH for module imports
149
+
150
+ - **pyproject.toml** - Project metadata:
151
+ - Package name: "agents"
152
+ - Python version requirement: 3.12
153
+ - Lists all dependencies (OpenAI, LangChain, Streamlit, etc.)
154
+
155
+ - **uv.lock** - Dependency lock file:
156
+ - Ensures reproducible builds
157
+ - Pins exact versions of all dependencies
158
+
159
+ ## Key Technologies
160
+
161
+ | Component | Technology | Purpose |
162
+ |-----------|-----------|---------|
163
+ | LLM Framework | OpenAI Agents | Multi-agent orchestration |
164
+ | Web Search | Serper API / Google Search | Research data gathering |
165
+ | Web UI | Streamlit | User interface and interaction |
166
+ | Document Export | ReportLab | PDF generation from markdown |
167
+ | Async Operations | AsyncIO | Parallel agent execution |
168
+ | Dependencies | UV | Fast Python package management |
169
+ | Containerization | Docker | Cloud deployment |
170
+
171
+ ## Running Locally
172
+
173
+ ```bash
174
+ # Install dependencies
175
+ uv sync
176
+
177
+ # Set environment variables defined in .env.name file
178
+ export OPENAI_API_KEY="your-key"
179
+ export SERPER_API_KEY="your-key"
180
+
181
+ # Run the Streamlit app
182
+ python run.py deep-research
183
+ ```
184
+
185
+ ## Deployment
186
+
187
+ The project is deployed on Hugging Face Spaces as a Docker container:
188
+ - **Space**: https://huggingface.co/spaces/mishrabp/deep-research
189
+ - **URL**: https://huggingface.co/spaces/mishrabp/deep-research
190
+ - **Trigger**: Automatic deployment on push to `main` branch
191
+ - **Configuration**: `.github/workflows/deep-research-app-hf.yml`
src/deep-research/app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import time
4
+ import html
5
+ from io import BytesIO
6
+ import os
7
+ import sys
8
+
9
+ # Add project root
10
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
11
+
12
+ from dotenv import load_dotenv
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph
14
+ from reportlab.lib.styles import getSampleStyleSheet
15
+ from appagents.orchestrator import Orchestrator
16
+ from agents import SQLiteSession
17
+
18
+ load_dotenv(override=True)
19
+
20
+ # --------------------
21
+ # Page config
22
+ # --------------------
23
+ st.set_page_config(page_title="Deep Research AI", layout="wide", page_icon="🧠")
24
+
25
+ # --------------------
26
+ # Premium CSS
27
+ # --------------------
28
+ st.markdown("""
29
+ <style>
30
+ /* Global Defaults */
31
+ .stApp {
32
+ background-color: #f8f9fa;
33
+ font-family: 'Inter', sans-serif;
34
+ }
35
+
36
+ /* Remove default Streamlit top padding but add space for Fixed Header - Revert: Just remove top padding */
37
+ .block-container {
38
+ padding-top: 1rem !important; /* Small buffer */
39
+ }
40
+
41
+ /* Sticky Header */
42
+ header[data-testid="stHeader"] { display: none; } /* Hide default streamlit header */
43
+
44
+ .header-container {
45
+ position: sticky;
46
+ top: 0;
47
+ z-index: 999;
48
+
49
+ background: linear-gradient(135deg, #0f2027 0%, #203a43 50%, #2c5364 100%);
50
+ color: #ffffff;
51
+ padding: 3rem 2rem;
52
+ display: flex;
53
+ justify-content: space-between;
54
+ align-items: center;
55
+ box-shadow: 0 4px 20px rgba(0,0,0,0.15);
56
+
57
+ margin-top: -4rem; /* Pull up aggressively to cover top gap */
58
+ margin-left: -5rem;
59
+ margin-right: -5rem;
60
+
61
+ border-bottom: none;
62
+ border-radius: 0 0 1rem 1rem;
63
+ }
64
+
65
+ .app-brand {
66
+ font-family: 'Inter', sans-serif;
67
+ font-size: 1.6rem;
68
+ font-weight: 700;
69
+ letter-spacing: -0.02em;
70
+ color: #ffffff;
71
+ display: flex;
72
+ gap: 0.75rem;
73
+ align-items: center;
74
+ }
75
+
76
+ /* Centered Search Area */
77
+ .search-wrapper {
78
+ max-width: 800px;
79
+ margin: 4rem auto 2rem auto;
80
+ text-align: center;
81
+ }
82
+
83
+ .search-headline {
84
+ font-size: 2.5rem;
85
+ font-weight: 800;
86
+ color: #111;
87
+ margin-bottom: 0.5rem;
88
+ letter-spacing: -0.03em;
89
+ }
90
+
91
+ .search-subtext {
92
+ font-size: 1.1rem;
93
+ color: #666;
94
+ margin-bottom: 2.5rem;
95
+ }
96
+
97
+ /* Input styling override */
98
+ .stTextArea textarea {
99
+ border-radius: 12px !important;
100
+ border: 1px solid #e0e0e0 !important;
101
+ padding: 1rem !important;
102
+ background: white !important;
103
+ box-shadow: 0 4px 12px rgba(0,0,0,0.03) !important;
104
+ font-size: 1.1rem !important;
105
+ }
106
+ .stTextArea textarea:focus {
107
+ border-color: #667eea !important;
108
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.1) !important;
109
+ }
110
+
111
+ /* Custom Button */
112
+ .stButton button {
113
+ background: black !important;
114
+ color: white !important;
115
+ border-radius: 30px !important;
116
+ padding: 0.5rem 2rem !important;
117
+ border: none !important;
118
+ box-shadow: 0 4px 10px rgba(0,0,0,0.2) !important;
119
+ transition: transform 0.1s ease;
120
+ }
121
+ .stButton button:hover {
122
+ transform: scale(1.02);
123
+ }
124
+
125
+ /* Report Paper Style */
126
+ .report-paper {
127
+ max-width: 850px;
128
+ margin: 2rem auto;
129
+ background: white;
130
+ padding: 4rem;
131
+ min-height: 800px;
132
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1), 0 20px 40px rgba(0,0,0,0.05);
133
+ color: #2c3e50;
134
+ border: 1px solid #f0f0f0;
135
+ }
136
+ </style>
137
+ """, unsafe_allow_html=True)
138
+
139
+ # --------------------
140
+ # Session State
141
+ # --------------------
142
+ if "session_id" not in st.session_state:
143
+ st.session_state.session_id = str(id(st))
144
+
145
+ if "final_report" not in st.session_state:
146
+ st.session_state.final_report = ""
147
+
148
+ if "is_researching" not in st.session_state:
149
+ st.session_state.is_researching = False
150
+
151
+ if "research_logs" not in st.session_state:
152
+ st.session_state.research_logs = []
153
+
154
+ # --------------------
155
+ # Helpers
156
+ # --------------------
157
+ def make_pdf_bytes(text: str) -> bytes:
158
+ buf = BytesIO()
159
+ doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
160
+ styles = getSampleStyleSheet()
161
+ story = []
162
+
163
+ for line in text.split("\n"):
164
+ stripped = line.strip()
165
+ if not stripped:
166
+ story.append(Paragraph(" ", styles["Normal"]))
167
+ continue
168
+
169
+ if stripped.startswith("# "):
170
+ story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
171
+ elif stripped.startswith("## "):
172
+ story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
173
+ elif stripped.startswith("- "):
174
+ story.append(Paragraph("• " + html.escape(stripped[2:]), styles["Normal"]))
175
+ else:
176
+ story.append(Paragraph(html.escape(stripped), styles["Normal"]))
177
+
178
+ doc.build(story)
179
+ buf.seek(0)
180
+ return buf.read()
181
+
182
+ # --------------------
183
+ # Logic
184
+ # --------------------
185
+ async def run_research(query: str):
186
+ session_id = st.session_state.session_id
187
+ session = SQLiteSession(f"session_{session_id}.db")
188
+ orchestrator = Orchestrator(session=session)
189
+
190
+ report_content = ""
191
+ status_container = st.status("🔍 Researching...", expanded=True)
192
+
193
+ try:
194
+ async for chunk in orchestrator.run(query):
195
+ # Filtering heuristic: Orchestrator yields status messages then the final report.
196
+ # Status messages are short and specific.
197
+ if (chunk.startswith("View trace") or
198
+ chunk.startswith("Searches") or
199
+ chunk.startswith("Report written") or
200
+ chunk.startswith("Starting")):
201
+
202
+ status_container.markdown(chunk)
203
+ else:
204
+ # Assume this is the report content (or the final error note)
205
+ report_content = chunk
206
+ status_container.markdown("Processing final output...")
207
+
208
+ st.session_state.final_report = report_content
209
+ st.session_state.is_researching = False
210
+ status_container.update(label="✅ Research Complete", state="complete", expanded=False)
211
+ st.rerun()
212
+
213
+ except Exception as e:
214
+ status_container.update(label="❌ Error", state="error")
215
+ st.error(f"Error: {e}")
216
+ st.session_state.is_researching = False
217
+
218
+ # --------------------
219
+ # Layout
220
+ # --------------------
221
+
222
+ # Custom Header
223
+ st.markdown("""
224
+ <div class="header-container">
225
+ <div class="app-brand">
226
+ <span>🧠</span> Deep Research <i>(OpenAI Agentic)</i>
227
+ </div>
228
+ <div>
229
+ <!-- Could add profile or other links here -->
230
+ </div>
231
+ </div>
232
+ """, unsafe_allow_html=True)
233
+
234
+ # Sidebar Settings
235
+ with st.sidebar:
236
+ st.header("⚙️ Configuration")
237
+ research_depth = st.select_slider("Research Depth", options=["Quick", "Standard", "Deep"], value="Standard")
238
+ report_format = st.selectbox("Report Format", ["Academic", "Business", "Creative"])
239
+ st.caption("Settings affect the tone and depth of the final report.")
240
+
241
+ st.divider()
242
+ if st.button("🗑️ Clear History"):
243
+ st.session_state.final_report = ""
244
+ st.rerun()
245
+
246
+ # Main Interface
247
+ if not st.session_state.final_report and not st.session_state.is_researching:
248
+ # Centered Input View
249
+ st.markdown("""
250
+ <div class="search-wrapper">
251
+ <div class="search-headline">What do you want to know?</div>
252
+ <div class="search-subtext">Deep Research will browse the web, analyze sources, and write a comprehensive report for you.</div>
253
+ </div>
254
+ """, unsafe_allow_html=True)
255
+
256
+ col_c1, col_c2, col_c3 = st.columns([1, 2, 1])
257
+ with col_c2:
258
+ query = st.text_area("Research Topic", height=60, placeholder="e.g. The future of quantum computing in drug discovery...", label_visibility="collapsed")
259
+
260
+ col_b1, col_b2, col_b3 = st.columns([1, 1, 1])
261
+ with col_b2:
262
+ if st.button("Start Research", use_container_width=True):
263
+ if query.strip():
264
+ st.session_state.is_researching = True
265
+ st.session_state.current_query = query
266
+ st.rerun()
267
+
268
+ elif st.session_state.is_researching:
269
+ # Researching View
270
+ st.markdown("""
271
+ <div class="search-wrapper">
272
+ <div class="search-headline">Compiling Report...</div>
273
+ </div>
274
+ """, unsafe_allow_html=True)
275
+
276
+ # Trigger async run
277
+ asyncio.run(run_research(st.session_state.current_query))
278
+
279
+ else:
280
+ # Result View - Title removed to let Sticky Header be the main branding,
281
+ # and Report itself be the focus.
282
+
283
+ # Action Toolbar
284
+ col_a1, col_a2, col_a3, col_a4 = st.columns([2, 1, 1, 2])
285
+ with col_a2:
286
+ pdf_bytes = make_pdf_bytes(st.session_state.final_report)
287
+ st.download_button("📄 Download PDF", pdf_bytes, "report.pdf", mime="application/pdf", use_container_width=True)
288
+ with col_a3:
289
+ if st.button("🔄 New Search", use_container_width=True):
290
+ st.session_state.final_report = ""
291
+ st.rerun()
292
+
293
+ # Final Report Render
294
+ # We use a container with a class to apply the 'sheet' look via global CSS if possible,
295
+ # or just use standard Markdown rendering which looks best.
296
+
297
+ with st.container():
298
+ st.markdown(st.session_state.final_report)
299
+
src/deep-research/appagents/__init__.py ADDED
File without changes
src/deep-research/appagents/email_agent.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict
3
+
4
+ import sendgrid
5
+ from sendgrid.helpers.mail import Email, Mail, Content, To
6
+ from agents import Agent, function_tool
7
+ from core.logger import log_call
8
+
9
+
10
+ @function_tool
11
+ @log_call
12
+ def send_email(subject: str, html_body: str) -> Dict[str, str]:
13
+ """ Send an email with the given subject and HTML body """
14
+ sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
15
+ from_email = Email("bm80177@gmail.com") # put your verified sender here
16
+ to_email = To("bibhup_mishra@yahoo.com") # put your recipient here
17
+ content = Content("text/html", html_body)
18
+ mail = Mail(from_email, to_email, subject, content).get()
19
+ response = sg.client.mail.send.post(request_body=mail)
20
+ print("Email response", response.status_code)
21
+ return {"status": "success"}
22
+
23
+ INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
24
+ You will be provided with a detailed report. You should use your tool to send one email, providing the
25
+ report converted into clean, well presented HTML with an appropriate subject line."""
26
+
27
+ email_agent = Agent(
28
+ name="Email agent",
29
+ instructions=INSTRUCTIONS,
30
+ tools=[send_email],
31
+ model="gpt-4o-mini",
32
+ )
src/deep-research/appagents/guardrail_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel
3
+ from agents import (
4
+ Agent,
5
+ Runner,
6
+ input_guardrail,
7
+ GuardrailFunctionOutput,
8
+ )
9
+ from tools.time_tools import TimeTools
10
+ from openai import AsyncOpenAI
11
+
12
+
13
+ # ✅ Step 1: Define structured output schema
14
+ class UnparliamentaryCheckOutput(BaseModel):
15
+ has_unparliamentary_language: bool
16
+ explanation: str
17
+
18
+
19
+ # ✅ Step 2: Define the LLM guardrail agent
20
+ guardrail_agent = Agent(
21
+ name="Unparliamentary language check",
22
+ instructions=(
23
+ "Analyze the user input and determine if it contains any unparliamentary, "
24
+ "offensive, or disrespectful language. "
25
+ "If it does, set has_unparliamentary_language=true and explain briefly why. "
26
+ "Otherwise, set it to false."
27
+ ),
28
+ output_type=UnparliamentaryCheckOutput,
29
+ model="gpt-4o-mini",
30
+ )
31
+
32
+
33
+ # ✅ Step 3: Use the input guardrail decorator
34
+ @input_guardrail
35
+ async def guardrail_against_unparliamentary(ctx, agent, message: str):
36
+ """Guardrail function that blocks messages with unparliamentary words."""
37
+ result = await Runner.run(guardrail_agent, message, context=ctx.context)
38
+ has_unparliamentary_language = result.final_output.has_unparliamentary_language
39
+
40
+ return GuardrailFunctionOutput(
41
+ output_info={
42
+ "found_unparliamentary_word": result.final_output.model_dump()
43
+ },
44
+ tripwire_triggered=has_unparliamentary_language,
45
+ )
src/deep-research/appagents/orchestrator.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Runner, trace, gen_trace_id, SQLiteSession
2
+ from appagents.search_agent import search_agent
3
+ from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
4
+ from appagents.writer_agent import writer_agent, ReportData
5
+ from appagents.email_agent import email_agent
6
+ from agents.exceptions import InputGuardrailTripwireTriggered
7
+ from core.logger import log_call
8
+ import asyncio
9
+
10
+ class Orchestrator:
11
+
12
+ def __init__(self, session: SQLiteSession | None = None):
13
+ self.session = session or SQLiteSession()
14
+
15
+ @log_call
16
+ async def run(self, query: str):
17
+ """ Run the deep research process, yielding the status updates and the final report"""
18
+ trace_id = gen_trace_id()
19
+ with trace("Deep Research Orchestrator", trace_id=trace_id):
20
+ print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
21
+ yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
22
+ print("Starting research...")
23
+ search_plan = await self.plan_searches(query)
24
+
25
+ if not search_plan or not getattr(search_plan, "searches", []):
26
+ note = getattr(search_plan, "note", "")
27
+ if "unparliamentary" in note.lower():
28
+ print("⚠️ Guardrail triggered – unparliamentary language detected.")
29
+ yield note
30
+ else:
31
+ yield note or "No search results found, ending research."
32
+ return
33
+
34
+ yield "Searches planned, starting to search..."
35
+ search_results = await self.perform_searches(search_plan)
36
+ yield "Searches complete, writing report..."
37
+ report = await self.write_report(query, search_results)
38
+ yield "Report written, sending email..."
39
+ # await self.send_email(report)
40
+ # yield "Email sent, research complete"
41
+ yield report.markdown_report
42
+
43
+ @log_call
44
+ async def plan_searches(self, query: str) -> WebSearchPlan:
45
+ """Plan the searches to perform for the query."""
46
+ print("Planning searches...")
47
+
48
+ try:
49
+ result = await Runner.run(
50
+ planner_agent, # use self. unless global
51
+ f"Query: {query}",
52
+ session=self.session,
53
+ )
54
+
55
+ print(f"Will perform {len(result.final_output.searches)} searches")
56
+ return result.final_output_as(WebSearchPlan)
57
+
58
+ except InputGuardrailTripwireTriggered as e:
59
+ explanation = getattr(e, "result", {}).get("output_info", {}).get(
60
+ "found_unparliamentary_word", {}
61
+ ).get("explanation", "")
62
+ print("⚠️ Guardrail triggered – unparliamentary language detected.")
63
+ return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")
64
+
65
+ except Exception as e:
66
+ print(f"❌ Error during planning: {e}")
67
+ return WebSearchPlan(searches=[], note="An error occurred while planning searches.")
68
+
69
+ @log_call
70
+ async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
71
+ """ Perform the searches to perform for the query """
72
+ print("Searching...")
73
+ num_completed = 0
74
+ tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
75
+ results = []
76
+ for task in asyncio.as_completed(tasks):
77
+ result = await task
78
+ if result is not None:
79
+ results.append(result)
80
+ num_completed += 1
81
+ print(f"Searching... {num_completed}/{len(tasks)} completed")
82
+ print("Finished searching")
83
+ return results
84
+
85
+ @log_call
86
+ async def search(self, item: WebSearchItem) -> str | None:
87
+ """ Perform a search for the query """
88
+ input = f"Search term: {item.query}\nReason for searching: {item.reason}"
89
+ try:
90
+ result = await Runner.run(
91
+ search_agent,
92
+ input,
93
+ )
94
+ return str(result.final_output)
95
+ except Exception:
96
+ return None
97
+
98
+ @log_call
99
+ async def write_report(self, query: str, search_results: list[str]) -> ReportData:
100
+ """ Write the report for the query """
101
+ print("Thinking about report...")
102
+ input = f"Original query: {query}\nSummarized search results: {search_results}"
103
+ result = await Runner.run(
104
+ writer_agent,
105
+ input,
106
+ )
107
+
108
+ print("Finished writing report")
109
+ return result.final_output_as(ReportData)
110
+
111
+ @log_call
112
+ async def send_email(self, report: ReportData) -> None:
113
+ print("Writing email...")
114
+ result = await Runner.run(
115
+ email_agent,
116
+ report.markdown_report,
117
+ )
118
+ print("Email sent")
119
+ return report
src/deep-research/appagents/planner_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel, Field
3
+ from agents import Agent, OpenAIChatCompletionsModel
4
+ from openai import AsyncOpenAI
5
+ from tools.time_tools import TimeTools
6
+ from appagents.guardrail_agent import guardrail_against_unparliamentary
7
+
8
# Number of distinct search queries the planner must produce per request.
HOW_MANY_SEARCHES = 10

# System prompt for the planner agent. HOW_MANY_SEARCHES is interpolated at
# import time, so changing the constant afterwards has no effect on this string.
INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."
14
+
15
class WebSearchItem(BaseModel):
    """One planned web search: the query, why it matters, and a timestamp."""

    # Why this particular search helps answer the user's query.
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    # The literal search-engine query string.
    query: str = Field(description="The search term to use for the web search.")
    # Populated by the model via the current-datetime tool (see INSTRUCTIONS).
    current_date_time: str = Field(description="Current date and time.")
20
+
21
class WebSearchPlan(BaseModel):
    """Structured output of the planner agent: the set of searches to run.

    Fix: callers construct blocked/error plans as
    ``WebSearchPlan(searches=[], note=...)``, but no ``note`` field was
    declared, so pydantic silently discarded the keyword argument. The
    field is now declared with a ``None`` default (backward-compatible).
    """

    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")
    # Optional human-readable status, e.g. why planning was blocked or failed.
    note: str | None = Field(default=None, description="Optional note explaining why no searches were planned.")
23
+
24
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

# Groq exposed through its OpenAI-compatible endpoint.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Plain model name; resolved by the agents SDK against the default OpenAI client.
openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.
# (Presumably why the OpenAI model is chosen here over gemini/groq — it must
# combine the datetime tool with structured WebSearchPlan output.)

planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],  # lets the model fetch "now"
    output_type=WebSearchPlan,           # structured JSON output
    input_guardrails=[guardrail_against_unparliamentary],
)
src/deep-research/appagents/search_agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
3
+ from openai import AsyncOpenAI
4
+
5
+ from agents.model_settings import ModelSettings
6
+ from tools.google_tools import GoogleTools
7
+
8
# Earlier prompt variants, kept for reference:
# 1) Short summary (2-3 paragraphs, <300 words):
# INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
# produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300 \
# words. Capture the main points. Write succintly, no need to have complete sentences or good \
# grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
# essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# 2) Detailed per-result synthesis with preserved links:
# INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web and produce a detailed synthesis of the results. \
# The output must be structured into sections, one for each search result provided by the tool. \
# For each result, you MUST include the full link/URL and the title. \
# Your response should capture the main points and relevant details from all sources. \
# Do not add any personal commentary, introductions, or conclusions. \
# Format the entire output as a single, detailed block of text in markdown format, ensuring ALL source links are visible and preserved."

# Active prompt: 3-5 paragraph, <500-word summary per search term.
INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
26
+
27
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)
31
+
32
+ # search_agent = Agent(
33
+ # name="Search agent",
34
+ # instructions=INSTRUCTIONS,
35
+ # tools=[WebSearchTool(search_context_size="low")],
36
+ # # tools=[GoogleTools.search],
37
+ # model="gpt-4o-mini",
38
+ # model_settings=ModelSettings(tool_choice="required"),
39
+ # )
40
+
41
# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Start the MCP search server over stdio and return the tools it advertises.

    Fixes:
    - ``MCPServerStdio`` was referenced without ever being imported, so
      calling this function raised ``NameError``; it is now imported from
      the agents SDK's MCP support.
    - The redundant function-local ``import os`` was removed (``os`` is
      already imported at module level).

    NOTE(review): the server shuts down when the ``async with`` block exits,
    so the returned tool handles may not be callable afterwards — confirm
    the intended usage pattern with callers.
    """
    from agents.mcp import MCPServerStdio  # local import keeps module import cheap

    # Absolute path ensures the script is found even from a notebook.
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start the MCP server and list the tools it advertises.
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools
67
+
68
# # Note: Gemini does not like
# search_agent = Agent(
#     name="Search agent",
#     instructions=INSTRUCTIONS,
#     # tools=[WebSearchTool(search_context_size="low")],
#     tools=[GoogleTools.search],
#     model=gemini_model,
#     model_settings=ModelSettings(tool_choice="required"),
# )


# Active agent: Gemini model + the Serper-backed Google search tool. The
# OpenAI-hosted WebSearchTool alternative is left commented for reference.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),  # always call the search tool
)
87
+
src/deep-research/appagents/writer_agent.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydantic import BaseModel, Field
3
+ from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
4
+ from openai import AsyncOpenAI
5
+
6
# System prompt for the report writer: outline first, then a long-form
# markdown report (target 5-10 pages / 1000+ words).
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)
14
+
15
+
16
class ReportData(BaseModel):
    """Structured output of the writer agent."""

    # Two-to-three sentence executive summary of the findings.
    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")

    # The full long-form report, in markdown.
    markdown_report: str = Field(description="The final report")

    # Suggested follow-up research topics.
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")
22
+
23
# Google Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# Earlier variant using an OpenAI-hosted model, kept for reference:
# writer_agent = Agent(
#     name="WriterAgent",
#     instructions=INSTRUCTIONS,
#     model="gpt-5-mini",
#     output_type=ReportData,
# )

# Active writer agent: Gemini model producing structured ReportData output.
writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
src/deep-research/core/__init__.py ADDED
File without changes
src/deep-research/core/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import datetime
3
+
4
def log_call(func):
    """
    Decorator that logs each call to *func* with a timestamp and its
    arguments, and logs (then re-raises) any exception the call raises.

    Fix: this decorator is applied to ``async def`` methods elsewhere in the
    project, but the original sync-only wrapper returned the coroutine
    without awaiting it, so exceptions raised inside the coroutine escaped
    the ``try``/``except`` and were never logged. Coroutine functions now
    get an async wrapper that awaits the call.
    """
    import inspect

    def _render_args(args, kwargs):
        # One comma-separated repr list covering positionals and keywords.
        return ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )

    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_render_args(args, kwargs)})")
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_render_args(args, kwargs)})")
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
src/deep-research/prompts/__init__.py ADDED
File without changes
src/deep-research/tools/__init__.py ADDED
File without changes
src/deep-research/tools/google_tools.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from agents import function_tool
5
+ from core.logger import log_call
6
+
7
+ # Load environment variables once
8
+ load_dotenv()
9
+
10
+
11
+ # ============================================================
12
+ # 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
13
+ # ============================================================
14
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search). It acts as a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results, or a human-readable error
            message on failure (the tool contract is always a string).
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
            }

            # Fix: timeout added — without one, a stalled connection hangs
            # the agent run indefinitely. Timeouts raise RequestException,
            # which is already handled below.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            # Only the first num_results entries are formatted.
            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
80
+
81
+
82
+ # ============================================================
83
+ # 🔹 OPENAI & OTHER MODEL TOOLS
84
+ # ============================================================
85
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    @log_call  # added for consistency with GoogleTools.search call logging
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Parameters:
        -----------
        prompt : str
            User-provided prompt for the model.
        model : str, optional (default="gpt-4o-mini")
            Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
        --------
        str
            Model's response content as text.
            If an error occurs (network/API), returns an error message.

        Example:
        --------
        query_openai("Explain AI in finance")

        Output:
        "AI in finance refers to the use of machine learning and natural language
        processing techniques to automate trading, risk assessment, and customer service..."
        """
        try:
            from openai import OpenAI  # delayed import keeps module import cheap
            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.choices[0].message.content
        except Exception as e:
            # Tool contract is always a string, never an exception.
            return f"Error querying OpenAI API: {e}"
src/deep-research/tools/time_tools.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from agents import function_tool
3
+ from core.logger import log_call
4
+
5
class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Returns the current date and time as a formatted string.

        Args:
            format (str): Optional datetime format (default: "YYYY-MM-DD HH:MM:SS")

        Returns:
            str: Current date and time in the specified format
        """
        # Format "now" directly with the requested pattern in one expression.
        return datetime.now().strftime(format)
uv.lock CHANGED
@@ -14,6 +14,7 @@ dependencies = [
14
  { name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
15
  { name = "beautifulsoup4" },
16
  { name = "chromadb" },
 
17
  { name = "ddgs" },
18
  { name = "duckduckgo-search" },
19
  { name = "faiss-cpu" },
@@ -52,6 +53,7 @@ dependencies = [
52
  { name = "reportlab" },
53
  { name = "requests" },
54
  { name = "scikit-learn" },
 
55
  { name = "sentence-transformers" },
56
  { name = "serpapi" },
57
  { name = "smithery" },
@@ -79,6 +81,7 @@ requires-dist = [
79
  { name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
80
  { name = "beautifulsoup4", specifier = ">=4.12.3" },
81
  { name = "chromadb", specifier = "==1.3.5" },
 
82
  { name = "ddgs", specifier = ">=9.9.2" },
83
  { name = "duckduckgo-search" },
84
  { name = "faiss-cpu", specifier = ">=1.13.0" },
@@ -117,6 +120,7 @@ requires-dist = [
117
  { name = "reportlab", specifier = ">=4.4.5" },
118
  { name = "requests", specifier = ">=2.32.3" },
119
  { name = "scikit-learn", specifier = ">=1.7.2" },
 
120
  { name = "sentence-transformers", specifier = ">=5.1.2" },
121
  { name = "serpapi" },
122
  { name = "smithery", specifier = ">=0.4.4" },
@@ -719,6 +723,31 @@ wheels = [
719
  { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
720
  ]
721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  [[package]]
723
  name = "ddgs"
724
  version = "9.9.2"
@@ -757,6 +786,15 @@ wheels = [
757
  { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
758
  ]
759
 
 
 
 
 
 
 
 
 
 
760
  [[package]]
761
  name = "distro"
762
  version = "1.9.0"
@@ -917,6 +955,11 @@ wheels = [
917
  { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
918
  ]
919
 
 
 
 
 
 
920
  [[package]]
921
  name = "gitdb"
922
  version = "4.0.12"
@@ -2039,6 +2082,22 @@ wheels = [
2039
  { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
2040
  ]
2041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2042
  [[package]]
2043
  name = "multitasking"
2044
  version = "0.0.12"
@@ -3083,6 +3142,15 @@ wheels = [
3083
  { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
3084
  ]
3085
 
 
 
 
 
 
 
 
 
 
3086
  [[package]]
3087
  name = "python-multipart"
3088
  version = "0.0.20"
@@ -3364,6 +3432,20 @@ wheels = [
3364
  { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
3365
  ]
3366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3367
  [[package]]
3368
  name = "sentence-transformers"
3369
  version = "5.1.2"
@@ -4019,6 +4101,18 @@ wheels = [
4019
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
4020
  ]
4021
 
 
 
 
 
 
 
 
 
 
 
 
 
4022
  [[package]]
4023
  name = "wikipedia"
4024
  version = "1.4.0"
 
14
  { name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
15
  { name = "beautifulsoup4" },
16
  { name = "chromadb" },
17
+ { name = "datasets" },
18
  { name = "ddgs" },
19
  { name = "duckduckgo-search" },
20
  { name = "faiss-cpu" },
 
53
  { name = "reportlab" },
54
  { name = "requests" },
55
  { name = "scikit-learn" },
56
+ { name = "sendgrid" },
57
  { name = "sentence-transformers" },
58
  { name = "serpapi" },
59
  { name = "smithery" },
 
81
  { name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
82
  { name = "beautifulsoup4", specifier = ">=4.12.3" },
83
  { name = "chromadb", specifier = "==1.3.5" },
84
+ { name = "datasets", specifier = ">=4.4.1" },
85
  { name = "ddgs", specifier = ">=9.9.2" },
86
  { name = "duckduckgo-search" },
87
  { name = "faiss-cpu", specifier = ">=1.13.0" },
 
120
  { name = "reportlab", specifier = ">=4.4.5" },
121
  { name = "requests", specifier = ">=2.32.3" },
122
  { name = "scikit-learn", specifier = ">=1.7.2" },
123
+ { name = "sendgrid" },
124
  { name = "sentence-transformers", specifier = ">=5.1.2" },
125
  { name = "serpapi" },
126
  { name = "smithery", specifier = ">=0.4.4" },
 
723
  { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
724
  ]
725
 
726
+ [[package]]
727
+ name = "datasets"
728
+ version = "4.4.1"
729
+ source = { registry = "https://pypi.org/simple" }
730
+ dependencies = [
731
+ { name = "dill" },
732
+ { name = "filelock" },
733
+ { name = "fsspec", extra = ["http"] },
734
+ { name = "httpx" },
735
+ { name = "huggingface-hub" },
736
+ { name = "multiprocess" },
737
+ { name = "numpy" },
738
+ { name = "packaging" },
739
+ { name = "pandas" },
740
+ { name = "pyarrow" },
741
+ { name = "pyyaml" },
742
+ { name = "requests" },
743
+ { name = "tqdm" },
744
+ { name = "xxhash" },
745
+ ]
746
+ sdist = { url = "https://files.pythonhosted.org/packages/93/bf/0dae295d6d1ba0b1a200a9dd216838464b5bbd05da01407cb1330b377445/datasets-4.4.1.tar.gz", hash = "sha256:80322699aa8c0bbbdb7caa87906da689c3c2e29523cff698775c67f28fdab1fc", size = 585341, upload-time = "2025-11-05T16:00:38.162Z" }
747
+ wheels = [
748
+ { url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
749
+ ]
750
+
751
  [[package]]
752
  name = "ddgs"
753
  version = "9.9.2"
 
786
  { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
787
  ]
788
 
789
+ [[package]]
790
+ name = "dill"
791
+ version = "0.4.0"
792
+ source = { registry = "https://pypi.org/simple" }
793
+ sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
794
+ wheels = [
795
+ { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
796
+ ]
797
+
798
  [[package]]
799
  name = "distro"
800
  version = "1.9.0"
 
955
  { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
956
  ]
957
 
958
+ [package.optional-dependencies]
959
+ http = [
960
+ { name = "aiohttp" },
961
+ ]
962
+
963
  [[package]]
964
  name = "gitdb"
965
  version = "4.0.12"
 
2082
  { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
2083
  ]
2084
 
2085
+ [[package]]
2086
+ name = "multiprocess"
2087
+ version = "0.70.18"
2088
+ source = { registry = "https://pypi.org/simple" }
2089
+ dependencies = [
2090
+ { name = "dill" },
2091
+ ]
2092
+ sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
2093
+ wheels = [
2094
+ { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
2095
+ { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
2096
+ { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
2097
+ { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
2098
+ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
2099
+ ]
2100
+
2101
  [[package]]
2102
  name = "multitasking"
2103
  version = "0.0.12"
 
3142
  { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
3143
  ]
3144
 
3145
+ [[package]]
3146
+ name = "python-http-client"
3147
+ version = "3.3.7"
3148
+ source = { registry = "https://pypi.org/simple" }
3149
+ sdist = { url = "https://files.pythonhosted.org/packages/56/fa/284e52a8c6dcbe25671f02d217bf2f85660db940088faf18ae7a05e97313/python_http_client-3.3.7.tar.gz", hash = "sha256:bf841ee45262747e00dec7ee9971dfb8c7d83083f5713596488d67739170cea0", size = 9377, upload-time = "2022-03-09T20:23:56.386Z" }
3150
+ wheels = [
3151
+ { url = "https://files.pythonhosted.org/packages/29/31/9b360138f4e4035ee9dac4fe1132b6437bd05751aaf1db2a2d83dc45db5f/python_http_client-3.3.7-py3-none-any.whl", hash = "sha256:ad371d2bbedc6ea15c26179c6222a78bc9308d272435ddf1d5c84f068f249a36", size = 8352, upload-time = "2022-03-09T20:23:54.862Z" },
3152
+ ]
3153
+
3154
  [[package]]
3155
  name = "python-multipart"
3156
  version = "0.0.20"
 
3432
  { url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
3433
  ]
3434
 
3435
+ [[package]]
3436
+ name = "sendgrid"
3437
+ version = "6.12.5"
3438
+ source = { registry = "https://pypi.org/simple" }
3439
+ dependencies = [
3440
+ { name = "cryptography" },
3441
+ { name = "python-http-client" },
3442
+ { name = "werkzeug" },
3443
+ ]
3444
+ sdist = { url = "https://files.pythonhosted.org/packages/da/fa/f718b2b953f99c1f0085811598ac7e31ccbd4229a81ec2a5290be868187a/sendgrid-6.12.5.tar.gz", hash = "sha256:ea9aae30cd55c332e266bccd11185159482edfc07c149b6cd15cf08869fabdb7", size = 50310, upload-time = "2025-09-19T06:23:09.229Z" }
3445
+ wheels = [
3446
+ { url = "https://files.pythonhosted.org/packages/bd/55/b3c3880a77082e8f7374954e0074aafafaa9bc78bdf9c8f5a92c2e7afc6a/sendgrid-6.12.5-py3-none-any.whl", hash = "sha256:96f92cc91634bf552fdb766b904bbb53968018da7ae41fdac4d1090dc0311ca8", size = 102173, upload-time = "2025-09-19T06:23:07.93Z" },
3447
+ ]
3448
+
3449
  [[package]]
3450
  name = "sentence-transformers"
3451
  version = "5.1.2"
 
4101
  { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
4102
  ]
4103
 
4104
+ [[package]]
4105
+ name = "werkzeug"
4106
+ version = "3.1.4"
4107
+ source = { registry = "https://pypi.org/simple" }
4108
+ dependencies = [
4109
+ { name = "markupsafe" },
4110
+ ]
4111
+ sdist = { url = "https://files.pythonhosted.org/packages/45/ea/b0f8eeb287f8df9066e56e831c7824ac6bab645dd6c7a8f4b2d767944f9b/werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e", size = 864687, upload-time = "2025-11-29T02:15:22.841Z" }
4112
+ wheels = [
4113
+ { url = "https://files.pythonhosted.org/packages/2f/f9/9e082990c2585c744734f85bec79b5dae5df9c974ffee58fe421652c8e91/werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905", size = 224960, upload-time = "2025-11-29T02:15:21.13Z" },
4114
+ ]
4115
+
4116
  [[package]]
4117
  name = "wikipedia"
4118
  version = "1.4.0"