Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- Dockerfile +8 -4
- common/__init__.py +0 -0
- common/aagents/__init__.py +0 -0
- common/aagents/google_agent.py +139 -0
- common/aagents/healthcare_agent.py +100 -0
- common/aagents/news_agent.py +106 -0
- common/aagents/weather_agent.py +69 -0
- common/aagents/web_agent.py +53 -0
- common/aagents/web_research_agent.py +83 -0
- common/aagents/yf_agent.py +78 -0
- common/mcp/README.md +139 -0
- common/mcp/__init__.py +0 -0
- common/mcp/mcp_server.py +171 -0
- common/mcp/tools/__init__.py +0 -0
- common/mcp/tools/google_tools.py +139 -0
- common/mcp/tools/news_tools.py +200 -0
- common/mcp/tools/rag_tool.py +106 -0
- common/mcp/tools/search_tools.py +115 -0
- common/mcp/tools/time_tools.py +32 -0
- common/mcp/tools/weather_tools.py +235 -0
- common/mcp/tools/yf_tools.py +192 -0
- common/rag/rag.py +94 -0
- common/utility/__init__.py +0 -0
- common/utility/embedding_factory.py +49 -0
- common/utility/llm_factory.py +130 -0
- common/utility/llm_factory2.py +75 -0
- common/utility/logger.py +22 -0
- pyproject.toml +2 -0
- run.py +215 -11
- src/deep-research/.env.name +9 -0
- src/deep-research/Dockerfile +35 -0
- src/deep-research/README.md +191 -0
- src/deep-research/app.py +299 -0
- src/deep-research/appagents/__init__.py +0 -0
- src/deep-research/appagents/email_agent.py +32 -0
- src/deep-research/appagents/guardrail_agent.py +45 -0
- src/deep-research/appagents/orchestrator.py +119 -0
- src/deep-research/appagents/planner_agent.py +45 -0
- src/deep-research/appagents/search_agent.py +87 -0
- src/deep-research/appagents/writer_agent.py +41 -0
- src/deep-research/core/__init__.py +0 -0
- src/deep-research/core/logger.py +22 -0
- src/deep-research/prompts/__init__.py +0 -0
- src/deep-research/tools/__init__.py +0 -0
- src/deep-research/tools/google_tools.py +132 -0
- src/deep-research/tools/time_tools.py +22 -0
- uv.lock +94 -0
Dockerfile
CHANGED
|
@@ -2,7 +2,7 @@ FROM python:3.12-slim
|
|
| 2 |
|
| 3 |
ENV PYTHONUNBUFFERED=1 \
|
| 4 |
DEBIAN_FRONTEND=noninteractive \
|
| 5 |
-
PYTHONPATH=/app:$PYTHONPATH
|
| 6 |
|
| 7 |
WORKDIR /app
|
| 8 |
|
|
@@ -19,13 +19,17 @@ ENV PATH="/root/.local/bin:$PATH"
|
|
| 19 |
COPY pyproject.toml .
|
| 20 |
COPY uv.lock .
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Install dependencies using uv, then export and install with pip to system
|
| 23 |
RUN uv sync --frozen --no-dev && \
|
| 24 |
uv pip install -e . --system
|
| 25 |
|
| 26 |
-
# Copy
|
| 27 |
-
COPY . .
|
| 28 |
|
| 29 |
EXPOSE 7860
|
| 30 |
|
| 31 |
-
CMD ["
|
|
|
|
| 2 |
|
| 3 |
ENV PYTHONUNBUFFERED=1 \
|
| 4 |
DEBIAN_FRONTEND=noninteractive \
|
| 5 |
+
PYTHONPATH=/app:/app/common:$PYTHONPATH
|
| 6 |
|
| 7 |
WORKDIR /app
|
| 8 |
|
|
|
|
| 19 |
COPY pyproject.toml .
|
| 20 |
COPY uv.lock .
|
| 21 |
|
| 22 |
+
# Copy required folders
|
| 23 |
+
COPY common/ ./common/
|
| 24 |
+
COPY src/deep-research/ ./src/deep-research/
|
| 25 |
+
|
| 26 |
# Install dependencies using uv, then export and install with pip to system
|
| 27 |
RUN uv sync --frozen --no-dev && \
|
| 28 |
uv pip install -e . --system
|
| 29 |
|
| 30 |
+
# Copy entry point
|
| 31 |
+
COPY run.py .
|
| 32 |
|
| 33 |
EXPOSE 7860
|
| 34 |
|
| 35 |
+
CMD ["python", "run.py", "deep-research", "--port", "7860"]
|
common/__init__.py
ADDED
|
File without changes
|
common/aagents/__init__.py
ADDED
|
File without changes
|
common/aagents/google_agent.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Google search agent module for web search and information retrieval."""
import os
from agents import Agent, OpenAIChatCompletionsModel
from dotenv import load_dotenv
from mcp.tools.google_tools import google_search, google_search_recent
from mcp.tools.search_tools import duckduckgo_search, fetch_page_content
from mcp.tools.time_tools import current_datetime
from openai import AsyncOpenAI

# ---------------------------------------------------------
# Load environment variables
# ---------------------------------------------------------
load_dotenv()

# Gemini is reached through its OpenAI-compatible endpoint, so the regular
# AsyncOpenAI client works once pointed at this base URL.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)

# Groq model is configured but not attached to the agent below; presumably
# kept available for a quick model swap — TODO confirm before removing.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Agent wiring: Serper.dev-backed Google tools first, DuckDuckGo as fallback,
# plus page fetching and a datetime tool for temporal context.
google_agent = Agent(
    name="GoogleSearchAgent",
    model=gemini_model,
    tools=[current_datetime, google_search, google_search_recent, duckduckgo_search, fetch_page_content],
    instructions="""
    You are a GoogleSearchAgent specialized in finding and retrieving information from the web.
    Your role is to help users find accurate, relevant, and up-to-date information using web search.

    ## Tool Priority & Usage

    **PRIMARY TOOLS (Google via Serper.dev API):**

    1. 'google_search': General Google search with recent results (last 24 hours by default)
       - Use for most search queries
       - Returns: Title, Link, Snippet
       - Input: { "query": "search terms", "num_results": 3 }

    2. 'google_search_recent': Time-filtered Google search
       - Use when user specifies a time range (today, this week, this month, this year)
       - Timeframes: "d" (day), "w" (week), "m" (month), "y" (year)
       - Input: { "query": "search terms", "num_results": 3, "timeframe": "d" }

    **FALLBACK TOOL (DuckDuckGo Search):**

    3. 'duckduckgo_search': Use ONLY when Google tools fail or SERPER_API_KEY is missing
       - Provides similar search functionality
       - Input: { "query": "search terms", "max_results": 5, "search_type": "text", "timelimit": "d" }

    **CONTENT EXTRACTION:**

    4. 'fetch_page_content': Extract full text content from a specific URL
       - Use when user wants detailed information from a specific page
       - Use after search to get complete content for analysis
       - Input: { "url": "https://example.com", "timeout": 3 }

    **TIME CONTEXT:**

    5. 'current_datetime': Get current date/time for context
       - Input: { "format": "natural" }

    ## Workflow

    1. **Understand the Query**: Determine what information the user needs
       - General search → use google_search
       - Time-specific search → use google_search_recent with appropriate timeframe
       - Deep dive into a page → use fetch_page_content after getting the URL

    2. **Try Primary Tools First**: Always attempt Google tools (Serper.dev) before fallback

    3. **Fallback if Needed**: If Google tools return an error (missing API key, no results),
       automatically use duckduckgo_search

    4. **Extract Content if Needed**: If user wants detailed information or summary,
       use fetch_page_content on relevant URLs from search results

    5. **Provide Context**: Use current_datetime when temporal context is important

    ## Search Strategy

    **For factual queries:**
    - Use google_search or google_search_recent
    - Summarize findings from multiple sources
    - Cite sources with URLs

    **For recent events/news:**
    - Use google_search_recent with timeframe="d" or "w"
    - Focus on most recent information
    - Include publication dates if available

    **For in-depth research:**
    - First: Use google_search to find relevant pages
    - Then: Use fetch_page_content to extract full content from top results
    - Synthesize information from multiple sources

    ## Output Format

    Structure your response based on the query type:

    **For Search Results:**

    **Search Results for "[Query]"** - [Current Date]

    1. **[Title]**
       - Source: [URL]
       - Summary: [Snippet or extracted info]

    2. **[Next Result]**
    ...

    **Key Findings:**
    - [Synthesized insight 1]
    - [Synthesized insight 2]

    **For Content Extraction:**

    **Analysis of [Page Title]**

    [Summarized content with key points]

    Source: [URL]

    ## Important Rules

    - Always cite sources with URLs
    - Prioritize recent information when relevant
    - If API key is missing, inform user and use fallback automatically
    - Never fabricate information or sources
    - Synthesize information from multiple sources when possible
    - Be transparent about limitations (e.g., "Based on search results from...")
    - Use fetch_page_content sparingly (only when deep content is needed)
    - Respect timeouts and handle errors gracefully
    """,
)

# Public API: the agent itself plus its tools re-exported for convenience.
__all__ = ["google_agent", "google_search", "google_search_recent", "duckduckgo_search", "fetch_page_content", "current_datetime"]
|
common/aagents/healthcare_agent.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Healthcare RAG Agent - Combines RAG retrieval with web search for comprehensive medical information."""
import os
from agents import Agent, OpenAIChatCompletionsModel
from dotenv import load_dotenv
from openai import AsyncOpenAI

# Import tools
from mcp.tools.rag_tool import rag_search, UserContext
from mcp.tools.search_tools import duckduckgo_search
from mcp.tools.time_tools import current_datetime


# ---------------------------------------------------------
# Load environment variables
# ---------------------------------------------------------
load_dotenv()

# ---------------------------------------------------------
# Model Configuration
# ---------------------------------------------------------
# Gemini is reached through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)

# Groq model is configured but not attached to the agent below; presumably
# kept available for a quick model swap — TODO confirm before removing.
# NOTE(review): `current_datetime` is imported but not in the tools list — verify.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# ---------------------------------------------------------
# Healthcare RAG Agent
# ---------------------------------------------------------
# Generic over UserContext so the rag_search tool can read per-user context.
healthcare_agent = Agent[UserContext](
    name="HealthcareRAGAgent",
    model=gemini_model,
    tools=[rag_search, duckduckgo_search],
    instructions="""
    You are a healthcare information retrieval agent. You retrieve information from tools and synthesize it into well-formatted markdown responses.

    ## CRITICAL RULES

    1. **NEVER use your pre-trained knowledge** - Only use tool results
    2. **ALWAYS call rag_search first** for every question
    3. **Evaluate RAG results carefully** - if content is useless (just references, acknowledgments, page numbers), call duckduckgo_search
    4. **If rag_search returns "No relevant information", MUST call duckduckgo_search**
    5. **Synthesize tool results into clear, well-structured markdown**
    6. **If both tools fail, say "I don't have information on this topic"**

    ## Workflow (MANDATORY)

    For EVERY question:

    Step 1: Call `rag_search(query="user question")`

    Step 2: Evaluate the result:
    - Returns "No relevant information"? → MUST call duckduckgo_search (go to Step 3)
    - Returns content BUT it's NOT useful (just references, acknowledgments, page numbers, file names, credits)? → MUST call duckduckgo_search (go to Step 3)
    - Returns useful information (definitions, explanations, medical details)? → Synthesize and format (go to Step 4)

    Step 3: Call `duckduckgo_search(params={"query": "user question", "max_results": 3})`

    Step 4: Synthesize and format response using markdown

    ## Response Format (Markdown)

    ## [Topic Name]

    [Brief introduction/definition]

    ### Key Points
    - **Point 1**: Description
    - **Point 2**: Description

    ### Detailed Information

    [Organized paragraphs with medical details]

    ---

    **Source:** Knowledge Base / Web Search

    **Disclaimer:** This information is for educational purposes only. Always consult a qualified healthcare provider for medical advice.

    ## Critical Reminders

    🚨 You MUST:
    - Call rag_search first, evaluate if content is useful
    - If RAG content is useless (references/credits), call duckduckgo_search
    - Use proper markdown formatting
    - Cite the source

    🚨 You MUST NOT:
    - Use your pre-trained knowledge
    - Skip evaluating RAG content quality
    - Accept useless RAG results without calling web search
    """,
)

# Public API: only the agent is exported.
__all__ = ["healthcare_agent"]
|
common/aagents/news_agent.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""News agent module for fetching and analyzing news articles."""
import os
from agents import Agent, OpenAIChatCompletionsModel
from dotenv import load_dotenv
from mcp.tools.news_tools import get_top_headlines, search_news, get_news_by_category
from mcp.tools.search_tools import duckduckgo_search
from mcp.tools.time_tools import current_datetime
from openai import AsyncOpenAI

# ---------------------------------------------------------
# Load environment variables
# ---------------------------------------------------------
load_dotenv()

# Gemini is reached through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)

# Groq model is configured but not attached to the agent below; presumably
# kept available for a quick model swap — TODO confirm before removing.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Agent wiring: NewsAPI-backed tools first, DuckDuckGo as fallback, plus a
# datetime tool for temporal context.
news_agent = Agent(
    name="NewsAgent",
    model=gemini_model,
    tools=[current_datetime, get_top_headlines, search_news, get_news_by_category, duckduckgo_search],
    instructions="""
    You are a NewsAgent specialized in fetching and analyzing recent news articles and headlines.
    Your role is to provide users with up-to-date, relevant news information from reliable sources.

    ## Tool Priority & Usage

    **PRIMARY TOOLS (NewsAPI.org):**
    1. 'get_top_headlines': Fetch the latest top headlines for a specific country
       - Use when user asks for general news, breaking news, or top stories
       - Input: { "country": "us", "num_results": 5 }

    2. 'search_news': Search for news articles about a specific topic
       - Use when user asks about a specific subject, company, person, or event
       - Input: { "query": "topic name", "num_results": 5, "days_back": 7 }

    3. 'get_news_by_category': Fetch headlines by category
       - Use when user asks for category-specific news (business, tech, sports, etc.)
       - Categories: "business", "entertainment", "general", "health", "science", "sports", "technology"
       - Input: { "category": "business", "country": "us", "num_results": 5 }

    **FALLBACK TOOL (DuckDuckGo Search):**
    4. 'duckduckgo_search': Use ONLY when NewsAPI tools fail or API key is missing
       - Set search_type to "news" for news-specific results
       - Input: { "query": "topic", "max_results": 5, "search_type": "news", "timelimit": "d" }

    **TIME CONTEXT:**
    5. 'current_datetime': Use to provide current date/time context in your responses
       - Input: { "format": "natural" }

    ## Workflow

    1. **Determine Intent**: Understand what type of news the user wants
       - General headlines → use get_top_headlines
       - Topic-specific → use search_news
       - Category-specific → use get_news_by_category

    2. **Try Primary Tools First**: Always attempt NewsAPI tools before fallback

    3. **Fallback if Needed**: If NewsAPI returns an error (missing API key, no results),
       use duckduckgo_search with search_type="news"

    4. **Include Time Context**: Use current_datetime to provide temporal context

    5. **Format Response**: Present news in a clear, organized format with:
       - Headlines/titles
       - Sources
       - Publication dates
       - Brief summaries
       - URLs for full articles

    ## Output Format

    Structure your response as:

    **[News Category/Topic] - [Current Date]**

    1. **[Headline]**
       - Source: [News Source]
       - Published: [Date/Time]
       - Summary: [Brief description]
       - Read more: [URL]

    2. **[Next Headline]**
    ...

    ## Important Rules

    - Always cite sources and include publication dates
    - Prioritize recent news (within last 7 days unless specified otherwise)
    - If API key is missing, inform the user and use the fallback tool
    - Never fabricate news or sources
    - Present news objectively without bias
    - Include URLs so users can read full articles
    - Use current_datetime to ensure temporal accuracy
    """,
)

# Public API: the agent itself plus its tools re-exported for convenience.
__all__ = ["news_agent", "get_top_headlines", "search_news", "get_news_by_category", "duckduckgo_search", "current_datetime"]
|
common/aagents/weather_agent.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Weather forecast agent module.

Answers weather queries with the `get_weather_forecast` tool, falling back to
`search_weather_fallback_ddgs` / `search_weather_fallback_bs` web searches when
the weather API key is missing or the API call fails.
"""
import os

from agents import Agent, OpenAIChatCompletionsModel
from dotenv import load_dotenv
from mcp.tools.weather_tools import get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs
from mcp.tools.time_tools import current_datetime
from openai import AsyncOpenAI

# ---------------------------------------------------------
# Load environment variables
# ---------------------------------------------------------
load_dotenv()

# Gemini is reached through its OpenAI-compatible endpoint, so the regular
# AsyncOpenAI client works once pointed at this base URL.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-flash-latest", openai_client=gemini_client)

# Groq model is configured but not attached to the agent below; presumably
# kept available for a quick model swap — TODO confirm before removing.
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Agent wiring: primary weather API tool plus two web-search fallbacks; no
# structured output_type is set — the instructions ask for JSON-shaped text.
weather_agent = Agent(
    name="WeatherAgent",
    model=gemini_model,
    tools=[current_datetime, get_weather_forecast, search_weather_fallback_ddgs, search_weather_fallback_bs],
    instructions="""
    You are a Weather Forecast agent who forecasts weather information ONLY.
    You can use the 'current_datetime' tool to determine the current date as reference for the weather forecast.
    When given a query, you use the 'get_weather_forecast' tool to retrieve weather data.
    If the API key is missing or the API fails to get the forecast, you use the 'search_weather_fallback_ddgs' or 'search_weather_fallback_bs' as fallback tools to perform a web search for weather information.
    Tool: get_weather_forecast Input:
    A JSON object with the following structure:
    { "city": "The city name to get the weather for.",
      "date": "Optional date in YYYY-MM-DD format to get the forecast for a specific day. If not provided, return the current weather."
    }

    Output the weather information MUST be in a JSON well-formatted form as below:
    {
      "city": "City name",
      "forecasts": [
        {
          "date": "Date of the forecast in YYYY-MM-DD format",
          "weather": {

            "description": "Weather description",
            "temperature": "Temperature in Fahrenheit. Report both the high and low temperatures.",
            "humidity": "Humidity percentage",
            "wind_speed": "Wind speed in Miles per Hour (MPH)"
          }
        }.
      ]
    """,
)

# Public API: the agent itself plus its tools re-exported for convenience.
__all__ = ["weather_agent", "get_weather_forecast", "search_weather_fallback_ddgs", "search_weather_fallback_bs"]
|
common/aagents/web_agent.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Web search agent module for internet queries."""
import os

from agents import Agent, OpenAIChatCompletionsModel
from dotenv import load_dotenv
from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult
from openai import AsyncOpenAI

# ---------------------------------------------------------
# Load environment variables
# ---------------------------------------------------------
load_dotenv()

################################
# Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
# So we use list[dict] as output_type instead of list[searchResult].
# Then in the calling code, we can convert dicts back to searchResult models if needed.
# NOTE(review): the active code below sets output_type=list[searchResult] and
# model="gpt-4o-mini" (not Gemini), which contradicts this note — confirm intent.
################################

# Gemini is reached through its OpenAI-compatible endpoint. These clients are
# configured but not attached to the agent below (it uses "gpt-4o-mini");
# presumably kept for a quick model swap — TODO confirm before removing.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)

GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

# Single-tool search agent returning structured searchResult objects.
web_agent = Agent(
    name="WebAgent",
    model="gpt-4o-mini",
    tools=[duckduckgo_search],
    instructions="""
    You are a WebAgent that can perform web searches to find information on the internet.
    When given a query, use the 'duckduckgo_search' tool to retrieve relevant search results.
    Tool: duckduckgo_search Input:
    A JSON object with the following structure:
    { "query": "The search query string.",
      "max_results": "The maximum number of search results to return (default is 5).",
      "search_type": "The type of search to perform. Options: 'text' (default) or 'news'. Use 'news' to get publication dates.",
      "timelimit": "Time limit for search results. Options: 'd' (day), 'w' (week), 'm' (month), 'y' (year).",
      "region": "Region for search results (e.g., 'us-en', 'uk-en'). Default is 'wt-wt' (world)."
    }
    """,
    output_type=list[searchResult],
)

# Public API: the agent plus the tool and its request/result models.
__all__ = ["web_agent", "duckduckgo_search", "searchQuery", "searchResult"]
|
common/aagents/web_research_agent.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Web search agent module for internet queries."""
|
| 2 |
+
import os
|
| 3 |
+
from agents import AgentOutputSchema, function_tool, Agent
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
from mcp.tools.search_tools import duckduckgo_search, searchQuery, searchResult, fetch_page_content
|
| 7 |
+
from agents import Agent, OpenAIChatCompletionsModel
|
| 8 |
+
from openai import AsyncOpenAI
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------
|
| 11 |
+
# Load environment variables
|
| 12 |
+
# ---------------------------------------------------------
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
+
################################
|
| 16 |
+
# Learning: gemini models struggles to construct the output_type when it's a Pydantic model.
|
| 17 |
+
# So we use list[dict] as output_type instead of list[searchResult].
|
| 18 |
+
# Then in the calling code, we can convert dicts back to searchResult models if needed.
|
| 19 |
+
################################
|
| 20 |
+
|
| 21 |
+
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 22 |
+
google_api_key = os.getenv('GOOGLE_API_KEY')
|
| 23 |
+
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
|
| 24 |
+
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
|
| 25 |
+
|
| 26 |
+
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
|
| 27 |
+
groq_api_key = os.getenv('GROQ_API_KEY')
|
| 28 |
+
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
|
| 29 |
+
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
|
| 30 |
+
|
| 31 |
+
web_research_agent = Agent(
|
| 32 |
+
name="WebResearchAgent",
|
| 33 |
+
model="gpt-4o-mini",
|
| 34 |
+
# description="An agent that can perform web searches using DuckDuckGo.",
|
| 35 |
+
tools=[duckduckgo_search, fetch_page_content],
|
| 36 |
+
instructions="""
|
| 37 |
+
You are WebResearchAgent — an advanced internet research assistant with two core abilities:
|
| 38 |
+
|
| 39 |
+
1) Use the tool `duckduckgo_search` to discover relevant webpages for the user’s query.
|
| 40 |
+
2) Use the tool `fetch_page_content` to retrieve full text content from any webpage returned by the search tool.
|
| 41 |
+
|
| 42 |
+
===========================
|
| 43 |
+
AGENT RESPONSIBILITIES
|
| 44 |
+
===========================
|
| 45 |
+
|
| 46 |
+
• Always begin by invoking `duckduckgo_search` to gather an initial set of webpages relevant to the user's question.
|
| 47 |
+
|
| 48 |
+
• After receiving the search results, you MUST fetch the full content for *all result URLs* by invoking
|
| 49 |
+
`fetch_page_content` once per URL.
|
| 50 |
+
|
| 51 |
+
• These fetch calls should be made **in parallel**:
|
| 52 |
+
- Do NOT wait for one fetch call to finish before issuing the next.
|
| 53 |
+
- Issue all fetch calls immediately after you receive the search results.
|
| 54 |
+
|
| 55 |
+
• You MUST NOT wait more than 3 seconds for any individual page to respond.
|
| 56 |
+
If content is missing or a fetch fails, continue with what you have.
|
| 57 |
+
|
| 58 |
+
===========================
|
| 59 |
+
ANALYSIS & FINAL ANSWER
|
| 60 |
+
===========================
|
| 61 |
+
|
| 62 |
+
• After search and fetch operations complete, analyze:
|
| 63 |
+
– the snippets from the search results
|
| 64 |
+
– the full content from `fetch_page_content` (for pages that responded)
|
| 65 |
+
|
| 66 |
+
• Synthesize the collected information and provide a clear, factual, concise answer.
|
| 67 |
+
|
| 68 |
+
• Your final output MUST be a structured, easy-to-read Markdown summary.
|
| 69 |
+
|
| 70 |
+
===========================
|
| 71 |
+
IMPORTANT RULES
|
| 72 |
+
===========================
|
| 73 |
+
|
| 74 |
+
• Never fabricate URLs or content not returned by the tools.
|
| 75 |
+
• Never claim to have visited pages without using `fetch_page_content`.
|
| 76 |
+
• Use the tools exactly as required — search first, fetch after.
|
| 77 |
+
• The final response should answer the user’s query using the combined evidence.
|
| 78 |
+
• MUST provide references to the research.
|
| 79 |
+
"""
|
| 80 |
+
,
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
__all__ = ["web_research_agent", "duckduckgo_search", "fetch_page_content", "searchQuery", "searchResult"]
|
common/aagents/yf_agent.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Yahoo Finance agent module for financial analysis and market research."""
|
| 2 |
+
import os
|
| 3 |
+
from agents import Agent, OpenAIChatCompletionsModel
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from mcp.tools.yf_tools import get_summary, get_market_sentiment, get_history
|
| 6 |
+
from mcp.tools.time_tools import current_datetime
|
| 7 |
+
from openai import AsyncOpenAI
|
| 8 |
+
|
| 9 |
+
# ---------------------------------------------------------
|
| 10 |
+
# Load environment variables
|
| 11 |
+
# ---------------------------------------------------------
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 15 |
+
google_api_key = os.getenv('GOOGLE_API_KEY')
|
| 16 |
+
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
|
| 17 |
+
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash-exp", openai_client=gemini_client)
|
| 18 |
+
|
| 19 |
+
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
|
| 20 |
+
groq_api_key = os.getenv('GROQ_API_KEY')
|
| 21 |
+
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
|
| 22 |
+
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)
|
| 23 |
+
|
| 24 |
+
yf_agent = Agent(
|
| 25 |
+
name="YahooFinanceAgent",
|
| 26 |
+
model=gemini_model,
|
| 27 |
+
tools=[current_datetime, get_summary, get_market_sentiment, get_history],
|
| 28 |
+
instructions="""
|
| 29 |
+
You are a specialized **Financial Analysis Agent** 💰, expert in market research, financial data retrieval, and market analysis.
|
| 30 |
+
Your primary role is to provide *actionable*, *data-driven*, and *concise* financial reports based on the available tools.
|
| 31 |
+
|
| 32 |
+
## Core Directives & Priorities
|
| 33 |
+
|
| 34 |
+
1. **Time Sensitivity:** Always use the 'current_datetime' tool to ensure all analysis is contextually relevant to the current date and time.
|
| 35 |
+
Financial data is extremely time-sensitive.
|
| 36 |
+
|
| 37 |
+
2. **Financial Data Integrity:** Use the Yahoo Finance tools for specific stock/index data:
|
| 38 |
+
- 'get_summary': Get latest summary information and intraday price data for a ticker
|
| 39 |
+
- 'get_market_sentiment': Analyze recent price changes and provide market sentiment (Bullish/Bearish/Neutral)
|
| 40 |
+
- 'get_history': Fetch historical price data for a given ticker
|
| 41 |
+
|
| 42 |
+
Be precise about the date range and data source.
|
| 43 |
+
|
| 44 |
+
3. **Synthesis and Analysis:** Do not just list data. You must **synthesize** financial data (prices, volume, sentiment)
|
| 45 |
+
to provide a complete analytical perspective (e.g., "Stock X is up 5% today driven by strong market momentum").
|
| 46 |
+
|
| 47 |
+
4. **Professional Clarity:** Present information in a clear, professional, and structured format.
|
| 48 |
+
Use numerical data and financial terminology correctly.
|
| 49 |
+
|
| 50 |
+
5. **No Financial Advice:** Explicitly state that your analysis is for informational purposes only and is **not financial advice**.
|
| 51 |
+
|
| 52 |
+
6. **Tool Mandatory:** For any request involving a stock, index, or current market conditions, you **must** use
|
| 53 |
+
the appropriate tool(s) to verify data. **Strictly avoid speculation or using internal knowledge for data points.**
|
| 54 |
+
|
| 55 |
+
## Tool Usage Examples
|
| 56 |
+
|
| 57 |
+
Tool: current_datetime
|
| 58 |
+
Input: { "format": "natural" }
|
| 59 |
+
|
| 60 |
+
Tool: get_summary
|
| 61 |
+
Input: { "symbol": "AAPL", "period": "1d", "interval": "1h" }
|
| 62 |
+
|
| 63 |
+
Tool: get_market_sentiment
|
| 64 |
+
Input: { "symbol": "AAPL", "period": "1mo" }
|
| 65 |
+
|
| 66 |
+
Tool: get_history
|
| 67 |
+
Input: { "symbol": "AAPL", "period": "1mo" }
|
| 68 |
+
|
| 69 |
+
## Output Format Guidelines
|
| 70 |
+
|
| 71 |
+
* Use **bold** for key financial metrics (e.g., Stock Symbol, Price, Volume).
|
| 72 |
+
* Cite the tools used to obtain the data (e.g., "Data sourced from Yahoo Finance as of [Date]").
|
| 73 |
+
* If a symbol or data point cannot be found, clearly state "Data for [X] is unavailable or invalid."
|
| 74 |
+
* Always include a disclaimer: "This analysis is for informational purposes only and is not financial advice."
|
| 75 |
+
""",
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
__all__ = ["yf_agent", "get_summary", "get_market_sentiment", "get_history", "current_datetime"]
|
common/mcp/README.md
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MCP Tools Server
|
| 2 |
+
|
| 3 |
+
A Model Context Protocol (MCP) server that exposes all tools from the `tools/` folder via stdio transport.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- **Dynamic Tool Discovery**: Automatically discovers and registers all tools from the tools folder
|
| 8 |
+
- **Stdio Transport**: Compatible with Claude Desktop and other MCP clients
|
| 9 |
+
- **Comprehensive Tool Coverage**: Exposes 14 tools across 6 categories:
|
| 10 |
+
- Google Search (google_tools)
|
| 11 |
+
- News API (news_tools)
|
| 12 |
+
- DuckDuckGo Search (search_tools)
|
| 13 |
+
- Time Utilities (time_tools)
|
| 14 |
+
- Weather Forecast (weather_tools)
|
| 15 |
+
- Yahoo Finance (yf_tools)
|
| 16 |
+
|
| 17 |
+
## Installation
|
| 18 |
+
|
| 19 |
+
1. Install required dependencies:
|
| 20 |
+
```bash
|
| 21 |
+
pip install mcp openai-agents requests beautifulsoup4 ddgs yfinance python-dotenv pydantic
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
2. Set up environment variables in `.env`:
|
| 25 |
+
```bash
|
| 26 |
+
# Google Search (Serper.dev)
|
| 27 |
+
SERPER_API_KEY=your_serper_api_key
|
| 28 |
+
|
| 29 |
+
# News API
|
| 30 |
+
NEWS_API_KEY=your_news_api_key
|
| 31 |
+
|
| 32 |
+
# Weather API
|
| 33 |
+
OPENWEATHER_API_KEY=your_openweather_api_key
|
| 34 |
+
|
| 35 |
+
# Google AI (for agents)
|
| 36 |
+
GOOGLE_API_KEY=your_google_api_key
|
| 37 |
+
|
| 38 |
+
# Groq (for agents)
|
| 39 |
+
GROQ_API_KEY=your_groq_api_key
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## Usage
|
| 43 |
+
|
| 44 |
+
### Running the Server
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
cd common/mcp
|
| 48 |
+
python mcp_server.py
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
The server will:
|
| 52 |
+
1. Discover all tools from the `tools/` folder
|
| 53 |
+
2. Print registered tools to stderr
|
| 54 |
+
3. Start listening on stdio for MCP protocol messages
|
| 55 |
+
|
| 56 |
+
### Integrating with Claude Desktop
|
| 57 |
+
|
| 58 |
+
Add to your Claude Desktop config (`claude_desktop_config.json`):
|
| 59 |
+
|
| 60 |
+
```json
|
| 61 |
+
{
|
| 62 |
+
"mcpServers": {
|
| 63 |
+
"tools-server": {
|
| 64 |
+
"command": "python",
|
| 65 |
+
"args": ["/absolute/path/to/agenticaiprojects/common/mcp/mcp_server.py"],
|
| 66 |
+
"env": {
|
| 67 |
+
"SERPER_API_KEY": "your_key",
|
| 68 |
+
"NEWS_API_KEY": "your_key",
|
| 69 |
+
"OPENWEATHER_API_KEY": "your_key"
|
| 70 |
+
}
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Available Tools
|
| 77 |
+
|
| 78 |
+
The server exposes the following tools:
|
| 79 |
+
|
| 80 |
+
**Google Search:**
|
| 81 |
+
- `google_tools.google_search` - General Google search
|
| 82 |
+
- `google_tools.google_search_recent` - Time-filtered Google search
|
| 83 |
+
|
| 84 |
+
**News:**
|
| 85 |
+
- `news_tools.get_top_headlines` - Top headlines by country
|
| 86 |
+
- `news_tools.search_news` - Search news by topic
|
| 87 |
+
- `news_tools.get_news_by_category` - News by category
|
| 88 |
+
|
| 89 |
+
**Search & Content:**
|
| 90 |
+
- `search_tools.duckduckgo_search` - DuckDuckGo search
|
| 91 |
+
- `search_tools.fetch_page_content` - Extract page content
|
| 92 |
+
|
| 93 |
+
**Time:**
|
| 94 |
+
- `time_tools.current_datetime` - Get current date/time
|
| 95 |
+
|
| 96 |
+
**Weather:**
|
| 97 |
+
- `weather_tools.get_weather_forecast` - Weather forecast via API
|
| 98 |
+
- `weather_tools.search_weather_fallback_ddgs` - Weather via DuckDuckGo
|
| 99 |
+
- `weather_tools.search_weather_fallback_bs` - Weather via web scraping
|
| 100 |
+
|
| 101 |
+
**Finance:**
|
| 102 |
+
- `yf_tools.get_summary` - Stock summary
|
| 103 |
+
- `yf_tools.get_market_sentiment` - Market sentiment analysis
|
| 104 |
+
- `yf_tools.get_history` - Historical stock data
|
| 105 |
+
|
| 106 |
+
## Development
|
| 107 |
+
|
| 108 |
+
### Adding New Tools
|
| 109 |
+
|
| 110 |
+
1. Create a new file in `tools/` folder (e.g., `my_tools.py`)
|
| 111 |
+
2. Decorate functions with `@function_tool`
|
| 112 |
+
3. The server will automatically discover and register them on next restart
|
| 113 |
+
|
| 114 |
+
### Testing
|
| 115 |
+
|
| 116 |
+
```bash
|
| 117 |
+
# Test the server
|
| 118 |
+
cd common/mcp
|
| 119 |
+
python mcp_server.py
|
| 120 |
+
|
| 121 |
+
# In another terminal, you can send MCP protocol messages via stdin
|
| 122 |
+
# Or use an MCP client library to test
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
## Troubleshooting
|
| 126 |
+
|
| 127 |
+
**Tools not discovered:**
|
| 128 |
+
- Check that functions are decorated with `@function_tool`
|
| 129 |
+
- Verify the module is in the `tools/` folder
|
| 130 |
+
- Check stderr output for registration messages
|
| 131 |
+
|
| 132 |
+
**API errors:**
|
| 133 |
+
- Verify environment variables are set correctly
|
| 134 |
+
- Check API key validity
|
| 135 |
+
- Review tool-specific error messages in stderr
|
| 136 |
+
|
| 137 |
+
## License
|
| 138 |
+
|
| 139 |
+
Part of the agenticaiprojects repository.
|
common/mcp/__init__.py
ADDED
|
File without changes
|
common/mcp/mcp_server.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
MCP Server with stdio transport that exposes all tools from the tools folder.

Tool modules are loaded directly from their file paths (via ``importlib.util``)
rather than by package import. The previous approach inserted the parent
directory on ``sys.path`` and imported ``mcp.tools.<name>``, which made the
local ``common/mcp`` directory shadow the installed MCP SDK package — the very
package that provides ``mcp.server`` and ``mcp.types`` below — so one of the
two imports always broke. File-path loading needs no package named ``mcp``.
"""
import asyncio
import sys
import os
import inspect
import importlib
import importlib.util
from pathlib import Path
from typing import Any, Callable

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# Initialize MCP server
app = Server("tools-server")

# Registry mapping "<module>.<function>" -> callable for every discovered tool.
TOOLS_REGISTRY: dict[str, Callable] = {}

# Directory holding the tool modules, resolved relative to this file.
_TOOLS_DIR = Path(__file__).parent / "tools"

# Explicit allow-list of tool modules to load.
_TOOL_MODULE_NAMES = [
    "google_tools",
    "news_tools",
    "search_tools",
    "time_tools",
    "weather_tools",
    "yf_tools",
]


def _load_module_from_path(module_name: str, file_path: Path):
    """
    Import a single module from an explicit file path.

    Avoids package-name collisions with the installed ``mcp`` SDK by never
    importing local tool code through a package prefix.

    Raises
    ------
    ImportError
        If no import spec can be built for ``file_path``.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create import spec for {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before exec so any code inside the module that looks itself up
    # in sys.modules (decorators, dataclasses, pickling) resolves correctly.
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


def discover_tools():
    """
    Discover and register all public functions defined in the tool modules.

    Only functions *defined in* each module are registered (checked via
    ``obj.__module__``) — previously, names merely imported into a module,
    such as ``load_dotenv``, were registered as tools too.
    """
    print(f"[MCP Server] Discovering tools from: {_TOOLS_DIR}", file=sys.stderr)

    for module_name in _TOOL_MODULE_NAMES:
        try:
            module = _load_module_from_path(module_name, _TOOLS_DIR / f"{module_name}.py")

            for name, obj in inspect.getmembers(module, inspect.isfunction):
                if name.startswith('_'):
                    continue  # private helpers are not tools
                if getattr(obj, '__module__', None) != module.__name__:
                    continue  # skip functions imported from elsewhere

                tool_name = f"{module_name}.{name}"
                TOOLS_REGISTRY[tool_name] = obj
                print(f"[MCP Server] Registered tool: {tool_name}", file=sys.stderr)

        except Exception as e:
            # One broken module (missing API key lib, syntax error) must not
            # prevent the remaining modules from loading.
            print(f"[MCP Server] Error loading module {module_name}: {e}", file=sys.stderr)

    print(f"[MCP Server] Total tools registered: {len(TOOLS_REGISTRY)}", file=sys.stderr)


# Maps Python annotations to JSON-Schema type names; anything else is "string".
_JSON_SCHEMA_TYPES = {
    int: "integer",
    bool: "boolean",
    float: "number",
    str: "string",
    list: "array",
    dict: "object",
}


@app.list_tools()
async def list_tools() -> list[Tool]:
    """
    List all available tools with a JSON-Schema description of their inputs.

    The schema is inferred from each function's signature: annotated parameter
    types map through ``_JSON_SCHEMA_TYPES``; parameters without defaults are
    marked required; ``*args`` / ``**kwargs`` cannot be represented in a flat
    object schema and are skipped.
    """
    tools = []

    for tool_name, tool_func in TOOLS_REGISTRY.items():
        sig = inspect.signature(tool_func)
        doc = inspect.getdoc(tool_func) or "No description available"

        properties: dict[str, Any] = {}
        required: list[str] = []

        for param_name, param in sig.parameters.items():
            if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
                continue

            param_type = "string"  # default when unannotated or unmapped
            if param.annotation is not inspect.Parameter.empty:
                param_type = _JSON_SCHEMA_TYPES.get(param.annotation, "string")

            properties[param_name] = {
                "type": param_type,
                "description": f"Parameter: {param_name}",
            }

            # No default value => the caller must supply it.
            if param.default is inspect.Parameter.empty:
                required.append(param_name)

        tool = Tool(
            name=tool_name,
            description=doc.split('\n')[0][:200],  # first line, max 200 chars
            inputSchema={
                "type": "object",
                "properties": properties,
                "required": required,
            },
        )
        tools.append(tool)

    return tools


@app.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
    """
    Execute a registered tool and wrap its result as MCP text content.

    Raises
    ------
    ValueError
        If ``name`` is not a registered tool.

    Tool-execution errors are returned as text rather than raised, so one
    failing tool call does not tear down the server session.
    """
    print(f"[MCP Server] Calling tool: {name} with args: {arguments}", file=sys.stderr)

    if name not in TOOLS_REGISTRY:
        raise ValueError(f"Tool not found: {name}")

    tool_func = TOOLS_REGISTRY[name]

    try:
        # Support both sync and async tool functions.
        if inspect.iscoroutinefunction(tool_func):
            result = await tool_func(**arguments)
        else:
            result = tool_func(**arguments)

        # MCP text content requires a string payload.
        if not isinstance(result, str):
            result = str(result)

        return [TextContent(type="text", text=result)]

    except Exception as e:
        error_msg = f"Error executing tool {name}: {str(e)}"
        print(f"[MCP Server] {error_msg}", file=sys.stderr)
        return [TextContent(type="text", text=error_msg)]


async def main():
    """Discover tools, then serve the MCP protocol over stdio until EOF."""
    discover_tools()

    print(f"[MCP Server] Starting MCP server with {len(TOOLS_REGISTRY)} tools", file=sys.stderr)

    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )


if __name__ == "__main__":
    asyncio.run(main())
|
common/mcp/tools/__init__.py
ADDED
|
File without changes
|
common/mcp/tools/google_tools.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from agents import function_tool
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
# ---------------------------------------------------------
|
| 8 |
+
# Load environment variables
|
| 9 |
+
# ---------------------------------------------------------
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
# ============================================================
|
| 13 |
+
# 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
|
| 14 |
+
# ============================================================
|
| 15 |
+
|
| 16 |
+
@function_tool
def google_search(query: str, num_results: int = 3) -> str:
    """
    Perform a general Google search using Serper.dev API.

    Parameters:
    -----------
    query : str
        The search query string, e.g., "latest Tesla stock news".
    num_results : int, optional (default=3)
        Maximum number of search results to return.

    Returns:
    --------
    str
        Formatted string of top search results, each including:
        - Title of the page
        - URL link
        - Snippet / description
        If no results are found or API key is missing, returns an error message.

    Example:
    --------
    google_search("AI in finance", num_results=2)

    Output:
    Title: How AI is Transforming Finance
    Link: https://example.com/ai-finance
    Snippet: AI is increasingly used for trading, risk management...

    Title: AI Applications in Banking
    Link: https://example.com/ai-banking
    Snippet: Banks are leveraging AI for customer service, fraud detection...
    """
    print(f"[DEBUG] google_search called with query='{query}', num_results={num_results}")

    try:
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        url = "https://google.serper.dev/search"
        headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
        # FIX: no "tbs" time filter here — this is the *general* search entry
        # point. It previously sent "tbs": "qdr:d", silently restricting
        # results to the last 24 hours, which contradicted the docstring and
        # duplicated google_search_recent. Use google_search_recent for
        # time-restricted queries.
        payload = {"q": query, "num": num_results}

        response = requests.post(url, headers=headers, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()

        if "organic" not in data or not data["organic"]:
            return f"No results found for query: '{query}'"

        formatted_results = [
            f"Title: {item.get('title')}\n"
            f"Link: {item.get('link')}\n"
            f"Snippet: {item.get('snippet', '')}\n"
            for item in data["organic"][:num_results]
        ]
        return "\n".join(formatted_results)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error during Google search: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error performing Google search: {e}")
        return f"Error performing Google search: {e}"
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@function_tool
def google_search_recent(query: str, num_results: int = 3, timeframe: str = "d") -> str:
    """
    Perform a Google search restricted to a recent time window via Serper.dev.

    Parameters:
    -----------
    query : str
        The search query string.
    num_results : int, optional (default=3)
        Maximum number of search results to return.
    timeframe : str, optional (default="d")
        Time range for results:
        - "d" = past day
        - "w" = past week
        - "m" = past month
        - "y" = past year

    Returns:
    --------
    str
        Formatted string of recent search results, or an error message when
        the API key is missing, the request fails, or nothing is found.
    """
    print(f"[DEBUG] google_search_recent called with query='{query}', timeframe={timeframe}")

    try:
        serper_key = os.getenv("SERPER_API_KEY")
        if not serper_key:
            return "Error: SERPER_API_KEY missing in environment variables."

        # "qdr:<x>" is Google's query-date-range filter.
        reply = requests.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": serper_key, "Content-Type": "application/json"},
            json={"q": query, "num": num_results, "tbs": f"qdr:{timeframe}"},
            timeout=10,
        )
        reply.raise_for_status()
        body = reply.json()

        hits = body.get("organic")
        if not hits:
            return f"No recent results found for query: '{query}'"

        listing = "\n".join(
            f"Title: {hit.get('title')}\n"
            f"Link: {hit.get('link')}\n"
            f"Snippet: {hit.get('snippet', '')}\n"
            for hit in hits[:num_results]
        )
        return f"Recent results ({timeframe}):\n\n" + listing

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error during Google search: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Error performing Google search: {e}"
|
common/mcp/tools/news_tools.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from agents import function_tool
|
| 5 |
+
from typing import Optional
|
| 6 |
+
import datetime
|
| 7 |
+
|
| 8 |
+
# ---------------------------------------------------------
|
| 9 |
+
# Load environment variables
|
| 10 |
+
# ---------------------------------------------------------
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
# ============================================================
|
| 14 |
+
# 🔹 NEWS TOOLSET (NewsAPI.org)
|
| 15 |
+
# ============================================================
|
| 16 |
+
|
| 17 |
+
@function_tool
def get_top_headlines(country: str = "us", num_results: int = 5) -> str:
    """
    Fetch the latest top headlines for a country from NewsAPI.org.

    Parameters:
    -----------
    country : str, optional (default="us")
        Two-letter country code (e.g., "us", "gb", "in").
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Headlines formatted with title, source, published date and URL.
        If the API key is missing or no results are found, returns an
        error message.
    """
    print(f"[DEBUG] get_top_headlines called for country={country}, num_results={num_results}")

    try:
        news_key = os.getenv("NEWS_API_KEY")
        if not news_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        resp = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={
                "country": country,
                "pageSize": num_results,
                "apiKey": news_key,
            },
            timeout=10,
        )
        resp.raise_for_status()
        payload = resp.json()

        articles = payload.get("articles")
        if not articles:
            return f"No top headlines found for country: {country}"

        blocks = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in articles[:num_results]
        ]
        return f"Top Headlines ({country.upper()}):\n\n" + "\n".join(blocks)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@function_tool
def search_news(query: str, num_results: int = 5, days_back: int = 7) -> str:
    """
    Search for recent news articles about a specific topic using NewsAPI.org.

    Parameters:
    -----------
    query : str
        Keyword or topic to search (e.g., "Tesla earnings", "AI healthcare").
    num_results : int, optional (default=5)
        Number of articles to fetch.
    days_back : int, optional (default=7)
        Number of days to look back for articles; values outside 1-30 are clamped.

    Returns:
    --------
    str
        Formatted news articles with title, source, published date, and URL.
        If API key is missing or no results found, returns an error message.
    """
    print(f"[DEBUG] search_news called with query='{query}', num_results={num_results}, days_back={days_back}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        # Enforce the documented 1-30 day window instead of silently passing
        # out-of-range values through to the API.
        days_back = max(1, min(days_back, 30))

        # Calculate the date range with a timezone-aware UTC timestamp;
        # datetime.utcnow() is deprecated since Python 3.12.
        today = datetime.datetime.now(datetime.timezone.utc)
        from_date = (today - datetime.timedelta(days=days_back)).strftime('%Y-%m-%dT%H:%M:%SZ')

        url = "https://newsapi.org/v2/everything"
        params = {
            "q": query,
            "pageSize": num_results,
            "apiKey": api_key,
            "sortBy": "publishedAt",
            "language": "en",
            "from": from_date
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        if not data.get("articles"):
            return f"No news found for query: '{query}'"

        formatted = []
        for article in data["articles"][:num_results]:
            formatted.append(
                f"📰 {article.get('title')}\n"
                f" Source: {article.get('source', {}).get('name')}\n"
                f" Published: {article.get('publishedAt', 'N/A')}\n"
                f" URL: {article.get('url')}\n"
            )

        return f"News Search Results for '{query}' (last {days_back} days):\n\n" + "\n".join(formatted)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
@function_tool
def get_news_by_category(category: str = "business", country: str = "us", num_results: int = 5) -> str:
    """
    Fetch top headlines for a single news category via NewsAPI.org.

    Parameters:
    -----------
    category : str, optional (default="business")
        One of NewsAPI's categories: "business", "entertainment", "general",
        "health", "science", "sports", "technology".
    country : str, optional (default="us")
        Two-letter country code.
    num_results : int, optional (default=5)
        Number of articles to fetch.

    Returns:
    --------
    str
        Human-readable headline list, or an error message when the API key is
        missing, the request fails, or nothing matches.
    """
    print(f"[DEBUG] get_news_by_category called for category={category}, country={country}")

    try:
        api_key = os.getenv("NEWS_API_KEY")
        if not api_key:
            return "Error: NEWS_API_KEY missing in environment variables."

        resp = requests.get(
            "https://newsapi.org/v2/top-headlines",
            params={
                "category": category,
                "country": country,
                "pageSize": num_results,
                "apiKey": api_key,
            },
            timeout=10,
        )
        resp.raise_for_status()
        payload = resp.json()

        if not payload.get("articles"):
            return f"No headlines found for category: {category}"

        entries = [
            f"📰 {item.get('title')}\n"
            f" Source: {item.get('source', {}).get('name')}\n"
            f" Published: {item.get('publishedAt', 'N/A')}\n"
            f" URL: {item.get('url')}\n"
            for item in payload["articles"][:num_results]
        ]

        return f"Top {category.capitalize()} Headlines ({country.upper()}):\n\n" + "\n".join(entries)

    except requests.exceptions.RequestException as e:
        print(f"[DEBUG] Network error: {e}")
        return f"Network error while calling News API: {e}"
    except Exception as e:
        print(f"[DEBUG] Error: {e}")
        return f"Unexpected error fetching news: {e}"
|
common/mcp/tools/rag_tool.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""RAG Search Tool - Search the local healthcare knowledge base"""
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from agents import function_tool, RunContextWrapper
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from rag.rag import Retriever
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass
class UserContext:
    """Per-user settings handed to tools through RunContextWrapper.

    Carries the paths used to construct a Retriever plus the FAISS distance
    cutoff that rag_search applies when deciding whether results are relevant.
    """

    uid: str  # unique user identifier
    db_path: str = ""  # passed to Retriever(db_path=...) by rag_search
    file_path: str = ""  # passed to Retriever(file_path=...) by rag_search
    similarity_threshold: float = 0.4  # FAISS L2 distance threshold for RAG relevance
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# ---------------------------------------------------------
|
| 19 |
+
# Load environment variables
|
| 20 |
+
# ---------------------------------------------------------
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
# ---------------------------------------------------------
|
| 24 |
+
# Initialize RAG Retriever
|
| 25 |
+
# ---------------------------------------------------------
|
| 26 |
+
# Get the healthcare-rag-chatbot directory path
|
| 27 |
+
# healthcare_dir = str(Path(__file__).parent.parent.parent)
|
| 28 |
+
# retriever = None
|
| 29 |
+
|
| 30 |
+
# ---------------------------------------------------------
|
| 31 |
+
# RAG Search Tool
|
| 32 |
+
# ---------------------------------------------------------
|
| 33 |
+
@function_tool
def rag_search(wrapper: RunContextWrapper[UserContext], query: str) -> str:
    """
    Look up a medical question in the local healthcare knowledge base.

    Args:
        wrapper: Run context carrying db_path, file_path and the FAISS
            distance threshold for this user.
        query: The medical question or topic to search for.

    Returns:
        Relevant knowledge-base passages, or a "no relevant information"
        message that signals the caller to fall back to web search.
    """
    print(f"[DEBUG] RAG_SEARCH called with query: '{query}'")

    # FAISS reports L2 distance: lower means a better match, so a result is
    # "relevant" only when its score is <= the threshold.
    similarity_threshold = wrapper.context.similarity_threshold
    print(f"[DEBUG] RAG_SEARCH: Using similarity threshold: {similarity_threshold}")

    try:
        kb = Retriever(
            db_path=wrapper.context.db_path,
            file_path=wrapper.context.file_path
        )

        scored = kb.retrieve_with_scores(query, k=5)
        if not scored:
            print("[DEBUG] RAG_SEARCH: No results found in knowledge base")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Found {len(scored)} results")

        best_score = scored[0][1]
        print(f"[DEBUG] RAG_SEARCH: Best similarity score (distance): {best_score:.4f} (threshold: {similarity_threshold})")

        if best_score > similarity_threshold:
            print(f"[DEBUG] RAG_SEARCH: Best match score {best_score:.4f} is above threshold {similarity_threshold}")
            print("[DEBUG] RAG_SEARCH: Results not relevant enough, triggering web search fallback")
            return "No relevant information found in the knowledge base."

        print(f"[DEBUG] RAG_SEARCH: Results are relevant (score: {best_score:.4f} <= {similarity_threshold})")
        print(f"[DEBUG] RAG_SEARCH: All scores: {', '.join(f'{s:.4f}' for _, s in scored)}")

        # Keep only documents under the distance threshold. The index i counts
        # positions in the ranked list, including any filtered-out entries.
        entries = [
            f"Result {i} (score: {score:.4f}):\n{doc.page_content.strip()}\n"
            for i, (doc, score) in enumerate(scored[:5], 1)
            if score <= similarity_threshold
        ]

        if not entries:
            print("[DEBUG] RAG_SEARCH: No results met the similarity threshold")
            print("[DEBUG] RAG_SEARCH: Triggering web search fallback")
            return "No relevant information found in the knowledge base."

        payload = "\n".join(entries)
        print(f"[DEBUG] RAG_SEARCH: Returning {len(entries)} results, total length: {len(payload)} characters")
        print(f"[DEBUG] RAG_SEARCH: First 300 chars: {payload[:300]}...")

        return payload

    except Exception as e:
        print(f"[DEBUG] RAG_SEARCH: Error occurred - {str(e)}")
        return f"Error retrieving from knowledge base: {str(e)}"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# Public API of this module. The previous list also exported "retriever",
# but that global is commented out above, so `from ... import *` raised
# AttributeError; only rag_search actually exists at module scope.
__all__ = ["rag_search"]
|
common/mcp/tools/search_tools.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ddgs import DDGS
|
| 2 |
+
from agents import function_tool
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from pydantic import BaseModel, Field
|
| 5 |
+
import requests
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
# ---------------------------------------------------------
|
| 10 |
+
# Load environment variables
|
| 11 |
+
# ---------------------------------------------------------
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
# ---------------------- MODELS ---------------------------
|
| 15 |
+
class searchQuery(BaseModel):
    """Input schema for duckduckgo_search: query text plus search options.

    search_type selects between DDGS().text (default) and DDGS().news; the
    news mode is the one that yields publication dates.
    """

    query: str = Field(..., description="The search query string.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    search_type: str = Field(
        "text",
        description="Search type: 'text' (default) or 'news'. Use 'news' to get publication dates."
    )
    timelimit: str = Field(
        'd',
        description="Time limit for search results: 'd' (day), 'w' (week), 'm' (month), 'y' (year)."
    )
    region: str = Field("us-en", description="Region for search results (e.g., 'us-en').")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class searchResult(BaseModel):
    """One search hit returned by duckduckgo_search (serialized via model_dump)."""

    title: str  # result title
    link: str  # result URL ('url' field for news results, 'href' for text results)
    snippet: str  # short body/summary text
    datetime: Optional[str] = None  # publication date; only populated for news-type searches
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ---------------------- PAGE FETCH TOOL ---------------------------
|
| 37 |
+
@function_tool
def fetch_page_content(url: str, timeout: int = 3) -> Optional[str]:
    """
    Fetch a web page and return its visible text content.

    Parameters:
    -----------
    url : str
        Page to download.
    timeout : int, optional (default=3)
        Request timeout in seconds.

    Returns:
    --------
    Optional[str]
        Cleaned page text (one chunk per line), or None if the request
        or parsing fails.
    """
    print(f"[DEBUG] fetch_page_content called with: {url} - timeout: {timeout}")
    try:
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove irrelevant elements
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        # Extract text
        text = soup.get_text(separator='\n', strip=True)

        # Clean whitespace. Split on double spaces (layout/column gaps), not
        # on single spaces: splitting on ' ' exploded the output to one word
        # per line and destroyed sentence structure.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)

        return text
    except Exception as e:
        print(f"[WARNING] Failed to fetch content from {url}: {str(e)}")
        return None
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# ---------------------- SEARCH TOOL ---------------------------
|
| 73 |
+
@function_tool
def duckduckgo_search(params: searchQuery) -> list[dict]:
    """Run a DuckDuckGo query and return lightweight result dicts.

    Only titles, links and snippets are returned (plus a date for news-type
    searches); no page content is fetched here.
    """
    print(f"[DEBUG] duckduckgo_search called with: {params}")

    with DDGS() as client:
        if params.search_type == "news":
            # News results carry a 'url' key and a publication 'date'.
            hits = [
                searchResult(
                    title=item.get("title", ""),
                    link=item.get("url", ""),
                    snippet=item.get("body", ""),
                    datetime=item.get("date", ""),
                ).model_dump()
                for item in client.news(
                    params.query,
                    max_results=params.max_results,
                    timelimit=params.timelimit,
                    region=params.region,
                )
            ]
        else:
            # Plain text results use 'href' for the link and have no date.
            hits = [
                searchResult(
                    title=item.get("title", ""),
                    link=item.get("href", ""),
                    snippet=item.get("body", ""),
                ).model_dump()
                for item in client.text(
                    params.query,
                    max_results=params.max_results,
                    timelimit=params.timelimit,
                    region=params.region,
                )
            ]

    print(f"[DEBUG] duckduckgo_search returning {len(hits)} results")
    return hits
|
| 115 |
+
|
common/mcp/tools/time_tools.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from agents import function_tool
|
| 3 |
+
# from ..common.utility.logger import log_call
|
| 4 |
+
|
| 5 |
+
@function_tool
def current_datetime(format: str = "natural") -> str:
    """
    Returns the current date and time as a formatted string.

    Args:
        format (str): Format style for the datetime. Options:
            - "natural" (default): "Saturday, December 7, 2025 at 3:59 PM"
            - "natural_short": "Dec 7, 2025 at 3:59 PM"
            - "natural_full": "Saturday, December 7, 2025 at 3:59:30 PM CST"
            - Custom strftime format string (e.g., "%Y-%m-%d %H:%M:%S")

    Returns:
        str: Current date and time in the specified format
    """
    # astimezone() attaches the local timezone to the timestamp; with a naive
    # datetime.now(), %Z renders as an empty string and "natural_full" never
    # showed the timezone abbreviation its docstring promises.
    now = datetime.now().astimezone()

    # Named natural formats first; anything else is treated as a custom
    # strftime format string.
    if format == "natural":
        return now.strftime("%A, %B %d, %Y at %I:%M %p")
    if format == "natural_short":
        return now.strftime("%b %d, %Y at %I:%M %p")
    if format == "natural_full":
        return now.strftime("%A, %B %d, %Y at %I:%M:%S %p %Z")
    return now.strftime(format)
|
common/mcp/tools/weather_tools.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import requests
|
| 4 |
+
import datetime
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
from ddgs import DDGS
|
| 9 |
+
from agents import function_tool
|
| 10 |
+
|
| 11 |
+
# ---------------------------------------------------------
|
| 12 |
+
# Load environment variables
|
| 13 |
+
# ---------------------------------------------------------
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
@function_tool
def get_weather_forecast(city: str, date: Optional[str] = None) -> str:
    """
    PRIMARY TOOL: Fetch weather using OpenWeatherMap API.

    Queries the 5-day/3-hour forecast endpoint and formats each time slot.
    When a date (YYYY-MM-DD) is supplied, only that day's slots are kept;
    a date outside the 5-day window produces a hint to use the fallback tool.
    """
    print(f"[DEBUG] Primary API get_weather_forecast called for city={city}")

    api_key = os.getenv("OPENWEATHER_API_KEY")
    if not api_key:
        return "Error: OPENWEATHER_API_KEY missing. Please use the fallback search tool."

    try:
        data = requests.get(
            "https://api.openweathermap.org/data/2.5/forecast",
            params={"q": city, "appid": api_key, "units": "metric"},
            timeout=5,
        ).json()
    except Exception as e:
        return f"Error calling weather API: {str(e)}"

    # OpenWeatherMap reports status in the 'cod' field of the JSON body.
    if str(data.get("cod")) != "200":
        return f"Error from API: {data.get('message', 'Unknown error')}"

    slots = data.get("list", [])
    if date:
        slots = [s for s in slots if s["dt_txt"].split(" ")[0] == date]
        if not slots:
            return f"API valid, but date {date} is out of range (5-day limit). Try the search fallback tool."

    report = [
        f"{s['dt_txt'].split(' ')[0]}: {s['weather'][0]['description'].capitalize()}, "
        f"Temp: {s['main']['temp']}°C, Humidity: {s['main']['humidity']}%, Wind: {s['wind']['speed']} m/s"
        for s in slots
    ]

    return f"API Forecast for {city}:\n" + "\n".join(report)
|
| 67 |
+
|
| 68 |
+
# ---------------------------------------------------------
|
| 69 |
+
# Tool 2: Web Search Fallback (Secondary)
|
| 70 |
+
# ---------------------------------------------------------
|
| 71 |
+
|
| 72 |
+
@function_tool
def search_weather_fallback_ddgs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Search-based fallback that produces an API-like structured forecast.

    Parameters:
    -----------
    city : str
        City to look up.
    date : str, optional
        Target date in YYYY-MM-DD form; defaults to today.

    Returns:
    --------
    str
        "Web Estimated Forecast ..." text, or an error string on failure.
    """
    # Import the class locally: at module level `import datetime` binds the
    # *module*, and this function previously worked only because a later
    # mid-file `from datetime import datetime` rebinds the global before
    # call time. A local import removes that import-order dependence.
    from datetime import datetime

    print(f"[DEBUG] Fallback API (DDGS) called for city={city}, date={date}")

    # --- Build Query ---
    try:
        if date:
            try:
                dt_obj = datetime.strptime(date, "%Y-%m-%d")
                natural_date = dt_obj.strftime("%B %d, %Y")
            except ValueError:
                # Not ISO-formatted; use the caller's string verbatim.
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # --- Perform Search ---
        results = list(DDGS().text(query, max_results=3))
        print(f"[DEBUG] Number of search results: {len(results)}")

        if not results:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        # --- Aggregate Text ---
        full_text = " ".join(r.get("body", "") for r in results)

        # --- Extract Values with Robust Regex ---
        temp_match = re.findall(r'(-?\d+)\s*(?:°|deg|C|F)', full_text, re.I)
        temperature = temp_match[0] if temp_match else "?"

        humidity_match = re.findall(r'(\d+)\s*%', full_text)
        humidity = humidity_match[0] if humidity_match else "?"

        wind_match = re.findall(r'(\d+)\s*(?:mph|km/h|m/s)', full_text, re.I)
        wind = wind_match[0] if wind_match else "?"

        # --- Condition ---
        # Take first word(s) of first title as best guess
        condition_raw = results[0].get("title", "Unknown").split("-")[0].strip()
        condition = condition_raw[0].upper() + condition_raw[1:] if condition_raw else "Unknown"

        # --- Construct API-like Forecast ---
        forecast = (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )

        return forecast

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
import requests
|
| 142 |
+
from bs4 import BeautifulSoup
|
| 143 |
+
import re
|
| 144 |
+
from typing import Optional
|
| 145 |
+
from agents import function_tool
|
| 146 |
+
from datetime import datetime
|
| 147 |
+
|
| 148 |
+
@function_tool
def search_weather_fallback_bs(city: str, date: Optional[str] = None) -> str:
    """
    SECONDARY TOOL: Web-scraping fallback using BeautifulSoup.
    Scrapes DuckDuckGo's HTML results page and assembles an API-like forecast.
    """
    # Local imports keep this tool self-contained regardless of module-level
    # import ordering.
    import re
    import requests
    from bs4 import BeautifulSoup
    from datetime import datetime

    print(f"[DEBUG] Fallback API (BeautifulSoup) called for city={city}, date={date}")

    try:
        # Turn an ISO date into natural language for the search query.
        if date:
            try:
                natural_date = datetime.strptime(date, "%Y-%m-%d").strftime("%B %d, %Y")
            except ValueError:
                natural_date = date
        else:
            natural_date = datetime.now().strftime("%B %d, %Y")

        query = f"weather {city} {natural_date}"
        print(f"[DEBUG] Search query: {query}")

        # Fetch DuckDuckGo's HTML search results.
        response = requests.get(
            f"https://duckduckgo.com/html/?q={query.replace(' ', '+')}",
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=5,
        )
        if response.status_code != 200:
            return f"Error fetching search results: {response.status_code}"

        page = BeautifulSoup(response.text, "html.parser")
        snippets = []
        for node in page.select(".result__body"):
            title_el = node.select_one(".result__title a")
            body_el = node.select_one(".result__snippet")
            if title_el and body_el:
                snippets.append({
                    "title": title_el.get_text(strip=True),
                    "body": body_el.get_text(strip=True),
                })

        if not snippets:
            return f"Web Estimated Forecast for {city}:\nNo reliable search data found."

        corpus = " ".join(s["body"] for s in snippets)

        # Pull numeric readings out of the aggregated snippet text.
        temps = re.findall(r'(-?\d{1,2})\s*(?:°|deg|C|F)', corpus, re.I)
        temperature = temps[0] if temps else "?"

        hums = re.findall(r'(\d{1,3})\s*%', corpus)
        humidity = hums[0] if hums else "?"

        winds = re.findall(r'(\d{1,3})\s*(?:mph|km/h|m/s)', corpus, re.I)
        wind = winds[0] if winds else "?"

        # Prefer an explicit condition word from any snippet body; otherwise
        # fall back to the leading words of the first result title.
        condition = "Unknown"
        for s in snippets:
            m = re.search(r'(clear|sunny|cloudy|rain|snow|storm|fog|mist)', s["body"], re.I)
            if m:
                condition = m.group(1).capitalize()
                break
        if condition == "Unknown":
            lead = snippets[0]["title"].split("-")[0].strip()
            condition = lead[0].upper() + lead[1:] if lead else "Unknown"

        return (
            f"Web Estimated Forecast for {city}:\n"
            f"{natural_date}: {condition}, Temp: {temperature}° (approx), "
            f"Humidity: {humidity}%, Wind: {wind}\n"
        )

    except Exception as e:
        print(f"[DEBUG] Error in fallback: {e}")
        return f"Error performing web search: {str(e)}"
|
common/mcp/tools/yf_tools.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
import yfinance as yf
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from agents import function_tool
|
| 6 |
+
from datetime import datetime, timedelta
|
| 7 |
+
|
| 8 |
+
# Load environment variables
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ============================================================
|
| 13 |
+
# 🔹 YAHOO FINANCE TOOLSET
|
| 14 |
+
# ============================================================
|
| 15 |
+
@function_tool
def get_summary(symbol: str, period: str = "1d", interval: str = "1h") -> str:
    """
    Fetch the latest summary information and intraday price data for a given ticker.
    Ensures recent data is retrieved by calculating start/end dates dynamically.

    Parameters:
    -----------
    symbol : str
        The ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional (default="1d")
        Time range for price data. Examples: "1d", "5d", "1mo", "3mo".
    interval : str, optional (default="1h")
        Granularity of the data. Examples: "1m", "5m", "1h", "1d".

    Returns:
    --------
    str
        A formatted string containing:
        - Company/ticker name
        - Current price and change
        - Open, High, Low prices
        - Volume
        - Period and interval used
    """
    try:
        ticker = yf.Ticker(symbol)

        # Translate the period string into an explicit day count.
        end_date = datetime.today()
        if period.endswith("d"):
            days = int(period[:-1])
        elif period.endswith("mo"):
            days = int(period[:-2]) * 30
        elif period.endswith("y"):
            days = int(period[:-1]) * 365
        else:
            days = 30  # default 1 month
        start_date = end_date - timedelta(days=days)

        # yfinance treats `end` as exclusive, so pass tomorrow's date to make
        # sure today's bars are included — otherwise "latest" data silently
        # stopped at yesterday.
        data = ticker.history(
            start=start_date.strftime("%Y-%m-%d"),
            end=(end_date + timedelta(days=1)).strftime("%Y-%m-%d"),
            interval=interval
        )

        if data.empty:
            return f"No data found for symbol '{symbol}'."

        latest = data.iloc[-1]
        current_price = round(latest["Close"], 2)
        open_price = round(latest["Open"], 2)
        change = round(current_price - open_price, 2)
        # Guard against a zero open (sparse/odd bars) so we don't trade the
        # whole summary for a ZeroDivisionError.
        pct_change = round((change / open_price) * 100, 2) if open_price else 0.0

        info = ticker.info
        long_name = info.get("longName", symbol)
        currency = info.get("currency", "USD")

        formatted = [
            f"📈 {long_name} ({symbol})",
            f"Current Price: {current_price} {currency}",
            f"Change: {change} ({pct_change}%)",
            f"Open: {open_price} | High: {round(latest['High'], 2)} | Low: {round(latest['Low'], 2)}",
            f"Volume: {int(latest['Volume'])}",
            f"Period: {period} | Interval: {interval}",
        ]
        return "\n".join(formatted)

    except Exception as e:
        return f"Error fetching data for '{symbol}': {e}"
|
| 87 |
+
|
| 88 |
+
@function_tool
def get_market_sentiment(symbol: str, period: str = "1mo") -> str:
    """
    Classify recent price action for *symbol* as a simple market sentiment.

    The percentage change of the closing price over *period* is computed and
    mapped to:
      - "Bullish"  when the price rose more than 2%
      - "Bearish"  when the price fell more than 2%
      - "Neutral"  otherwise

    Parameters
    ----------
    symbol : str
        Ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional
        Lookback window such as "7d", "1mo", "3mo" (default "1mo").

    Returns
    -------
    str
        Human-readable sentiment line including the percentage change, or an
        error/no-data message.
    """
    try:
        quote = yf.Ticker(symbol)

        # Translate the period string into a day count; unknown suffixes
        # fall back to ~1 month, matching the other tools in this module.
        window_days = 30
        for suffix, per_unit_days in (("mo", 30), ("y", 365), ("d", 1)):
            if period.endswith(suffix):
                window_days = int(period[: -len(suffix)]) * per_unit_days
                break

        window_end = datetime.today()
        window_start = window_end - timedelta(days=window_days)

        data = quote.history(
            start=window_start.strftime("%Y-%m-%d"),
            end=window_end.strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No data for {symbol}."

        closes = data["Close"]
        recent_change = closes.iloc[-1] - closes.iloc[0]
        pct_change = (recent_change / closes.iloc[0]) * 100

        # Map the percentage move onto the three sentiment buckets.
        if pct_change > 2:
            sentiment = "Bullish"
        elif pct_change < -2:
            sentiment = "Bearish"
        else:
            sentiment = "Neutral"

        return f"{symbol} market sentiment ({period}): {sentiment} ({pct_change:.2f}% change)"

    except Exception as e:
        # Any failure (bad ticker, network, unparsable period) is reported
        # as text so the agent can surface it to the user.
        return f"Error fetching market sentiment for '{symbol}': {e}"
|
| 148 |
+
|
| 149 |
+
@function_tool
def get_history(symbol: str, period: str = "1mo") -> str:
    """
    Fetch historical price data for *symbol* over *period*.

    Start/end dates are derived dynamically from today so the window always
    covers recent data.

    Parameters
    ----------
    symbol : str
        Ticker symbol (e.g., "AAPL", "GOOG", "BTC-USD").
    period : str, optional
        Length of history to retrieve, e.g. "1d", "5d", "1mo", "3mo", "1y",
        "5y" (default "1mo").

    Returns
    -------
    str
        The last 5 rows of historical prices (Open, High, Low, Close,
        Volume) rendered as text, or an error/no-data message.
    """
    try:
        quote = yf.Ticker(symbol)

        # Convert the period suffix into an approximate day count;
        # anything unrecognized defaults to ~1 month.
        window_days = 30
        for suffix, per_unit_days in (("mo", 30), ("y", 365), ("d", 1)):
            if period.endswith(suffix):
                window_days = int(period[: -len(suffix)]) * per_unit_days
                break

        window_end = datetime.today()
        window_start = window_end - timedelta(days=window_days)

        data = quote.history(
            start=window_start.strftime("%Y-%m-%d"),
            end=window_end.strftime("%Y-%m-%d")
        )

        if data.empty:
            return f"No historical data found for '{symbol}'."
        return f"Historical data for {symbol} ({period}):\n{data.tail(5).to_string()}"

    except Exception as e:
        # Report failures as text rather than raising into the agent loop.
        return f"Error fetching historical data for '{symbol}': {e}"
|
common/rag/rag.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
|
| 4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 6 |
+
from langchain_community.vectorstores import FAISS
|
| 7 |
+
|
| 8 |
+
DB_NAME = 'healthcare_db'
|
| 9 |
+
DIRECTORY_NAME = "healthcare"
|
| 10 |
+
|
| 11 |
+
class Retriever:
    """FAISS-backed retriever over the healthcare PDF corpus.

    Builds (or lazily loads) a local FAISS index from the PDFs under
    ``<file_path>/healthcare`` and exposes similarity search with or
    without scores.
    """

    def __init__(self,
                 file_path: str = os.path.join(os.getcwd(), "data"),
                 db_path: str = os.path.join(os.getcwd(), "db")):
        # Source PDFs live under <file_path>/<DIRECTORY_NAME>; the FAISS
        # index is persisted under <db_path>/<DB_NAME>.
        self.directory_path = os.path.join(file_path, DIRECTORY_NAME)
        self.db_path = os.path.join(db_path, DB_NAME)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=200,
            length_function=len,
            is_separator_regex=False,
        )
        # Lazily initialized by load_knowledge_base() on first retrieval.
        self.retriever = None

    def load_knowledge_base(self):
        """Load the persisted FAISS index if present, otherwise build it."""
        if os.path.exists(self.db_path):
            self.retriever = FAISS.load_local(
                self.db_path,
                self.embeddings,
                # Required to unpickle a locally created index; safe here
                # because we only ever load our own files.
                allow_dangerous_deserialization=True
            ).as_retriever()
        else:
            self.retriever = self._create_knowledge_base()

    def _create_knowledge_base(self):
        """Build, persist, and return a retriever over the PDF corpus."""
        documents = self._load_documents()
        chunks = self._split_documents(documents)
        vectorstore = FAISS.from_documents(chunks, self.embeddings)
        vectorstore.save_local(self.db_path)
        return vectorstore.as_retriever()

    def _load_documents(self):
        """Load every PDF under the corpus directory."""
        loader = DirectoryLoader(
            self.directory_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True
        )
        return loader.load()

    def _split_documents(self, documents):
        """Split loaded documents into overlapping chunks."""
        # split_documents already accepts a list; no per-document loop needed.
        return self.text_splitter.split_documents(documents)

    def retrieve(self, query, k=4):
        """Retrieve the top-``k`` documents without scores.

        Fix: ``k`` was previously accepted but ignored; it is now forwarded
        to the underlying vector store.
        """
        if not self.retriever:
            self.load_knowledge_base()
        return self.retriever.vectorstore.similarity_search(query, k=k)

    def retrieve_with_scores(self, query, k=4):
        """Retrieve the top-``k`` documents with similarity scores.

        Note: FAISS returns L2 distance, so lower scores are better.
        """
        if not self.retriever:
            self.load_knowledge_base()
        vectorstore = self.retriever.vectorstore
        return vectorstore.similarity_search_with_score(query, k=k)

    def update_knowledge_base(self):
        """Rebuild the on-disk index and refresh the in-memory retriever.

        Fix: the rebuilt retriever was previously discarded, leaving
        ``self.retriever`` pointing at the stale index.
        """
        self.retriever = self._create_knowledge_base()

    def delete_knowledge_base(self):
        """Remove the persisted index from disk (in-memory state untouched)."""
        if os.path.exists(self.db_path):
            shutil.rmtree(self.db_path)
|
| 92 |
+
|
| 93 |
+
# No cleanup needed for VectorStoreRetriever
|
| 94 |
+
|
common/utility/__init__.py
ADDED
|
File without changes
|
common/utility/embedding_factory.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Union
|
| 3 |
+
# from azure.identity import DefaultAzureCredential
|
| 4 |
+
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
|
| 5 |
+
from langchain_ollama import OllamaEmbeddings
|
| 6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class EmbeddingFactory:
    """
    A static utility class to create and return LLM Embedding instances based on the input type.
    """

    @staticmethod
    def get_llm(llm_type: str) -> Union[AzureOpenAIEmbeddings, OpenAIEmbeddings]:
        """
        Returns an embedding-model instance for the requested provider.

        Parameters:
            llm_type (str): Provider name, case-insensitive. Valid values:
                'azure', 'openai', 'ollama', 'hf'.

        Returns:
            Union[AzureOpenAIEmbeddings, OpenAIEmbeddings]: The embeddings instance.

        Raises:
            NotImplementedError: For 'azure' (implementation currently disabled).
            ValueError: For any unrecognized provider name.
        """
        provider = llm_type.lower()
        if provider == "azure":
            # The Azure implementation is intentionally disabled. Fail loudly
            # instead of silently returning None (the old `pass` branch made
            # callers crash later with an opaque AttributeError).
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # if not token:
            #     raise ValueError("Token is required for AzureOpenAIEmbeddings.")
            # return AzureOpenAIEmbeddings(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment="text-embedding-3-small",  # os.environ["AZURE_OPENAI_API_BASE_MODEL"]
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError(
                "Azure embeddings are disabled; re-enable the commented AzureOpenAIEmbeddings block to use them."
            )
        elif provider == "openai":
            return OpenAIEmbeddings(
                api_key=os.environ["OPENAI_API_KEY"],
                model="text-embedding-3-large"
            )
        elif provider == "ollama":
            # Requires a local Ollama server with this model pulled.
            return OllamaEmbeddings(model="gemma:2b")
        elif provider == "hf":
            # Requires HF_TOKEN in the environment for gated models.
            return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        else:
            # Previously the message listed only 'azure' and 'openai'.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai', 'ollama' or 'hf'.")
|
common/utility/llm_factory.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tiktoken
|
| 3 |
+
from typing import Any
|
| 4 |
+
from langchain_openai.chat_models import ChatOpenAI, AzureChatOpenAI
|
| 5 |
+
from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
|
| 6 |
+
# from azure.identity import DefaultAzureCredential
|
| 7 |
+
from huggingface_hub import login
|
| 8 |
+
from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings
|
| 9 |
+
from langchain_ollama import ChatOllama, OllamaEmbeddings
|
| 10 |
+
from langchain_groq import ChatGroq
|
| 11 |
+
# from langchain_openai import OpenAIEmbeddings
|
| 12 |
+
|
| 13 |
+
class LLMFactory:
    """
    Factory class to provide LLM and embedding model instances for different providers.
    """

    @staticmethod
    def get_llm(provider: str, **kwargs) -> Any:
        """
        Returns a chat/completion LLM instance based on the provider.
        Supported providers: openai, azureopenai, huggingface, ollama, groq

        kwargs accepted (all optional):
            api_key: overrides the provider's environment-variable key.
            model_name: overrides the provider-specific default model.
        """
        if provider == "openai":
            # OpenAI Chat Model
            return ChatOpenAI(
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY")),
                model_name=kwargs.get("model_name", "gpt-4")
            )

        # elif provider == "azureopenai":
        #     # Azure OpenAI Chat Model using Azure Identity for token
        #     credential = DefaultAzureCredential()
        #     token = credential.get_token("https://cognitiveservices.azure.com/.default").token
        #     if not token:
        #         raise ValueError("Token is required for AzureChatOpenAI.")
        #     return AzureChatOpenAI(
        #         azure_endpoint=kwargs["endpoint"],
        #         azure_deployment=kwargs.get("deployment_name", "gpt-4"),
        #         api_version=kwargs["api_version"],
        #         api_key=token
        #     )

        # pip install langchain langchain-huggingface huggingface_hub
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            # NOTE(review): ChatHuggingFace in langchain-huggingface normally
            # wraps an `llm=` endpoint object; passing repo_id/task/model_kwargs
            # directly may not be accepted by all versions — verify against the
            # installed langchain-huggingface release.
            return ChatHuggingFace(
                repo_id=kwargs.get("model_name", "mistralai/Mistral-Nemo-Instruct-2407"),  # Or any other chat-friendly model
                task="text-generation",
                model_kwargs={
                    "temperature": 0.7,
                    "max_new_tokens": 256
                }
            )

        elif provider == "ollama":
            # Ollama local model; requires a running Ollama daemon with the model pulled.
            return ChatOllama(
                model=kwargs.get("model_name", "gemma:2b"),
                temperature=0
            )

        elif provider == "groq":
            # Groq LLM; requires GROQ_API_KEY (or api_key kwarg).
            return ChatGroq(
                model=kwargs.get("model_name", "Gemma2-9b-It"),
                max_tokens=512,
                api_key=kwargs.get("api_key", os.environ.get("GROQ_API_KEY"))
            )

        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @staticmethod
    def get_embedding_model(provider: str, **kwargs) -> Any:
        """
        Returns an embedding model instance based on the provider.
        Supported providers: openai, huggingface

        Raises ValueError for 'groq' (no embedding API) and for any
        unrecognized provider.
        """
        if provider == "openai":
            return OpenAIEmbeddings(
                model=kwargs.get("model_name", "text-embedding-3-large"),
                openai_api_key=kwargs.get("api_key", os.environ.get("OPENAI_API_KEY"))
            )
        # if provider == "azureopenai":
        #     # Get the Azure Credential
        #     credential = DefaultAzureCredential()
        #     token=credential.get_token("https://cognitiveservices.azure.com/.default").token

        #     if not token:
        #         raise ValueError("Token is required for AzureOpenAIEmbeddings.")
        #     return AzureOpenAIEmbeddings(
        #         azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
        #         azure_deployment=kwargs.get("azure_deployment", "text-embedding-3-large"),
        #         api_version=os.environ["AZURE_OPENAI_API_VERSION"],
        #         api_key=token
        #     )
        elif provider == "huggingface":
            # If using a private model or endpoint, authenticate
            login(token=kwargs.get("api_key", os.environ.get("HF_TOKEN")))

            return HuggingFaceEmbeddings(
                model_name=kwargs.get("model_name", "all-MiniLM-L6-v2")
            )
        elif provider == "groq":
            raise ValueError(f"No embedding support from the provider: {provider}")
        elif provider == "ollama":
            return OllamaEmbeddings(model=kwargs.get("model_name", "gemma:2b"))
        else:
            raise ValueError(f"Unsupported embedding provider: {provider}")

    @staticmethod
    def num_tokens_from_messages(messages) -> int:
        """
        Return the number of tokens used by a list of messages.
        Adapted from the OpenAI cookbook token counter.

        `messages` is a list of dicts (e.g. {"role": ..., "content": ...});
        every value in each dict is encoded and counted.
        """
        # NOTE(review): the encoding is hard-coded to gpt-3.5-turbo, so counts
        # are approximate for other models.
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        tokens_per_message = 3  # <|start|>, role, <|end|>
        num_tokens = 0

        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
|
common/utility/llm_factory2.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tiktoken
|
| 3 |
+
from typing import Union
|
| 4 |
+
# from azure.identity import DefaultAzureCredential
|
| 5 |
+
from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class LLMFactory:
    """
    A static utility class to create and return LLM instances based on the input type.
    """

    @staticmethod
    def get_llm(llm_type: str) -> Union[AzureChatOpenAI, ChatOpenAI]:
        """
        Returns an LLM instance based on the specified type.

        Parameters:
            llm_type (str): Type of LLM to return, case-insensitive.
                Valid values: 'azure', 'openai', 'openai_chat'.

        Returns:
            Union[AzureChatOpenAI, ChatOpenAI]: The LLM instance.

        Raises:
            NotImplementedError: For 'azure' (implementation currently disabled).
            ValueError: For any unrecognized type.
        """
        provider = llm_type.lower()
        if provider == "azure":
            # The Azure path is intentionally disabled. Fail loudly instead of
            # silently returning None (the old `pass` branch made callers
            # crash later with an opaque AttributeError).
            # credential = DefaultAzureCredential()
            # token = credential.get_token("https://cognitiveservices.azure.com/.default").token
            # if not token:
            #     raise ValueError("Token is required for AzureChatOpenAI.")
            # return AzureChatOpenAI(
            #     azure_endpoint=os.environ["AZURE_OPENAI_API_URI"],
            #     azure_deployment=os.environ["AZURE_OPENAI_API_BASE_MODEL"],
            #     api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            #     api_key=token
            # )
            raise NotImplementedError(
                "Azure LLM support is disabled; re-enable the commented AzureChatOpenAI block to use it."
            )
        elif provider in ("openai", "openai_chat"):
            # 'openai' and 'openai_chat' previously had two byte-identical
            # branches; they are folded into one.
            return ChatOpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                model_name="gpt-4"
            )
        else:
            # Previously the message omitted 'openai_chat'.
            raise ValueError("Invalid llm_type. Use 'azure', 'openai' or 'openai_chat'.")

    @staticmethod
    def num_tokens_from_messages(messages):
        """
        Return the number of tokens used by a list of messages.
        Adapted from the Open AI cookbook token counter.

        `messages` is a list of dicts; every value in each dict is encoded
        and counted with the gpt-3.5-turbo encoding (approximate for other
        models).
        """
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

        # Each message is sandwiched with <|start|>role and <|end|>
        # Hence, messages look like: <|start|>system or user or assistant{message}<|end|>
        tokens_per_message = 3  # token1:<|start|>, token2:system(or user or assistant), token3:<|end|>

        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))

        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
|
common/utility/logger.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import functools
|
| 2 |
+
import datetime
|
| 3 |
+
|
| 4 |
+
def log_call(func):
    """
    Decorator that logs when a function is called and when it finishes
    (or fails), each line stamped with the current wall-clock time.

    Fix: the original captured the timestamp once at call time and reused
    it for the "Finished"/"Error" lines, so completion logs showed the
    start time instead of the actual completion time.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        def _now() -> str:
            # Fresh timestamp for each log line.
            return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        arg_list = ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )
        print(f"[{_now()}] 🚀 Calling: {func.__name__}({arg_list})")
        try:
            result = func(*args, **kwargs)
        except Exception as e:
            # Log and re-raise so callers still see the original exception.
            print(f"[{_now()}] ❌ Error in {func.__name__}: {e}")
            raise
        print(f"[{_now()}] ✅ Finished: {func.__name__}")
        return result
    return wrapper
|
pyproject.toml
CHANGED
|
@@ -67,6 +67,7 @@ dependencies = [
|
|
| 67 |
"logfire",
|
| 68 |
"serpapi",
|
| 69 |
"smithery>=0.4.4",
|
|
|
|
| 70 |
|
| 71 |
# =======================
|
| 72 |
# WEB SCRAPING
|
|
@@ -100,6 +101,7 @@ dependencies = [
|
|
| 100 |
# =======================
|
| 101 |
"scikit-learn>=1.7.2",
|
| 102 |
"huggingface_hub<=1.1.4",
|
|
|
|
| 103 |
|
| 104 |
# =======================
|
| 105 |
# IPYNB SUPPORT
|
|
|
|
| 67 |
"logfire",
|
| 68 |
"serpapi",
|
| 69 |
"smithery>=0.4.4",
|
| 70 |
+
"sendgrid",
|
| 71 |
|
| 72 |
# =======================
|
| 73 |
# WEB SCRAPING
|
|
|
|
| 101 |
# =======================
|
| 102 |
"scikit-learn>=1.7.2",
|
| 103 |
"huggingface_hub<=1.1.4",
|
| 104 |
+
"datasets>=4.4.1",
|
| 105 |
|
| 106 |
# =======================
|
| 107 |
# IPYNB SUPPORT
|
run.py
CHANGED
|
@@ -1,11 +1,215 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Universal App Launcher for AgenticAI Projects
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python run.py <app_name> [--port PORT] [--help]
|
| 7 |
+
|
| 8 |
+
Examples:
|
| 9 |
+
python run.py healthcare
|
| 10 |
+
python run.py deep-research --port 8502
|
| 11 |
+
python run.py stock-advisor
|
| 12 |
+
python run.py --list
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import sys
|
| 16 |
+
import os
|
| 17 |
+
import subprocess
|
| 18 |
+
import argparse
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Dict, Optional
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# App registry - maps app names to their paths and entry points
|
| 24 |
+
# Each entry maps an app name to:
#   "path"        - app directory relative to the repo root
#   "entry"       - Streamlit entry-point file inside that directory
#   "description" - one-liner shown by `python run.py --list`
# To register a new app, add a key here; no other changes are needed.
APP_REGISTRY: Dict[str, Dict[str, str]] = {
    "healthcare": {
        "path": "src/healthcare-assistant",
        "entry": "app.py",
        "description": "Healthcare Assistant - Medical information with RAG and web search"
    },
    "deep-research": {
        "path": "src/deep-research",
        "entry": "app.py",
        "description": "Deep Research AI - Comprehensive research assistant"
    },
    "stock-advisor": {
        "path": "src/stock-advisor",
        "entry": "app.py",
        "description": "Stock Advisor - Financial analysis and stock recommendations"
    },
    "travel-agent": {
        "path": "src/travel-agent",
        "entry": "app.py",
        "description": "Travel Agent - Trip planning and travel recommendations"
    },
    "trip-planner": {
        "path": "src/trip-planner",
        "entry": "app.py",
        "description": "Trip Planner - Detailed trip itinerary planning"
    },
    "chatbot": {
        "path": "src/chatbot",
        "entry": "app.py",
        "description": "General Chatbot - Multi-purpose conversational AI"
    },
    "accessibility": {
        "path": "src/accessibility",
        "entry": "app.py",
        "description": "Accessibility Tools - Assistive technology applications"
    }
}
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def print_banner():
    """Render the launcher banner (title between two rules) to stdout."""
    rule = "=" * 70
    for line in (rule, "🚀 AgenticAI Projects Launcher".center(70), rule, ""):
        print(line)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def list_apps():
    """Print every registered application with its description."""
    print_banner()
    print("Available Applications:\n")

    # Pad names so descriptions line up in a single column.
    width = max(map(len, APP_REGISTRY))
    for app, meta in sorted(APP_REGISTRY.items()):
        print(f"  {app.ljust(width + 2)} - {meta['description']}")

    print("\n" + "=" * 70)
    print("\nUsage: python run.py <app_name> [--port PORT]")
    print("Example: python run.py healthcare --port 8501\n")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def validate_app(app_name: str) -> Optional[Dict[str, str]]:
    """
    Validate that the app is registered and its entry-point file exists.

    Args:
        app_name: Name of the app to validate.

    Returns:
        The app's registry entry if valid, None otherwise (after printing
        a diagnostic).
    """
    config = APP_REGISTRY.get(app_name)
    if config is None:
        print(f"❌ Error: Unknown app '{app_name}'")
        print(f"\nAvailable apps: {', '.join(sorted(APP_REGISTRY.keys()))}")
        print("\nRun 'python run.py --list' to see all available apps.")
        return None

    entry_point = Path(__file__).parent / config["path"] / config["entry"]
    if not entry_point.exists():
        print(f"❌ Error: App file not found at {entry_point}")
        return None

    return config
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def launch_app(app_name: str, port: Optional[int] = None):
    """
    Launch a Streamlit app.

    Args:
        app_name: Name of the app to launch (must exist in APP_REGISTRY).
        port: Optional port number (default: 8501).

    Exits with status 1 when the app is unknown, Streamlit is not
    installed, or the launch fails.
    """
    config = validate_app(app_name)
    if not config:
        sys.exit(1)

    project_root = Path(__file__).parent
    app_dir = project_root / config["path"]
    app_file = config["entry"]

    print_banner()
    print(f"📱 Launching: {config['description']}")
    print(f"📂 Location: {config['path']}")
    print(f"🌐 Entry Point: {app_file}")

    # Build streamlit command
    cmd = ["streamlit", "run", app_file]

    # Add port if specified
    if port:
        cmd.extend(["--server.port", str(port)])
        print(f"🔌 Port: {port}")
    else:
        print("🔌 Port: 8501 (default)")

    print("\n" + "=" * 70)
    print("\n🎯 Starting application...\n")

    try:
        # Run from the app directory via subprocess's cwd= instead of
        # os.chdir(): the previous chdir permanently mutated this process's
        # working directory, leaking into any code run after the app exits.
        subprocess.run(cmd, cwd=app_dir)
    except KeyboardInterrupt:
        print("\n\n👋 Application stopped by user")
    except FileNotFoundError:
        # Raised when the `streamlit` executable is missing from PATH.
        print("\n❌ Error: Streamlit not found. Please install it:")
        print("  pip install streamlit")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Error launching app: {e}")
        sys.exit(1)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def main():
    """Main entry point: parse CLI arguments and dispatch to list/launch."""
    parser = argparse.ArgumentParser(
        description="Universal launcher for AgenticAI project applications",
        # RawDescriptionHelpFormatter preserves the epilog's manual layout.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run.py healthcare              # Launch healthcare chatbot
  python run.py deep-research --port 8502  # Launch on custom port
  python run.py --list                  # List all available apps

Available Apps:
  """ + "\n  ".join(f"{name}: {config['description']}"
                    for name, config in sorted(APP_REGISTRY.items()))
    )

    # nargs="?" makes the positional optional so `--list` works on its own.
    parser.add_argument(
        "app_name",
        nargs="?",
        help="Name of the app to launch"
    )

    parser.add_argument(
        "--port",
        type=int,
        help="Port number for Streamlit server (default: 8501)"
    )

    parser.add_argument(
        "--list",
        action="store_true",
        help="List all available apps"
    )

    args = parser.parse_args()

    # Handle --list flag
    if args.list:
        list_apps()
        return

    # Require app name if not listing; show help plus the app catalogue.
    if not args.app_name:
        parser.print_help()
        print("\n")
        list_apps()
        return

    # Launch the app
    launch_app(args.app_name, args.port)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
if __name__ == "__main__":
|
| 215 |
+
main()
|
src/deep-research/.env.name
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=""
|
| 2 |
+
GROQ_API_KEY=""
|
| 3 |
+
GOOGLE_API_KEY=""
|
| 4 |
+
#https://serper.dev/api-keys
|
| 5 |
+
SERPER_API_KEY=""
|
| 6 |
+
#https://newsapi.org/v2/everything
|
| 7 |
+
NEWS_API_KEY=""
|
| 8 |
+
#https://app.sendgrid.com/ - bm80177
|
| 9 |
+
SENDGRID_API_KEY=""
|
src/deep-research/Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim

# Unbuffered logs for container stdout; non-interactive apt; make the repo
# root and shared `common` package importable.
ENV PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive \
    PYTHONPATH=/app:/app/common:$PYTHONPATH

WORKDIR /app

# System deps (git + compilers for source builds of Python wheels)
RUN apt-get update && apt-get install -y \
    git build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv (fast Python package manager)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

# Copy project metadata first so dependency layers cache across code changes
COPY pyproject.toml .
COPY uv.lock .

# Copy required folders
COPY common/ ./common/
COPY src/deep-research/ ./src/deep-research/

# Install dependencies using uv, then export and install with pip to system
RUN uv sync --frozen --no-dev && \
    uv pip install -e . --system

# Copy entry point
COPY run.py .

# 7860 is the port Hugging Face Spaces expects the app to serve on
EXPOSE 7860

CMD ["python", "run.py", "deep-research", "--port", "7860"]
|
src/deep-research/README.md
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: AI Deep Researcher # Give your app a title
|
| 3 |
+
emoji: 🤖 # Pick an emoji
|
| 4 |
+
colorFrom: indigo # Theme start color
|
| 5 |
+
colorTo: blue # Theme end color
|
| 6 |
+
sdk: docker # SDK type
|
| 7 |
+
sdk_version: "4.39.0" # Example Gradio version
|
| 8 |
+
app_file: ui/app.py # <-- points to your app.py inside ui/
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# AI Deep Researcher
|
| 13 |
+
|
| 14 |
+
**AI Deep Researcher** is a generative AI learning project built using the OpenAI Agentic Framework. This app performs deep-level web research based on user queries and generates a well-structured, consolidated report.
|
| 15 |
+
|
| 16 |
+
To achieve this, the project integrates the following technologies and AI features:
|
| 17 |
+
- **OpenAI SDK**
|
| 18 |
+
- **OpenAI Agents**
|
| 19 |
+
- **OpenAI WebSearch Tool**
|
| 20 |
+
- **Serper API** - a free alternative to OpenAI WebSearch Tool (https://serper.dev/api-keys)
|
| 21 |
+
- **News API** (https://newsapi.org/v2/everything)
|
| 22 |
+
- **SendGrid** (for emailing report)
|
| 23 |
+
- **LLMs** - (OpenAI, Gemini, Groq)
|
| 24 |
+
|
| 25 |
+
## How it works
|
| 26 |
+
The system is a multi-agent solution, where each agent has a specific responsibility:
|
| 27 |
+
|
| 28 |
+
1. **Planner Agent**
|
| 29 |
+
- Receives the user query and builds a structured query plan.
|
| 30 |
+
|
| 31 |
+
2. **Guardrail Agent**
|
| 32 |
+
- Validates user input and ensures compliance.
|
| 33 |
+
- Stops the workflow if the input contains inappropriate or unparliamentary words.
|
| 34 |
+
|
| 35 |
+
3. **Search Agent**
|
| 36 |
+
- Executes the query plan.
|
| 37 |
+
- Runs multiple web searches in parallel to gather data.
|
| 38 |
+
|
| 39 |
+
4. **Writer Agent**
|
| 40 |
+
- Reads results from all search agents.
|
| 41 |
+
- Generates a well-formatted, consolidated report.
|
| 42 |
+
|
| 43 |
+
5. **Email Agent (not functional at present)**
|
| 44 |
+
- Responsible for sending the report via email using SendGrid.
|
| 45 |
+
|
| 46 |
+
6. **Orchestrator**
|
| 47 |
+
- The entry point of the system.
|
| 48 |
+
- Facilitates communication and workflow between all agents.
|
| 49 |
+
|
| 50 |
+
## Project Folder Structure
|
| 51 |
+
|
| 52 |
+
```
|
| 53 |
+
deep-research/
|
| 54 |
+
├── ui/
|
| 55 |
+
│ ├── app.py # Main Streamlit application entry point
|
| 56 |
+
│ └── __pycache__/ # Python bytecode cache
|
| 57 |
+
├── appagents/
|
| 58 |
+
│ ├── __init__.py # Package initialization
|
| 59 |
+
│ ├── orchestrator.py # Orchestrator agent - coordinates all agents
|
| 60 |
+
│ ├── planner_agent.py # Planner agent - builds structured query plans
|
| 61 |
+
│ ├── guardrail_agent.py # Guardrail agent - validates user input
|
| 62 |
+
│ ├── search_agent.py # Search agent - performs web searches
|
| 63 |
+
│ ├── writer_agent.py # Writer agent - generates consolidated reports
|
| 64 |
+
│ ├── email_agent.py # Email agent - sends reports via email (not functional)
|
| 65 |
+
│ └── __pycache__/ # Python bytecode cache
|
| 66 |
+
├── core/
|
| 67 |
+
│ ├── __init__.py # Package initialization
|
| 68 |
+
│ ├── logger.py # Centralized logging configuration
|
| 69 |
+
│ └── __pycache__/ # Python bytecode cache
|
| 70 |
+
├── tools/
|
| 71 |
+
│ ├── __init__.py # Package initialization
|
| 72 |
+
│ ├── google_tools.py # Google search utilities
|
| 73 |
+
│ ├── time_tools.py # Time-related utility functions
|
| 74 |
+
│ └── __pycache__/ # Python bytecode cache
|
| 75 |
+
├── prompts/
|
| 76 |
+
│ ├── __init__.py # Package initialization (if present)
|
| 77 |
+
│ ├── planner_prompt.txt # Prompt for planner agent (if present)
|
| 78 |
+
│ ├── guardrail_prompt.txt # Prompt for guardrail agent (if present)
|
| 79 |
+
│ ├── search_prompt.txt # Prompt for search agent (if present)
|
| 80 |
+
│ └── writer_prompt.txt # Prompt for writer agent (if present)
|
| 81 |
+
├── Dockerfile # Docker configuration for container deployment
|
| 82 |
+
├── pyproject.toml # Project metadata and dependencies (copied from root)
|
| 83 |
+
├── uv.lock # Locked dependency versions (copied from root)
|
| 84 |
+
├── README.md # Project documentation
|
| 85 |
+
└── run.py # Script to run the application locally (if present)
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
## File Descriptions
|
| 89 |
+
|
| 90 |
+
### UI Layer (`ui/`)
|
| 91 |
+
- **app.py** - Main Streamlit web application that provides the user interface. Handles:
|
| 92 |
+
- Text input for research queries
|
| 93 |
+
- Run/Download buttons (PDF, Markdown)
|
| 94 |
+
- Real-time streaming of results
|
| 95 |
+
- Display of final research reports
|
| 96 |
+
- Session state management
|
| 97 |
+
- Button enable/disable during streaming
|
| 98 |
+
|
| 99 |
+
### Agents (`appagents/`)
|
| 100 |
+
- **orchestrator.py** - Central coordinator that:
|
| 101 |
+
- Manages the multi-agent workflow
|
| 102 |
+
- Handles communication between all agents
|
| 103 |
+
- Streams results back to the UI
|
| 104 |
+
- Implements the research pipeline
|
| 105 |
+
|
| 106 |
+
- **planner_agent.py** - Creates a structured plan for the query:
|
| 107 |
+
- Breaks down user query into actionable research steps
|
| 108 |
+
- Defines search queries and research angles
|
| 109 |
+
|
| 110 |
+
- **guardrail_agent.py** - Validates user input:
|
| 111 |
+
- Checks for inappropriate content
|
| 112 |
+
- Ensures compliance with policies
|
| 113 |
+
- Stops workflow if violations detected
|
| 114 |
+
|
| 115 |
+
- **search_agent.py** - Executes web searches:
|
| 116 |
+
- Performs parallel web searches
|
| 117 |
+
- Integrates with Google Search / Serper API
|
| 118 |
+
- Gathers raw research data
|
| 119 |
+
|
| 120 |
+
- **writer_agent.py** - Generates final report:
|
| 121 |
+
- Consolidates search results
|
| 122 |
+
- Formats findings into structured markdown
|
| 123 |
+
- Creates well-organized research summaries
|
| 124 |
+
|
| 125 |
+
- **email_agent.py** - Email delivery (not functional):
|
| 126 |
+
- Intended to send reports via SendGrid
|
| 127 |
+
- Currently not integrated in the workflow
|
| 128 |
+
|
| 129 |
+
### Core Utilities (`core/`)
|
| 130 |
+
- **logger.py** - Centralized logging configuration:
|
| 131 |
+
- Provides consistent logging across agents
|
| 132 |
+
- Handles log levels and formatting
|
| 133 |
+
|
| 134 |
+
### Tools (`tools/`)
|
| 135 |
+
- **google_tools.py** - Google/Serper API wrapper:
|
| 136 |
+
- Executes web searches
|
| 137 |
+
- Handles API authentication and response parsing
|
| 138 |
+
|
| 139 |
+
- **time_tools.py** - Utility functions:
|
| 140 |
+
- Time-related operations
|
| 141 |
+
- Timestamp management
|
| 142 |
+
|
| 143 |
+
### Configuration Files
|
| 144 |
+
- **Dockerfile** - Container deployment:
|
| 145 |
+
- Builds Docker image with Python 3.12
|
| 146 |
+
- Installs dependencies using `uv`
|
| 147 |
+
- Sets up Streamlit server on port 7860
|
| 148 |
+
- Configures PYTHONPATH for module imports
|
| 149 |
+
|
| 150 |
+
- **pyproject.toml** - Project metadata:
|
| 151 |
+
- Package name: "agents"
|
| 152 |
+
- Python version requirement: 3.12
|
| 153 |
+
- Lists all dependencies (OpenAI, LangChain, Streamlit, etc.)
|
| 154 |
+
|
| 155 |
+
- **uv.lock** - Dependency lock file:
|
| 156 |
+
- Ensures reproducible builds
|
| 157 |
+
- Pins exact versions of all dependencies
|
| 158 |
+
|
| 159 |
+
## Key Technologies
|
| 160 |
+
|
| 161 |
+
| Component | Technology | Purpose |
|
| 162 |
+
|-----------|-----------|---------|
|
| 163 |
+
| LLM Framework | OpenAI Agents | Multi-agent orchestration |
|
| 164 |
+
| Web Search | Serper API / Google Search | Research data gathering |
|
| 165 |
+
| Web UI | Streamlit | User interface and interaction |
|
| 166 |
+
| Document Export | ReportLab | PDF generation from markdown |
|
| 167 |
+
| Async Operations | AsyncIO | Parallel agent execution |
|
| 168 |
+
| Dependencies | UV | Fast Python package management |
|
| 169 |
+
| Containerization | Docker | Cloud deployment |
|
| 170 |
+
|
| 171 |
+
## Running Locally
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
# Install dependencies
|
| 175 |
+
uv sync
|
| 176 |
+
|
| 177 |
+
# Set environment variables defined in .env.name file
|
| 178 |
+
export OPENAI_API_KEY="your-key"
|
| 179 |
+
export SERPER_API_KEY="your-key"
|
| 180 |
+
|
| 181 |
+
# Run the Streamlit app
|
| 182 |
+
python run.py
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
## Deployment
|
| 186 |
+
|
| 187 |
+
The project is deployed on Hugging Face Spaces as a Docker container:
|
| 188 |
+
- **Space**: https://huggingface.co/spaces/mishrabp/deep-research
|
| 189 |
+
- **URL**: https://huggingface.co/spaces/mishrabp/deep-research
|
| 190 |
+
- **Trigger**: Automatic deployment on push to `main` branch
|
| 191 |
+
- **Configuration**: `.github/workflows/deep-research-app-hf.yml`
|
src/deep-research/app.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import asyncio
|
| 3 |
+
import time
|
| 4 |
+
import html
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
# Add project root
|
| 10 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
|
| 11 |
+
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph
|
| 14 |
+
from reportlab.lib.styles import getSampleStyleSheet
|
| 15 |
+
from appagents.orchestrator import Orchestrator
|
| 16 |
+
from agents import SQLiteSession
|
| 17 |
+
|
| 18 |
+
load_dotenv(override=True)
|
| 19 |
+
|
| 20 |
+
# --------------------
|
| 21 |
+
# Page config
|
| 22 |
+
# --------------------
|
| 23 |
+
st.set_page_config(page_title="Deep Research AI", layout="wide", page_icon="🧠")
|
| 24 |
+
|
| 25 |
+
# --------------------
|
| 26 |
+
# Premium CSS
|
| 27 |
+
# --------------------
|
| 28 |
+
st.markdown("""
|
| 29 |
+
<style>
|
| 30 |
+
/* Global Defaults */
|
| 31 |
+
.stApp {
|
| 32 |
+
background-color: #f8f9fa;
|
| 33 |
+
font-family: 'Inter', sans-serif;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
/* Remove default Streamlit top padding but add space for Fixed Header - Revert: Just remove top padding */
|
| 37 |
+
.block-container {
|
| 38 |
+
padding-top: 1rem !important; /* Small buffer */
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/* Sticky Header */
|
| 42 |
+
header[data-testid="stHeader"] { display: none; } /* Hide default streamlit header */
|
| 43 |
+
|
| 44 |
+
.header-container {
|
| 45 |
+
position: sticky;
|
| 46 |
+
top: 0;
|
| 47 |
+
z-index: 999;
|
| 48 |
+
|
| 49 |
+
background: linear-gradient(135deg, #0f2027 0%, #203a43 50%, #2c5364 100%);
|
| 50 |
+
color: #ffffff;
|
| 51 |
+
padding: 3rem 2rem;
|
| 52 |
+
display: flex;
|
| 53 |
+
justify-content: space-between;
|
| 54 |
+
align-items: center;
|
| 55 |
+
box-shadow: 0 4px 20px rgba(0,0,0,0.15);
|
| 56 |
+
|
| 57 |
+
margin-top: -4rem; /* Pull up aggressively to cover top gap */
|
| 58 |
+
margin-left: -5rem;
|
| 59 |
+
margin-right: -5rem;
|
| 60 |
+
|
| 61 |
+
border-bottom: none;
|
| 62 |
+
border-radius: 0 0 1rem 1rem;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.app-brand {
|
| 66 |
+
font-family: 'Inter', sans-serif;
|
| 67 |
+
font-size: 1.6rem;
|
| 68 |
+
font-weight: 700;
|
| 69 |
+
letter-spacing: -0.02em;
|
| 70 |
+
color: #ffffff;
|
| 71 |
+
display: flex;
|
| 72 |
+
gap: 0.75rem;
|
| 73 |
+
align-items: center;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
/* Centered Search Area */
|
| 77 |
+
.search-wrapper {
|
| 78 |
+
max-width: 800px;
|
| 79 |
+
margin: 4rem auto 2rem auto;
|
| 80 |
+
text-align: center;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.search-headline {
|
| 84 |
+
font-size: 2.5rem;
|
| 85 |
+
font-weight: 800;
|
| 86 |
+
color: #111;
|
| 87 |
+
margin-bottom: 0.5rem;
|
| 88 |
+
letter-spacing: -0.03em;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.search-subtext {
|
| 92 |
+
font-size: 1.1rem;
|
| 93 |
+
color: #666;
|
| 94 |
+
margin-bottom: 2.5rem;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
/* Input styling override */
|
| 98 |
+
.stTextArea textarea {
|
| 99 |
+
border-radius: 12px !important;
|
| 100 |
+
border: 1px solid #e0e0e0 !important;
|
| 101 |
+
padding: 1rem !important;
|
| 102 |
+
background: white !important;
|
| 103 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.03) !important;
|
| 104 |
+
font-size: 1.1rem !important;
|
| 105 |
+
}
|
| 106 |
+
.stTextArea textarea:focus {
|
| 107 |
+
border-color: #667eea !important;
|
| 108 |
+
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.1) !important;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
/* Custom Button */
|
| 112 |
+
.stButton button {
|
| 113 |
+
background: black !important;
|
| 114 |
+
color: white !important;
|
| 115 |
+
border-radius: 30px !important;
|
| 116 |
+
padding: 0.5rem 2rem !important;
|
| 117 |
+
border: none !important;
|
| 118 |
+
box-shadow: 0 4px 10px rgba(0,0,0,0.2) !important;
|
| 119 |
+
transition: transform 0.1s ease;
|
| 120 |
+
}
|
| 121 |
+
.stButton button:hover {
|
| 122 |
+
transform: scale(1.02);
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
/* Report Paper Style */
|
| 126 |
+
.report-paper {
|
| 127 |
+
max-width: 850px;
|
| 128 |
+
margin: 2rem auto;
|
| 129 |
+
background: white;
|
| 130 |
+
padding: 4rem;
|
| 131 |
+
min-height: 800px;
|
| 132 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1), 0 20px 40px rgba(0,0,0,0.05);
|
| 133 |
+
color: #2c3e50;
|
| 134 |
+
border: 1px solid #f0f0f0;
|
| 135 |
+
}
|
| 136 |
+
</style>
|
| 137 |
+
""", unsafe_allow_html=True)
|
| 138 |
+
|
| 139 |
+
# --------------------
|
| 140 |
+
# Session State
|
| 141 |
+
# --------------------
|
| 142 |
+
if "session_id" not in st.session_state:
|
| 143 |
+
st.session_state.session_id = str(id(st))
|
| 144 |
+
|
| 145 |
+
if "final_report" not in st.session_state:
|
| 146 |
+
st.session_state.final_report = ""
|
| 147 |
+
|
| 148 |
+
if "is_researching" not in st.session_state:
|
| 149 |
+
st.session_state.is_researching = False
|
| 150 |
+
|
| 151 |
+
if "research_logs" not in st.session_state:
|
| 152 |
+
st.session_state.research_logs = []
|
| 153 |
+
|
| 154 |
+
# --------------------
|
| 155 |
+
# Helpers
|
| 156 |
+
# --------------------
|
| 157 |
+
def make_pdf_bytes(text: str) -> bytes:
    """Render a markdown-flavoured report string into PDF bytes.

    Supports a small markdown subset: '# ', '## ' and '### ' headings plus
    '- ' bullet lines; every other non-blank line becomes a normal paragraph.
    Text is HTML-escaped because ReportLab Paragraph interprets inline markup.

    Args:
        text: Report content as markdown-ish plain text.

    Returns:
        The generated PDF document as raw bytes.
    """
    buf = BytesIO()
    # Margins are expressed in points (1 inch = 72 pt).
    doc = SimpleDocTemplate(buf, topMargin=0.5*72, bottomMargin=0.5*72, leftMargin=0.75*72, rightMargin=0.75*72)
    styles = getSampleStyleSheet()
    story = []

    for line in text.split("\n"):
        stripped = line.strip()
        if not stripped:
            # Preserve vertical spacing for blank source lines.
            story.append(Paragraph(" ", styles["Normal"]))
            continue

        # Heading prefixes are mutually exclusive because each test requires a
        # trailing space ("## x" does not match "# ").
        if stripped.startswith("# "):
            story.append(Paragraph(html.escape(stripped[2:]), styles["Heading1"]))
        elif stripped.startswith("## "):
            story.append(Paragraph(html.escape(stripped[3:]), styles["Heading2"]))
        elif stripped.startswith("### "):
            # Generalization: research reports frequently use level-3 headings,
            # which previously fell through and rendered as body text.
            story.append(Paragraph(html.escape(stripped[4:]), styles["Heading3"]))
        elif stripped.startswith("- "):
            story.append(Paragraph("• " + html.escape(stripped[2:]), styles["Normal"]))
        else:
            story.append(Paragraph(html.escape(stripped), styles["Normal"]))

    doc.build(story)
    buf.seek(0)
    return buf.read()
|
| 181 |
+
|
| 182 |
+
# --------------------
|
| 183 |
+
# Logic
|
| 184 |
+
# --------------------
|
| 185 |
+
async def run_research(query: str):
    """Drive the Orchestrator for one query and stream progress into the UI.

    Consumes the orchestrator's async generator: short status strings are
    echoed into the st.status container, while any other chunk is treated as
    the final report (or a terminal error note). The last such chunk is stored
    in session state and the app is rerun to switch to the result view.

    Args:
        query: The research topic entered by the user.
    """
    session_id = st.session_state.session_id
    # Each browser session gets its own SQLite-backed conversation store.
    session = SQLiteSession(f"session_{session_id}.db")
    orchestrator = Orchestrator(session=session)

    report_content = ""
    status_container = st.status("🔍 Researching...", expanded=True)

    try:
        async for chunk in orchestrator.run(query):
            # Filtering heuristic: Orchestrator yields status messages then the
            # final report. Status messages are short and start with known prefixes.
            if (chunk.startswith("View trace") or
                chunk.startswith("Searches") or
                chunk.startswith("Report written") or
                chunk.startswith("Starting")):
                status_container.markdown(chunk)
            else:
                # Assume this is the report content (or the final error note).
                report_content = chunk
                status_container.markdown("Processing final output...")
    except Exception as e:
        status_container.update(label="❌ Error", state="error")
        st.error(f"Error: {e}")
        st.session_state.is_researching = False
        return

    st.session_state.final_report = report_content
    st.session_state.is_researching = False
    status_container.update(label="✅ Research Complete", state="complete", expanded=False)
    # Bug fix: st.rerun() works by raising RerunException (an Exception
    # subclass). Calling it inside the try block let `except Exception`
    # swallow the rerun and display a spurious error, so it now runs after
    # the try/except has completed.
    st.rerun()
|
| 217 |
+
|
| 218 |
+
# --------------------
|
| 219 |
+
# Layout
|
| 220 |
+
# --------------------
|
| 221 |
+
|
| 222 |
+
# Custom Header
|
| 223 |
+
st.markdown("""
|
| 224 |
+
<div class="header-container">
|
| 225 |
+
<div class="app-brand">
|
| 226 |
+
<span>🧠</span> Deep Research <i>(OpenAI Agentic)</i>
|
| 227 |
+
</div>
|
| 228 |
+
<div>
|
| 229 |
+
<!-- Could add profile or other links here -->
|
| 230 |
+
</div>
|
| 231 |
+
</div>
|
| 232 |
+
""", unsafe_allow_html=True)
|
| 233 |
+
|
| 234 |
+
# Sidebar Settings
|
| 235 |
+
with st.sidebar:
|
| 236 |
+
st.header("⚙️ Configuration")
|
| 237 |
+
research_depth = st.select_slider("Research Depth", options=["Quick", "Standard", "Deep"], value="Standard")
|
| 238 |
+
report_format = st.selectbox("Report Format", ["Academic", "Business", "Creative"])
|
| 239 |
+
st.caption("Settings affect the tone and depth of the final report.")
|
| 240 |
+
|
| 241 |
+
st.divider()
|
| 242 |
+
if st.button("🗑️ Clear History"):
|
| 243 |
+
st.session_state.final_report = ""
|
| 244 |
+
st.rerun()
|
| 245 |
+
|
| 246 |
+
# Main Interface
|
| 247 |
+
if not st.session_state.final_report and not st.session_state.is_researching:
|
| 248 |
+
# Centered Input View
|
| 249 |
+
st.markdown("""
|
| 250 |
+
<div class="search-wrapper">
|
| 251 |
+
<div class="search-headline">What do you want to know?</div>
|
| 252 |
+
<div class="search-subtext">Deep Research will browse the web, analyze sources, and write a comprehensive report for you.</div>
|
| 253 |
+
</div>
|
| 254 |
+
""", unsafe_allow_html=True)
|
| 255 |
+
|
| 256 |
+
col_c1, col_c2, col_c3 = st.columns([1, 2, 1])
|
| 257 |
+
with col_c2:
|
| 258 |
+
query = st.text_area("Research Topic", height=60, placeholder="e.g. The future of quantum computing in drug discovery...", label_visibility="collapsed")
|
| 259 |
+
|
| 260 |
+
col_b1, col_b2, col_b3 = st.columns([1, 1, 1])
|
| 261 |
+
with col_b2:
|
| 262 |
+
if st.button("Start Research", use_container_width=True):
|
| 263 |
+
if query.strip():
|
| 264 |
+
st.session_state.is_researching = True
|
| 265 |
+
st.session_state.current_query = query
|
| 266 |
+
st.rerun()
|
| 267 |
+
|
| 268 |
+
elif st.session_state.is_researching:
|
| 269 |
+
# Researching View
|
| 270 |
+
st.markdown("""
|
| 271 |
+
<div class="search-wrapper">
|
| 272 |
+
<div class="search-headline">Compiling Report...</div>
|
| 273 |
+
</div>
|
| 274 |
+
""", unsafe_allow_html=True)
|
| 275 |
+
|
| 276 |
+
# Trigger async run
|
| 277 |
+
asyncio.run(run_research(st.session_state.current_query))
|
| 278 |
+
|
| 279 |
+
else:
|
| 280 |
+
# Result View - Title removed to let Sticky Header be the main branding,
|
| 281 |
+
# and Report itself be the focus.
|
| 282 |
+
|
| 283 |
+
# Action Toolbar
|
| 284 |
+
col_a1, col_a2, col_a3, col_a4 = st.columns([2, 1, 1, 2])
|
| 285 |
+
with col_a2:
|
| 286 |
+
pdf_bytes = make_pdf_bytes(st.session_state.final_report)
|
| 287 |
+
st.download_button("📄 Download PDF", pdf_bytes, "report.pdf", mime="application/pdf", use_container_width=True)
|
| 288 |
+
with col_a3:
|
| 289 |
+
if st.button("🔄 New Search", use_container_width=True):
|
| 290 |
+
st.session_state.final_report = ""
|
| 291 |
+
st.rerun()
|
| 292 |
+
|
| 293 |
+
# Final Report Render
|
| 294 |
+
# We use a container with a class to apply the 'sheet' look via global CSS if possible,
|
| 295 |
+
# or just use standard Markdown rendering which looks best.
|
| 296 |
+
|
| 297 |
+
with st.container():
|
| 298 |
+
st.markdown(st.session_state.final_report)
|
| 299 |
+
|
src/deep-research/appagents/__init__.py
ADDED
|
File without changes
|
src/deep-research/appagents/email_agent.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
import sendgrid
|
| 5 |
+
from sendgrid.helpers.mail import Email, Mail, Content, To
|
| 6 |
+
from agents import Agent, function_tool
|
| 7 |
+
from core.logger import log_call
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@function_tool
@log_call
def send_email(subject: str, html_body: str) -> Dict[str, str]:
    """ Send an email with the given subject and HTML body """
    # SendGrid client is authenticated via the SENDGRID_API_KEY env var.
    sg_client = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
    sender = Email("bm80177@gmail.com")  # put your verified sender here
    recipient = To("bibhup_mishra@yahoo.com")  # put your recipient here
    body = Content("text/html", html_body)
    message = Mail(sender, recipient, subject, body).get()
    response = sg_client.client.mail.send.post(request_body=message)
    print("Email response", response.status_code)
    return {"status": "success"}
|
| 22 |
+
|
| 23 |
+
INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
|
| 24 |
+
You will be provided with a detailed report. You should use your tool to send one email, providing the
|
| 25 |
+
report converted into clean, well presented HTML with an appropriate subject line."""
|
| 26 |
+
|
| 27 |
+
email_agent = Agent(
|
| 28 |
+
name="Email agent",
|
| 29 |
+
instructions=INSTRUCTIONS,
|
| 30 |
+
tools=[send_email],
|
| 31 |
+
model="gpt-4o-mini",
|
| 32 |
+
)
|
src/deep-research/appagents/guardrail_agent.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from agents import (
|
| 4 |
+
Agent,
|
| 5 |
+
Runner,
|
| 6 |
+
input_guardrail,
|
| 7 |
+
GuardrailFunctionOutput,
|
| 8 |
+
)
|
| 9 |
+
from tools.time_tools import TimeTools
|
| 10 |
+
from openai import AsyncOpenAI
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ✅ Step 1: Define structured output schema
|
| 14 |
+
class UnparliamentaryCheckOutput(BaseModel):
    """Structured verdict produced by the guardrail LLM check."""
    # True when the analyzed input contains unparliamentary/offensive language.
    has_unparliamentary_language: bool
    # Brief model-provided justification for the verdict.
    explanation: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ✅ Step 2: Define the LLM guardrail agent
|
| 20 |
+
# LLM-backed classifier that inspects raw user input for unparliamentary
# language and returns a structured UnparliamentaryCheckOutput verdict.
guardrail_agent = Agent(
    name="Unparliamentary language check",
    instructions=(
        "Analyze the user input and determine if it contains any unparliamentary, "
        "offensive, or disrespectful language. "
        "If it does, set has_unparliamentary_language=true and explain briefly why. "
        "Otherwise, set it to false."
    ),
    # Forces the model to emit the structured schema rather than free text.
    output_type=UnparliamentaryCheckOutput,
    model="gpt-4o-mini",
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ✅ Step 3: Use the input guardrail decorator
|
| 34 |
+
@input_guardrail
async def guardrail_against_unparliamentary(ctx, agent, message: str):
    """Guardrail function that blocks messages with unparliamentary words."""
    # Delegate the classification to the guardrail LLM agent.
    check = await Runner.run(guardrail_agent, message, context=ctx.context)
    verdict = check.final_output

    # The tripwire flag aborts the parent agent run when True.
    return GuardrailFunctionOutput(
        output_info={"found_unparliamentary_word": verdict.model_dump()},
        tripwire_triggered=verdict.has_unparliamentary_language,
    )
|
src/deep-research/appagents/orchestrator.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Runner, trace, gen_trace_id, SQLiteSession
|
| 2 |
+
from appagents.search_agent import search_agent
|
| 3 |
+
from appagents.planner_agent import planner_agent, WebSearchItem, WebSearchPlan
|
| 4 |
+
from appagents.writer_agent import writer_agent, ReportData
|
| 5 |
+
from appagents.email_agent import email_agent
|
| 6 |
+
from agents.exceptions import InputGuardrailTripwireTriggered
|
| 7 |
+
from core.logger import log_call
|
| 8 |
+
import asyncio
|
| 9 |
+
|
| 10 |
+
class Orchestrator:
    """Coordinates the deep-research pipeline: plan -> search -> write -> (email)."""

    def __init__(self, session: SQLiteSession | None = None):
        # Conversation memory shared across agent runs for this research session.
        self.session = session or SQLiteSession()

    @log_call
    async def run(self, query: str):
        """Run the deep research process, yielding status updates and the final report.

        Yields short status strings first and the final markdown report (or a
        terminal note) as the last chunk.
        """
        trace_id = gen_trace_id()
        with trace("Deep Research Orchestrator", trace_id=trace_id):
            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
            yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            print("Starting research...")
            search_plan = await self.plan_searches(query)

            # An empty plan means the guardrail blocked the query or planning failed.
            if not search_plan or not getattr(search_plan, "searches", []):
                # Bug fix: `note` can be None (optional field); guard before .lower().
                note = getattr(search_plan, "note", "") or ""
                if "unparliamentary" in note.lower():
                    print("⚠️ Guardrail triggered – unparliamentary language detected.")
                    yield note
                else:
                    yield note or "No search results found, ending research."
                return

            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)
            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)
            yield "Report written, sending email..."
            # await self.send_email(report)
            # yield "Email sent, research complete"
            yield report.markdown_report

    @log_call
    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Plan the searches to perform for the query.

        Returns an empty plan with an explanatory note when the guardrail trips
        or planning fails, so callers never have to catch here.
        """
        print("Planning searches...")
        try:
            result = await Runner.run(
                planner_agent,
                f"Query: {query}",
                session=self.session,
            )
            print(f"Will perform {len(result.final_output.searches)} searches")
            return result.final_output_as(WebSearchPlan)

        except InputGuardrailTripwireTriggered as e:
            # NOTE(review): assumes e.result behaves like a nested dict; verify
            # against the agents SDK exception shape before relying on it.
            explanation = getattr(e, "result", {}).get("output_info", {}).get(
                "found_unparliamentary_word", {}
            ).get("explanation", "")
            print("⚠️ Guardrail triggered – unparliamentary language detected.")
            return WebSearchPlan(searches=[], note=f"Blocked due to unparliamentary input. {explanation}")

        except Exception as e:
            print(f"❌ Error during planning: {e}")
            return WebSearchPlan(searches=[], note="An error occurred while planning searches.")

    @log_call
    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """Execute every planned search concurrently and collect the summaries."""
        print("Searching...")
        num_completed = 0
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        # Consume searches as they finish so progress is reported promptly.
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            num_completed += 1
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    @log_call
    async def search(self, item: WebSearchItem) -> str | None:
        """Run a single web search; returns its summary or None on failure."""
        # Renamed local from `input` to avoid shadowing the builtin.
        search_input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(
                search_agent,
                search_input,
            )
            return str(result.final_output)
        except Exception:
            # Best-effort: a failed search is dropped rather than aborting the run.
            return None

    @log_call
    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Consolidate search summaries into the final structured report."""
        print("Thinking about report...")
        writer_input = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(
            writer_agent,
            writer_input,
        )

        print("Finished writing report")
        return result.final_output_as(ReportData)

    @log_call
    async def send_email(self, report: ReportData) -> ReportData:
        """Email the markdown report via the email agent; returns the report unchanged.

        Bug fix: the annotation previously said None although the report is returned.
        """
        print("Writing email...")
        result = await Runner.run(
            email_agent,
            report.markdown_report,
        )
        print("Email sent")
        return report
|
src/deep-research/appagents/planner_agent.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel
from openai import AsyncOpenAI
from tools.time_tools import TimeTools
from appagents.guardrail_agent import guardrail_against_unparliamentary

# Number of search terms the planner is asked to produce per query.
HOW_MANY_SEARCHES = 10

INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. \
Use the tool to find current date & time, and use it where relevant to inform your search and summary."


class WebSearchItem(BaseModel):
    """One planned web search: the term plus the model's rationale for it."""
    reason: str = Field(description="Your reasoning for why this search is important to the query.")
    query: str = Field(description="The search term to use for the web search.")
    current_date_time: str = Field(description="Current date and time.")


class WebSearchPlan(BaseModel):
    """Structured output schema the planner agent must emit."""
    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")

# Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

# Groq exposed through its OpenAI-compatible endpoint (alternate backend).
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
groq_api_key = os.getenv('GROQ_API_KEY')
groq_client = AsyncOpenAI(base_url=GROQ_BASE_URL, api_key=groq_api_key)
groq_model = OpenAIChatCompletionsModel(model="groq/compound", openai_client=groq_client)

openai_model = "gpt-4.1-mini"

# Note: Many models do not like tool call and json output_schema used together.

# Planner: uses the datetime tool, enforces WebSearchPlan as structured output,
# and screens input through the unparliamentary-language guardrail.
planner_agent = Agent(
    name="PlannerAgent",
    instructions=INSTRUCTIONS,
    model=openai_model,
    tools=[TimeTools.current_datetime],
    output_type=WebSearchPlan,
    input_guardrails=[guardrail_against_unparliamentary],
)
|
src/deep-research/appagents/search_agent.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

from agents.model_settings import ModelSettings
from tools.google_tools import GoogleTools

# System prompt for the per-term search/summarize agent. Output feeds the
# writer agent, so brevity is prioritized over polish.
INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and \
produce a concise summary of the results. The summary must 3-5 paragraphs and less than 500 \
words. Capture the main points. Write succintly, no need to have complete sentences or good \
grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the \
essence and ignore any fluff. Do not include any additional commentary other than the summary itself."

# Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)

# Alternate (previously commented-out) configuration: OpenAI "gpt-4o-mini"
# with WebSearchTool(search_context_size="low") instead of GoogleTools.search.


# -----------------------------
# CONNECT TO MCP SERVER
# -----------------------------
async def setup_mcp_tools():
    """
    Starts the MCP server via stdio and returns its list of tools
    that can be attached to the agent.
    """
    # Bug fix: MCPServerStdio was referenced without ever being imported,
    # which raised NameError the first time this function was called.
    # Imported lazily so modules that never use MCP pay no cost.
    from agents.mcp import MCPServerStdio

    # Absolute path ensures the script is found even from a notebook.
    script_path = os.path.abspath("../mcp/search-server.py")

    params = {
        "command": "uvx",  # or "uv" depending on your environment
        "args": ["run", script_path],
    }

    # Start MCP server and list available tools.
    async with MCPServerStdio(
        params=params,
        client_session_timeout_seconds=60,
        verbose=True,  # helpful for debugging
    ) as server:
        mcp_tools = await server.list_tools()
        print(f"✅ Connected to MCP server with {len(mcp_tools)} tool(s).")
        return mcp_tools


# Gemini-backed search agent; tool_choice="required" forces a search call.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    # tools=[WebSearchTool(search_context_size="low")],
    tools=[GoogleTools.search],
    model=gemini_model,
    model_settings=ModelSettings(tool_choice="required"),
)
|
| 87 |
+
|
src/deep-research/appagents/writer_agent.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from pydantic import BaseModel, Field
from agents import Agent, OpenAIChatCompletionsModel, WebSearchTool
from openai import AsyncOpenAI

# System prompt for the report-writing agent: outline first, then a long
# markdown report.
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)


class ReportData(BaseModel):
    """Structured output schema the writer agent must emit."""
    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")

    markdown_report: str = Field(description="The final report")

    follow_up_questions: list[str] = Field(description="Suggested topics to research further")

# Gemini exposed through its OpenAI-compatible endpoint.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
google_api_key = os.getenv('GOOGLE_API_KEY')
gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)
gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


# writer_agent = Agent(
#     name="WriterAgent",
#     instructions=INSTRUCTIONS,
#     model="gpt-5-mini",
#     output_type=ReportData,
# )

# Gemini-backed writer agent producing structured ReportData.
writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=gemini_model,
    output_type=ReportData,
)
|
src/deep-research/core/__init__.py
ADDED
|
File without changes
|
src/deep-research/core/logger.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import functools
import datetime
import inspect

def log_call(func):
    """
    Decorator that logs when a function is called (with its arguments) and
    logs + re-raises any exception.

    Improvement: this project applies @log_call to ``async def`` methods
    (e.g. the research manager), but the original sync-only wrapper returned
    the coroutine unawaited, so exceptions raised *inside* the coroutine body
    were never logged. Coroutine functions now get an async wrapper.
    """
    def _describe(args, kwargs):
        # repr() of each positional and keyword argument, comma-joined.
        return ", ".join(
            [repr(a) for a in args] + [f"{k}={v!r}" for k, v in kwargs.items()]
        )

    def _stamp():
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            timestamp = _stamp()
            print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_describe(args, kwargs)})")
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
                raise
        return async_wrapper

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        timestamp = _stamp()
        print(f"[{timestamp}] 🚀 Calling: {func.__name__}({_describe(args, kwargs)})")
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"[{timestamp}] ❌ Error in {func.__name__}: {e}")
            raise
    return wrapper
|
src/deep-research/prompts/__init__.py
ADDED
|
File without changes
|
src/deep-research/tools/__init__.py
ADDED
|
File without changes
|
src/deep-research/tools/google_tools.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from agents import function_tool
|
| 5 |
+
from core.logger import log_call
|
| 6 |
+
|
| 7 |
+
# Load environment variables once
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ============================================================
|
| 12 |
+
# 🔹 GOOGLE SEARCH TOOLSET (Serper.dev API)
|
| 13 |
+
# ============================================================
|
| 14 |
+
class GoogleTools:
    """
    GoogleTools provides function tools to perform web searches
    using the Serper.dev API (Google Search). I am a fallback for
    retrieving recent information from the web.
    """

    @staticmethod
    @function_tool
    @log_call
    def search(query: str, num_results: int = 3) -> str:
        """
        Perform a general Google search using Serper.dev API.

        Parameters:
        -----------
        query : str
            The search query string, e.g., "latest Tesla stock news".
        num_results : int, optional (default=3)
            Maximum number of search results to return.

        Returns:
        --------
        str
            Nicely formatted search results, or an error message string
            (this tool never raises — agent runs should not crash on a
            failed search).
        """
        try:
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "❌ Missing SERPER_API_KEY in environment variables."

            url = "https://google.serper.dev/search"
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json"
            }
            payload = {
                "q": query,
                "gl": "us",  # country code (optional)
                "hl": "en",  # language code (optional)
            }

            # Bug fix: requests has no default timeout, so a stalled
            # connection would hang the agent run indefinitely.
            response = requests.post(url, headers=headers, json=payload, timeout=15)
            response.raise_for_status()
            data = response.json()

            organic_results = data.get("organic", [])
            if not organic_results:
                return "No search results found."

            formatted = []
            for item in organic_results[:num_results]:
                title = item.get("title", "No title")
                link = item.get("link", "No link")
                snippet = item.get("snippet", "")
                formatted.append(
                    f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n"
                )

            return "\n".join(formatted)

        except requests.exceptions.RequestException as e:
            return f"⚠️ Network error during Google search: {e}"
        except Exception as e:
            return f"⚠️ Error performing Google search: {e}"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ============================================================
|
| 83 |
+
# 🔹 OPENAI & OTHER MODEL TOOLS
|
| 84 |
+
# ============================================================
|
| 85 |
+
class ModelTools:
    """
    ModelTools provides function tools to interact with LLM APIs
    such as OpenAI, Gemini, or Groq.

    Features:
    - Send prompts to a language model.
    - Receive structured text completions.
    - Can be extended to support multiple LLM providers.
    """

    @staticmethod
    @function_tool
    def query_openai(prompt: str, model: str = "gpt-4o-mini") -> str:
        """
        Query an OpenAI language model with a prompt.

        Parameters:
        -----------
        prompt : str
            User-provided prompt for the model.
        model : str, optional (default="gpt-4o-mini")
            Model name to query (e.g., "gpt-4o-mini", "gpt-4").

        Returns:
        --------
        str
            The model's response text, or an error message string if the
            network/API call fails (never raises).

        Example:
        --------
        query_openai("Explain AI in finance")
        """
        try:
            # Delayed import keeps module load cheap when this tool is unused.
            from openai import OpenAI
            completion = OpenAI(api_key=os.getenv("OPENAI_API_KEY")).chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error querying OpenAI API: {e}"
|
src/deep-research/tools/time_tools.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from agents import function_tool
|
| 3 |
+
from core.logger import log_call
|
| 4 |
+
|
| 5 |
+
class TimeTools:
    """Provides tools related to current date and time."""

    @staticmethod
    @function_tool
    @log_call
    def current_datetime(format: str = "%Y-%m-%d %H:%M:%S") -> str:
        """
        Returns the current date and time as a formatted string.

        Args:
            format (str): Optional datetime format (default: "YYYY-MM-DD HH:MM:SS")

        Returns:
            str: Current date and time in the specified format
        """
        # Single-expression form; strftime applies the caller's pattern.
        return datetime.now().strftime(format)
|
uv.lock
CHANGED
|
@@ -14,6 +14,7 @@ dependencies = [
|
|
| 14 |
{ name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
|
| 15 |
{ name = "beautifulsoup4" },
|
| 16 |
{ name = "chromadb" },
|
|
|
|
| 17 |
{ name = "ddgs" },
|
| 18 |
{ name = "duckduckgo-search" },
|
| 19 |
{ name = "faiss-cpu" },
|
|
@@ -52,6 +53,7 @@ dependencies = [
|
|
| 52 |
{ name = "reportlab" },
|
| 53 |
{ name = "requests" },
|
| 54 |
{ name = "scikit-learn" },
|
|
|
|
| 55 |
{ name = "sentence-transformers" },
|
| 56 |
{ name = "serpapi" },
|
| 57 |
{ name = "smithery" },
|
|
@@ -79,6 +81,7 @@ requires-dist = [
|
|
| 79 |
{ name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
|
| 80 |
{ name = "beautifulsoup4", specifier = ">=4.12.3" },
|
| 81 |
{ name = "chromadb", specifier = "==1.3.5" },
|
|
|
|
| 82 |
{ name = "ddgs", specifier = ">=9.9.2" },
|
| 83 |
{ name = "duckduckgo-search" },
|
| 84 |
{ name = "faiss-cpu", specifier = ">=1.13.0" },
|
|
@@ -117,6 +120,7 @@ requires-dist = [
|
|
| 117 |
{ name = "reportlab", specifier = ">=4.4.5" },
|
| 118 |
{ name = "requests", specifier = ">=2.32.3" },
|
| 119 |
{ name = "scikit-learn", specifier = ">=1.7.2" },
|
|
|
|
| 120 |
{ name = "sentence-transformers", specifier = ">=5.1.2" },
|
| 121 |
{ name = "serpapi" },
|
| 122 |
{ name = "smithery", specifier = ">=0.4.4" },
|
|
@@ -719,6 +723,31 @@ wheels = [
|
|
| 719 |
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
|
| 720 |
]
|
| 721 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 722 |
[[package]]
|
| 723 |
name = "ddgs"
|
| 724 |
version = "9.9.2"
|
|
@@ -757,6 +786,15 @@ wheels = [
|
|
| 757 |
{ url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
|
| 758 |
]
|
| 759 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
[[package]]
|
| 761 |
name = "distro"
|
| 762 |
version = "1.9.0"
|
|
@@ -917,6 +955,11 @@ wheels = [
|
|
| 917 |
{ url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
|
| 918 |
]
|
| 919 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
[[package]]
|
| 921 |
name = "gitdb"
|
| 922 |
version = "4.0.12"
|
|
@@ -2039,6 +2082,22 @@ wheels = [
|
|
| 2039 |
{ url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
|
| 2040 |
]
|
| 2041 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2042 |
[[package]]
|
| 2043 |
name = "multitasking"
|
| 2044 |
version = "0.0.12"
|
|
@@ -3083,6 +3142,15 @@ wheels = [
|
|
| 3083 |
{ url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
|
| 3084 |
]
|
| 3085 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3086 |
[[package]]
|
| 3087 |
name = "python-multipart"
|
| 3088 |
version = "0.0.20"
|
|
@@ -3364,6 +3432,20 @@ wheels = [
|
|
| 3364 |
{ url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
|
| 3365 |
]
|
| 3366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3367 |
[[package]]
|
| 3368 |
name = "sentence-transformers"
|
| 3369 |
version = "5.1.2"
|
|
@@ -4019,6 +4101,18 @@ wheels = [
|
|
| 4019 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
|
| 4020 |
]
|
| 4021 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4022 |
[[package]]
|
| 4023 |
name = "wikipedia"
|
| 4024 |
version = "1.4.0"
|
|
|
|
| 14 |
{ name = "autogen-ext", extra = ["grpc", "mcp", "ollama", "openai"] },
|
| 15 |
{ name = "beautifulsoup4" },
|
| 16 |
{ name = "chromadb" },
|
| 17 |
+
{ name = "datasets" },
|
| 18 |
{ name = "ddgs" },
|
| 19 |
{ name = "duckduckgo-search" },
|
| 20 |
{ name = "faiss-cpu" },
|
|
|
|
| 53 |
{ name = "reportlab" },
|
| 54 |
{ name = "requests" },
|
| 55 |
{ name = "scikit-learn" },
|
| 56 |
+
{ name = "sendgrid" },
|
| 57 |
{ name = "sentence-transformers" },
|
| 58 |
{ name = "serpapi" },
|
| 59 |
{ name = "smithery" },
|
|
|
|
| 81 |
{ name = "autogen-ext", extras = ["grpc", "mcp", "ollama", "openai"], specifier = ">=0.7.5" },
|
| 82 |
{ name = "beautifulsoup4", specifier = ">=4.12.3" },
|
| 83 |
{ name = "chromadb", specifier = "==1.3.5" },
|
| 84 |
+
{ name = "datasets", specifier = ">=4.4.1" },
|
| 85 |
{ name = "ddgs", specifier = ">=9.9.2" },
|
| 86 |
{ name = "duckduckgo-search" },
|
| 87 |
{ name = "faiss-cpu", specifier = ">=1.13.0" },
|
|
|
|
| 120 |
{ name = "reportlab", specifier = ">=4.4.5" },
|
| 121 |
{ name = "requests", specifier = ">=2.32.3" },
|
| 122 |
{ name = "scikit-learn", specifier = ">=1.7.2" },
|
| 123 |
+
{ name = "sendgrid" },
|
| 124 |
{ name = "sentence-transformers", specifier = ">=5.1.2" },
|
| 125 |
{ name = "serpapi" },
|
| 126 |
{ name = "smithery", specifier = ">=0.4.4" },
|
|
|
|
| 723 |
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
|
| 724 |
]
|
| 725 |
|
| 726 |
+
[[package]]
|
| 727 |
+
name = "datasets"
|
| 728 |
+
version = "4.4.1"
|
| 729 |
+
source = { registry = "https://pypi.org/simple" }
|
| 730 |
+
dependencies = [
|
| 731 |
+
{ name = "dill" },
|
| 732 |
+
{ name = "filelock" },
|
| 733 |
+
{ name = "fsspec", extra = ["http"] },
|
| 734 |
+
{ name = "httpx" },
|
| 735 |
+
{ name = "huggingface-hub" },
|
| 736 |
+
{ name = "multiprocess" },
|
| 737 |
+
{ name = "numpy" },
|
| 738 |
+
{ name = "packaging" },
|
| 739 |
+
{ name = "pandas" },
|
| 740 |
+
{ name = "pyarrow" },
|
| 741 |
+
{ name = "pyyaml" },
|
| 742 |
+
{ name = "requests" },
|
| 743 |
+
{ name = "tqdm" },
|
| 744 |
+
{ name = "xxhash" },
|
| 745 |
+
]
|
| 746 |
+
sdist = { url = "https://files.pythonhosted.org/packages/93/bf/0dae295d6d1ba0b1a200a9dd216838464b5bbd05da01407cb1330b377445/datasets-4.4.1.tar.gz", hash = "sha256:80322699aa8c0bbbdb7caa87906da689c3c2e29523cff698775c67f28fdab1fc", size = 585341, upload-time = "2025-11-05T16:00:38.162Z" }
|
| 747 |
+
wheels = [
|
| 748 |
+
{ url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
|
| 749 |
+
]
|
| 750 |
+
|
| 751 |
[[package]]
|
| 752 |
name = "ddgs"
|
| 753 |
version = "9.9.2"
|
|
|
|
| 786 |
{ url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
|
| 787 |
]
|
| 788 |
|
| 789 |
+
[[package]]
|
| 790 |
+
name = "dill"
|
| 791 |
+
version = "0.4.0"
|
| 792 |
+
source = { registry = "https://pypi.org/simple" }
|
| 793 |
+
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
| 794 |
+
wheels = [
|
| 795 |
+
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
|
| 796 |
+
]
|
| 797 |
+
|
| 798 |
[[package]]
|
| 799 |
name = "distro"
|
| 800 |
version = "1.9.0"
|
|
|
|
| 955 |
{ url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
|
| 956 |
]
|
| 957 |
|
| 958 |
+
[package.optional-dependencies]
|
| 959 |
+
http = [
|
| 960 |
+
{ name = "aiohttp" },
|
| 961 |
+
]
|
| 962 |
+
|
| 963 |
[[package]]
|
| 964 |
name = "gitdb"
|
| 965 |
version = "4.0.12"
|
|
|
|
| 2082 |
{ url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
|
| 2083 |
]
|
| 2084 |
|
| 2085 |
+
[[package]]
|
| 2086 |
+
name = "multiprocess"
|
| 2087 |
+
version = "0.70.18"
|
| 2088 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2089 |
+
dependencies = [
|
| 2090 |
+
{ name = "dill" },
|
| 2091 |
+
]
|
| 2092 |
+
sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
|
| 2093 |
+
wheels = [
|
| 2094 |
+
{ url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
|
| 2095 |
+
{ url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
|
| 2096 |
+
{ url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
|
| 2097 |
+
{ url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
|
| 2098 |
+
{ url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
|
| 2099 |
+
]
|
| 2100 |
+
|
| 2101 |
[[package]]
|
| 2102 |
name = "multitasking"
|
| 2103 |
version = "0.0.12"
|
|
|
|
| 3142 |
{ url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
|
| 3143 |
]
|
| 3144 |
|
| 3145 |
+
[[package]]
|
| 3146 |
+
name = "python-http-client"
|
| 3147 |
+
version = "3.3.7"
|
| 3148 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3149 |
+
sdist = { url = "https://files.pythonhosted.org/packages/56/fa/284e52a8c6dcbe25671f02d217bf2f85660db940088faf18ae7a05e97313/python_http_client-3.3.7.tar.gz", hash = "sha256:bf841ee45262747e00dec7ee9971dfb8c7d83083f5713596488d67739170cea0", size = 9377, upload-time = "2022-03-09T20:23:56.386Z" }
|
| 3150 |
+
wheels = [
|
| 3151 |
+
{ url = "https://files.pythonhosted.org/packages/29/31/9b360138f4e4035ee9dac4fe1132b6437bd05751aaf1db2a2d83dc45db5f/python_http_client-3.3.7-py3-none-any.whl", hash = "sha256:ad371d2bbedc6ea15c26179c6222a78bc9308d272435ddf1d5c84f068f249a36", size = 8352, upload-time = "2022-03-09T20:23:54.862Z" },
|
| 3152 |
+
]
|
| 3153 |
+
|
| 3154 |
[[package]]
|
| 3155 |
name = "python-multipart"
|
| 3156 |
version = "0.0.20"
|
|
|
|
| 3432 |
{ url = "https://files.pythonhosted.org/packages/ce/69/c5c7807fd007dad4f48e0a5f2153038dc96e8725d3345b9ee31b2b7bed46/scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2", size = 25463014, upload-time = "2025-10-28T17:33:25.975Z" },
|
| 3433 |
]
|
| 3434 |
|
| 3435 |
+
[[package]]
|
| 3436 |
+
name = "sendgrid"
|
| 3437 |
+
version = "6.12.5"
|
| 3438 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3439 |
+
dependencies = [
|
| 3440 |
+
{ name = "cryptography" },
|
| 3441 |
+
{ name = "python-http-client" },
|
| 3442 |
+
{ name = "werkzeug" },
|
| 3443 |
+
]
|
| 3444 |
+
sdist = { url = "https://files.pythonhosted.org/packages/da/fa/f718b2b953f99c1f0085811598ac7e31ccbd4229a81ec2a5290be868187a/sendgrid-6.12.5.tar.gz", hash = "sha256:ea9aae30cd55c332e266bccd11185159482edfc07c149b6cd15cf08869fabdb7", size = 50310, upload-time = "2025-09-19T06:23:09.229Z" }
|
| 3445 |
+
wheels = [
|
| 3446 |
+
{ url = "https://files.pythonhosted.org/packages/bd/55/b3c3880a77082e8f7374954e0074aafafaa9bc78bdf9c8f5a92c2e7afc6a/sendgrid-6.12.5-py3-none-any.whl", hash = "sha256:96f92cc91634bf552fdb766b904bbb53968018da7ae41fdac4d1090dc0311ca8", size = 102173, upload-time = "2025-09-19T06:23:07.93Z" },
|
| 3447 |
+
]
|
| 3448 |
+
|
| 3449 |
[[package]]
|
| 3450 |
name = "sentence-transformers"
|
| 3451 |
version = "5.1.2"
|
|
|
|
| 4101 |
{ url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
|
| 4102 |
]
|
| 4103 |
|
| 4104 |
+
[[package]]
|
| 4105 |
+
name = "werkzeug"
|
| 4106 |
+
version = "3.1.4"
|
| 4107 |
+
source = { registry = "https://pypi.org/simple" }
|
| 4108 |
+
dependencies = [
|
| 4109 |
+
{ name = "markupsafe" },
|
| 4110 |
+
]
|
| 4111 |
+
sdist = { url = "https://files.pythonhosted.org/packages/45/ea/b0f8eeb287f8df9066e56e831c7824ac6bab645dd6c7a8f4b2d767944f9b/werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e", size = 864687, upload-time = "2025-11-29T02:15:22.841Z" }
|
| 4112 |
+
wheels = [
|
| 4113 |
+
{ url = "https://files.pythonhosted.org/packages/2f/f9/9e082990c2585c744734f85bec79b5dae5df9c974ffee58fe421652c8e91/werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905", size = 224960, upload-time = "2025-11-29T02:15:21.13Z" },
|
| 4114 |
+
]
|
| 4115 |
+
|
| 4116 |
[[package]]
|
| 4117 |
name = "wikipedia"
|
| 4118 |
version = "1.4.0"
|