Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- Dockerfile +20 -0
- README.md +51 -6
- agent.py +300 -0
- main.py +161 -0
- requirements.txt +16 -0
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend image for the Deep Research Space: FastAPI app served by uvicorn on 7860
# (the port Hugging Face Spaces expects a Docker Space to listen on).
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# build-essential is needed to compile native wheels (e.g. chromadb deps);
# the apt cache is removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy backend files
# requirements.txt is copied (and installed) before the rest of the source so
# the pip layer is cached across code-only rebuilds.
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY backend/ .

# Expose port
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,12 +1,57 @@
|
|
| 1 |
---
|
| 2 |
-
title: Deep Research
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
short_description: Multi-agent AI assistant powered by LangGraph & Gemini
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Deep Research AI
|
| 3 |
+
emoji: 🔬
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Deep Research AI 🚀
|
| 12 |
+
|
| 13 |
+
A sophisticated multi-agent research assistant powered by LangGraph and Google Gemini.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- 🔬 **Deep Research**: Multi-step research with web search & document analysis
|
| 18 |
+
- 🎥 **YouTube Analyzer**: Generate viral titles & extract captions from videos
|
| 19 |
+
- 📝 **Smart Summarizer**: Auto-summarize long reports with one click
|
| 20 |
+
- 📄 **Document RAG**: Upload PDFs & texts for enhanced knowledge base
|
| 21 |
+
|
| 22 |
+
## Tech Stack
|
| 23 |
+
|
| 24 |
+
- **Backend**: FastAPI + LangGraph
|
| 25 |
+
- **AI Models**: Google Gemini 2.0 Flash
|
| 26 |
+
- **Vector Store**: ChromaDB
|
| 27 |
+
- **Web Search**: Tavily API
|
| 28 |
+
|
| 29 |
+
## Environment Variables
|
| 30 |
+
|
| 31 |
+
This Space requires the following secrets:
|
| 32 |
+
|
| 33 |
+
```
|
| 34 |
+
GOOGLE_API_KEY=your_gemini_api_key
|
| 35 |
+
TAVILY_API_KEY=your_tavily_api_key
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## API Endpoints
|
| 39 |
+
|
| 40 |
+
- `POST /api/chat` - Main chat interface
|
| 41 |
+
- `POST /api/summarize` - Content summarization
|
| 42 |
+
- `POST /api/upload` - Document upload
|
| 43 |
+
- `GET /api/health` - Health check
|
| 44 |
+
|
| 45 |
+
## Frontend
|
| 46 |
+
|
| 47 |
+
The frontend is deployed separately on Vercel for optimal performance.
|
| 48 |
+
|
| 49 |
+
Visit: [Your Frontend URL]
|
| 50 |
+
|
| 51 |
+
## Local Development
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
cd backend
|
| 55 |
+
pip install -r requirements.txt
|
| 56 |
+
uvicorn main:app --reload
|
| 57 |
+
```
|
agent.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import TypedDict, List, Annotated, Dict, Any
|
| 2 |
+
import operator
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
import re
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
load_dotenv("../.env", override=True)
|
| 9 |
+
|
| 10 |
+
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
|
| 11 |
+
from langgraph.graph import StateGraph, END
|
| 12 |
+
from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
|
| 13 |
+
from langchain_chroma import Chroma
|
| 14 |
+
from tavily import TavilyClient
|
| 15 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 16 |
+
from langchain_core.pydantic_v1 import BaseModel, Field
|
| 17 |
+
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
|
| 18 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 19 |
+
import yt_dlp
|
| 20 |
+
|
| 21 |
+
# --- Configuration ---
# Module-level singletons shared by every node in the graph (and imported by
# main.py, which reuses `vector_store` for uploads).
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# Initialize Embeddings & Vector Store
# Chroma persists to ./chroma_db so uploaded documents survive restarts
# (on Spaces the disk may be ephemeral — persistence depends on the runtime).
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vector_store = Chroma(embedding_function=embeddings, persist_directory="./chroma_db")
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
|
| 28 |
+
|
| 29 |
+
# --- State Definition ---
class AgentState(TypedDict):
    """The state of our Deep Research Agent.

    Fields annotated with `operator.add` are append-merged by LangGraph when
    a node returns a partial update; plain fields are overwritten.
    """
    task: str  # raw user request (may contain a YouTube URL)
    plan: List[str]  # research sub-questions produced by plan_node
    content: Annotated[List[str], operator.add]  # accumulated research notes
    revision_number: int  # set by the caller; not consumed by any node in this file
    max_revisions: int  # set by the caller; not consumed by any node in this file
    final_report: str  # Markdown report returned to the API layer
    steps: Annotated[List[str], operator.add]  # human-readable progress log
    messages: Annotated[List[BaseMessage], operator.add]  # chat history
    youtube_url: str  # URL extracted by router_node, if any
    youtube_captions: str  # full transcript text for YouTube tasks
|
| 42 |
+
|
| 43 |
+
# --- Data Models ---
class Plan(BaseModel):
    """Plan to follow for research.

    Structured-output schema handed to the planner LLM via
    `llm.with_structured_output(Plan)` in plan_node.
    """
    steps: List[str] = Field(description="List of research steps/questions to investigate.")
|
| 47 |
+
|
| 48 |
+
# --- Helpers ---
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Handles both `watch?v=<id>` and short `youtu.be/<id>` style links by
    matching either a `v=` prefix or a path separator before the ID.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
    return found.group(1) if found else None
|
| 57 |
+
|
| 58 |
+
def get_video_duration(url):
    """Return the video length in seconds via yt-dlp metadata (0 if unknown).

    Only metadata is fetched (`download=False`); nothing is written to disk.
    """
    with yt_dlp.YoutubeDL({'quiet': True}) as downloader:
        metadata = downloader.extract_info(url, download=False)
    return metadata.get('duration', 0)
|
| 64 |
+
|
| 65 |
+
# --- Nodes ---

def router_node(state: AgentState):
    """
    Routes to YouTube processor if a URL is detected, otherwise to Planner.

    Stores the extracted URL in `youtube_url`; the actual branching happens
    in the `route_task` conditional edge, which checks that field.
    """
    task = state["task"]
    looks_like_youtube = "youtube.com" in task or "youtu.be" in task
    if looks_like_youtube:
        # Pull the first URL-shaped token out of the free-text request.
        found = re.search(r"(https?://[^\s]+)", task)
        if found:
            return {
                "youtube_url": found.group(1),
                "steps": ["Detected YouTube URL, routing to Video Processor"],
            }
    return {"steps": ["Routing to Research Planner"]}
|
| 78 |
+
|
| 79 |
+
def youtube_node(state: AgentState):
    """
    Process YouTube video: Check duration, get captions, generate title.

    Flow: validate the video ID, reject videos over 10 minutes, fetch the
    transcript (English preferred, else first available), then ask the LLM
    for three title options and assemble a Markdown report. Every failure
    path returns an error report instead of raising, so the graph always
    reaches END with a populated `final_report`.
    """
    print("---YOUTUBE PROCESSOR---")
    url = state["youtube_url"]
    task = state["task"]

    video_id = extract_video_id(url)
    if not video_id:
        return {"final_report": "Error: Could not extract Video ID.", "steps": ["Failed to extract Video ID"]}

    # Check Duration
    try:
        duration = get_video_duration(url)
        if duration > 600: # 10 minutes
            return {"final_report": f"Error: Video is too long ({duration//60} mins). Limit is 10 minutes.", "steps": ["Video rejected: Too long"]}
    except Exception as e:
        return {"final_report": f"Error checking duration: {e}", "steps": ["Failed to check duration"]}

    # Get Captions
    try:
        print(f"DEBUG: Fetching captions for {video_id}")
        yt = YouTubeTranscriptApi()
        transcript_list = yt.list(video_id)
        # Try to find English, or fallback to first available
        try:
            transcript = transcript_list.find_transcript(['en'])
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Fallback to the first available transcript.
            transcript = next(iter(transcript_list))

        transcript_data = transcript.fetch()
        # Handle if it returns objects or dicts (it seems to be objects in this version)
        transcript_text = " ".join([t.text for t in transcript_data])
    except Exception as e:
        print(f"DEBUG: Caption Error: {e}")
        return {"final_report": f"Error fetching captions: {e}", "steps": ["Failed to fetch captions"]}

    # Generate Title
    system = """You are a YouTube Expert. Analyze the provided video transcript and generate 3 catchy, AI-enhanced title options.

Provide ONLY the 3 titles in this exact format:
VIRAL: [title here]
SEO: [title here]
PROFESSIONAL: [title here]

Do not add any other text, headers, or explanations. Just the 3 titles."""

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("human", "Transcript: {transcript}\n\nUser Request: {task}"),
        ]
    )

    chain = prompt | llm | StrOutputParser()
    # Cap the transcript at 5000 chars to stay well inside the context window.
    raw_titles = chain.invoke({"transcript": transcript_text[:5000], "task": task})

    # Manually construct a beautifully formatted report with MAXIMUM SPACING
    report = "# YouTube Video Analysis\n\n\n"
    report += "---\n\n\n"
    report += "## 🎬 AI Enhanced Title Options\n\n\n"

    # Parse the titles and format them nicely with extra spacing
    lines = raw_titles.strip().split('\n')
    for line in lines:
        if line.strip():
            if ':' in line:
                category, title = line.split(':', 1)
                report += f"### {category.strip().title()}\n\n"
                report += f"**{title.strip()}**\n\n\n"

    # Check if user wants captions
    if "caption" in task.lower() or "transcript" in task.lower():
        report += "---\n\n\n"
        report += "## 📝 Full Captions\n\n\n"
        report += f"```text\n{transcript_text}\n```\n\n\n"
    else:
        report += "---\n\n\n"
        report += "> **Note:** Captions are available for this video! Add 'with captions' to your request to see them.\n\n\n"

    return {
        "final_report": report,
        "youtube_captions": transcript_text,
        "steps": ["Processed YouTube video: Checked duration, fetched captions, generated title"]
    }
|
| 166 |
+
|
| 167 |
+
def plan_node(state: AgentState):
    """
    Planner Agent: asks the LLM to split the user's topic into a handful of
    concrete research questions, using the Plan schema for structured output.
    """
    print("---PLANNER---")

    system = """You are a Research Planner. Given a user topic, break it down into 3-5 distinct, specific research questions or sub-topics that need to be investigated to write a comprehensive report.
Return the result as a list of strings."""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", "{task}"),
    ])

    structured_planner = prompt | llm.with_structured_output(Plan)
    result = structured_planner.invoke({"task": state["task"]})

    return {
        "plan": result.steps,
        "steps": [f"Created research plan with {len(result.steps)} steps: {', '.join(result.steps)}"]
    }
|
| 191 |
+
|
| 192 |
+
def research_node(state: AgentState):
    """
    Researcher Agent: gathers notes for every step of the plan from the
    local vector store and (when a Tavily key is configured) web search.
    """
    print("---RESEARCHER---")
    gathered = []
    progress = []

    # Web search is optional: without TAVILY_API_KEY we fall back to
    # local documents only.
    tavily_key = os.getenv("TAVILY_API_KEY")
    tavily = TavilyClient(api_key=tavily_key) if tavily_key else None

    for question in state["plan"]:
        print(f"Researching: {question}")
        progress.append(f"Researching: {question}")

        # 1. Try Vector Store first (anything uploaded via /api/upload).
        local_docs = retriever.invoke(question)
        if local_docs:
            local_text = "\n".join(d.page_content for d in local_docs)
            gathered.append(f"Source: Local Documents\nTopic: {question}\nContent: {local_text}")

        # 2. Always Web Search for "Deep" research to get fresh info.
        if tavily is not None:
            try:
                hits = tavily.search(query=question, max_results=2).get('results', [])
                web_text = "\n".join(hit["content"] for hit in hits)
                gathered.append(f"Source: Web Search\nTopic: {question}\nContent: {web_text}")
            except Exception as e:
                # Best-effort: a failed search for one step shouldn't kill the run.
                print(f"Web search error: {e}")

    return {"content": gathered, "steps": progress}
|
| 225 |
+
|
| 226 |
+
def writer_node(state: AgentState):
    """
    Writer Agent: Synthesizes the report.

    Joins all accumulated research notes from state["content"] and asks the
    LLM for a single Markdown report. Returns the report both as
    `final_report` and wrapped in an AIMessage for main.py compatibility.
    """
    print("---WRITER---")
    task = state["task"]
    content = state["content"]

    # Formatting rules (blank lines around headers etc.) are spelled out here
    # because the frontend renders the raw Markdown verbatim.
    system = """You are a Professional Research Writer. Your goal is to write a comprehensive, well-structured Markdown report based on the provided research notes.

Guidelines:
1. **Structure**: Start with an engaging Title (#) and Introduction.
2. **Headers**: Use headers (##, ###) to organize sections. **IMPORTANT**: Always add a blank line before and after every header.
3. **Content**: Synthesize the information. Do not just list facts.
4. **Formatting**:
- Use **bold** for key terms.
- Use bullet points for lists (ensure there is a blank line before the list starts).
- Use > Blockquotes for important summaries.
5. **Citations**: If the notes mention specific sources, cite them.
6. **Conclusion**: End with a strong conclusion.

Make the report visually appealing and easy to read.
"""

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("human", "Task: {task}\n\nResearch Notes:\n{content}"),
        ]
    )

    chain = prompt | llm | StrOutputParser()
    report = chain.invoke({"task": task, "content": "\n\n".join(content)})

    return {
        "final_report": report,
        "messages": [AIMessage(content=report)], # For compatibility with main.py
        "steps": ["Wrote final report"]
    }
|
| 265 |
+
|
| 266 |
+
def route_task(state: AgentState):
    """Branch selector for the conditional edge after the router node.

    Returns "youtube_node" when router_node stored a (truthy) YouTube URL,
    otherwise "planner".
    """
    return "youtube_node" if state.get("youtube_url") else "planner"
|
| 273 |
+
|
| 274 |
+
# --- Graph Construction ---
# Register nodes: a router that inspects the task, a YouTube fast-path,
# and the plan -> research -> write pipeline.

workflow = StateGraph(AgentState)

workflow.add_node("router", router_node)
workflow.add_node("youtube_node", youtube_node)
workflow.add_node("planner", plan_node)
workflow.add_node("researcher", research_node)
workflow.add_node("writer", writer_node)

workflow.set_entry_point("router")

# Branch on whether the router extracted a YouTube URL (see route_task).
workflow.add_conditional_edges(
    "router",
    route_task,
    {
        "youtube_node": "youtube_node",
        "planner": "planner"
    }
)

# YouTube path terminates immediately; research path runs the full pipeline.
workflow.add_edge("youtube_node", END)
workflow.add_edge("planner", "researcher")
workflow.add_edge("researcher", "writer")
workflow.add_edge("writer", END)

# Compiled graph; imported by main.py as `agent_app`.
app = workflow.compile()
|
main.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, UploadFile, File
|
| 2 |
+
from fastapi.staticfiles import StaticFiles
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
load_dotenv("../.env", override=True) # Load from root
|
| 11 |
+
|
| 12 |
+
from agent import app as agent_app, vector_store
|
| 13 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 14 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader
|
| 15 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 16 |
+
|
| 17 |
+
app = FastAPI()
|
| 18 |
+
|
| 19 |
+
@app.on_event("startup")
async def startup_event():
    """Log whether the Tavily key is configured, so a missing Space secret
    is visible in the container logs instead of failing silently later."""
    key = os.getenv("TAVILY_API_KEY")
    if key:
        # Only the first few characters are printed — never log a full secret.
        print(f"Startup: TAVILY_API_KEY found: {key[:5]}...")
    else:
        print("Startup: TAVILY_API_KEY NOT found!")
|
| 26 |
+
|
| 27 |
+
# Allow the separately-hosted frontend (Vercel) to call this API.
# NOTE(review): wildcard origins together with allow_credentials=True is
# permissive — consider pinning allow_origins to the frontend URL in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 34 |
+
|
| 35 |
+
class ChatRequest(BaseModel):
    """Request body for POST /api/chat."""
    message: str  # current user message / research task
    history: List[Dict[str, str]] = []  # prior turns as {"role": ..., "content": ...}
|
| 38 |
+
|
| 39 |
+
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Ingest an uploaded PDF or text file into the Chroma vector store.

    The file is spooled to a temp path (the langchain loaders read from
    disk), split into overlapping chunks, and embedded. Returns the chunk
    count on success; any failure becomes an HTTP 500.
    """
    try:
        # Save file temporarily
        file_path = f"temp_{file.filename}"
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        try:
            # Load and split document — loader chosen by extension,
            # everything non-PDF is treated as plain text.
            if file.filename.endswith(".pdf"):
                loader = PyPDFLoader(file_path)
            else:
                loader = TextLoader(file_path)

            docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            splits = text_splitter.split_documents(docs)

            # Add to Vector Store
            vector_store.add_documents(splits)
        finally:
            # FIX: cleanup previously ran only on the success path, leaking
            # the temp file whenever loading/splitting/embedding raised.
            os.remove(file_path)

        return {"status": "success", "message": f"Processed {len(splits)} chunks"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 66 |
+
|
| 67 |
+
@app.post("/api/chat")
async def chat_endpoint(request: ChatRequest):
    """Run the deep-research agent graph on the user's message.

    Returns the final Markdown report plus a list of "thought" entries
    (one per logged agent step) for the frontend's progress display.
    """
    try:
        # Rebuild the LangChain message history from role/content dicts;
        # unknown roles are silently skipped.
        messages = []
        for entry in request.history:
            if entry["role"] == "user":
                messages.append(HumanMessage(content=entry["content"]))
            elif entry["role"] == "assistant":
                messages.append(AIMessage(content=entry["content"]))
        messages.append(HumanMessage(content=request.message))

        # The research graph is keyed off 'task'; the rest is blank initial state.
        inputs = {
            "task": request.message,
            "plan": [],
            "content": [],
            "revision_number": 0,
            "max_revisions": 1,
            "final_report": "",
            "steps": [],
            "messages": messages,  # Keep for history if needed
        }

        result = agent_app.invoke(inputs)

        final_response = result.get("final_report", "No report generated.")
        thoughts = [
            {"tool": "agent_step", "input": step, "status": "completed"}
            for step in result.get("steps", [])
        ]
        return {"response": final_response, "thoughts": thoughts}
    except Exception as e:
        print(f"Error in chat endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 113 |
+
|
| 114 |
+
class SummarizeRequest(BaseModel):
    """Request body for POST /api/summarize."""
    content: str  # raw text (e.g. a generated report) to summarize
|
| 116 |
+
|
| 117 |
+
@app.post("/api/summarize")
async def summarize_endpoint(request: SummarizeRequest):
    """One-shot summarization of arbitrary text via Gemini (no agent graph)."""
    try:
        # Imported lazily so this endpoint stays self-contained.
        from langchain_google_genai import ChatGoogleGenerativeAI
        from langchain_core.prompts import ChatPromptTemplate
        from langchain_core.output_parsers import StrOutputParser

        model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

        system = """You are a professional summarizer. Create a concise summary of the provided content.

Guidelines:
1. Keep it to 3-5 sentences
2. Capture the main points and key takeaways
3. Use clear, simple language
4. Maintain the professional tone
"""

        summarizer = ChatPromptTemplate.from_messages([
            ("system", system),
            ("human", "Summarize this content:\n\n{content}")
        ]) | model | StrOutputParser()

        return {"summary": summarizer.invoke({"content": request.content})}
    except Exception as e:
        print(f"Error in summarize endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 147 |
+
|
| 148 |
+
@app.get("/api/health")
async def health_check():
    """Liveness probe used by the Space / frontend."""
    status = {"status": "ok"}
    return status
|
| 151 |
+
|
| 152 |
+
@app.get("/")
async def root():
    """Plain banner so hitting the base URL shows the backend is alive."""
    banner = {"message": "RAG Backend is running"}
    return banner
|
| 155 |
+
|
| 156 |
+
# Serve static files (Frontend) - to be configured after build
# app.mount("/", StaticFiles(directory="../frontend/out", html=True), name="static")

# Local/dev entry point; in the container uvicorn is launched by the
# Dockerfile CMD instead, so this branch never runs there.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
langgraph
|
| 4 |
+
langchain-google-genai>=1.0.3
|
| 5 |
+
google-generativeai
|
| 6 |
+
tavily-python
|
| 7 |
+
chromadb
|
| 8 |
+
langchain
|
| 9 |
+
langchain-chroma
|
| 10 |
+
langchain-community
|
| 11 |
+
langchain-text-splitters
|
| 12 |
+
pypdf
|
| 13 |
+
python-multipart
|
| 14 |
+
python-dotenv
|
| 15 |
+
youtube-transcript-api
|
| 16 |
+
yt-dlp
|