Spaces:
Running
Running
Commit ·
b02630d
0
Parent(s):
Perplexity AI Clone - Full Production Version with 8 Modes
Browse files- .gitignore +10 -0
- .python-version +1 -0
- README.md +0 -0
- app/__init__.py +0 -0
- app/api.py +984 -0
- config/__init__.py +0 -0
- config/config.py +30 -0
- config/system_prompt.py +11 -0
- document_processing/__init__.py +0 -0
- document_processing/processor.py +26 -0
- embeddings/__init__.py +0 -0
- embeddings/embedder.py +24 -0
- files/__init__.py +1 -0
- files/file_manager.py +99 -0
- main.py +6 -0
- pyproject.toml +32 -0
- rag/__init__.py +0 -0
- rag/agents.py +803 -0
- rag/graph_deep.py +285 -0
- rag/rag_state.py +95 -0
- rag/router.py +113 -0
- requirements.txt +32 -0
- streamlit_app.py +709 -0
- tools/__init__.py +0 -0
- tools/browse_tool.py +18 -0
- tools/citation_tool.py +19 -0
- tools/followup_tool.py +33 -0
- tools/image_tavily.py +55 -0
- tools/knowledge_panel.py +66 -0
- tools/memory_tool.py +34 -0
- tools/name_extractor.py +9 -0
- tools/name_tool.py +28 -0
- tools/reranker_tool.py +18 -0
- tools/search_tool.py +22 -0
- tools/summarizer_tool.py +27 -0
- tools/wiki_tool.py +14 -0
- uv.lock +0 -0
- vectorstore/__init__.py +0 -0
- vectorstore/store.py +25 -0
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
README.md
ADDED
|
File without changes
|
app/__init__.py
ADDED
|
File without changes
|
app/api.py
ADDED
|
@@ -0,0 +1,984 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===================== api.py ==========================
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from fastapi import FastAPI, UploadFile, File, Form
|
| 5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
+
from fastapi.responses import StreamingResponse
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
from config.config import Config
|
| 10 |
+
from config.system_prompt import PPLX_SYSTEM_PROMPT
|
| 11 |
+
|
| 12 |
+
# Core routing / graph
|
| 13 |
+
from rag.router import RouterAgent
|
| 14 |
+
from rag.graph_deep import (
|
| 15 |
+
DeepResearchGraph,
|
| 16 |
+
WebSearchGraph,
|
| 17 |
+
RAGOnlyGraph,
|
| 18 |
+
AgenticRAGGraph,
|
| 19 |
+
AnalysisGraph,
|
| 20 |
+
SummarizeGraph
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# Tools
|
| 24 |
+
from tools.memory_tool import MemoryTool
|
| 25 |
+
from tools.name_tool import NameTool
|
| 26 |
+
from tools.search_tool import SearchTool
|
| 27 |
+
from tools.browse_tool import BrowseTool
|
| 28 |
+
from tools.reranker_tool import Reranker
|
| 29 |
+
from tools.followup_tool import FollowUpGenerator
|
| 30 |
+
from tools.image_tavily import TavilyImageSearch
|
| 31 |
+
from tools.knowledge_panel import KnowledgePanel
|
| 32 |
+
from tools.summarizer_tool import SummarizerTool
|
| 33 |
+
|
| 34 |
+
# RAG pipeline
|
| 35 |
+
from document_processing.processor import DocumentProcessor
|
| 36 |
+
from vectorstore.store import VectorStore
|
| 37 |
+
|
| 38 |
+
# File Manager for per-workspace RAG
|
| 39 |
+
from files.file_manager import FileManager
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# =======================================================
|
| 43 |
+
# FastAPI App
|
| 44 |
+
# =======================================================
|
| 45 |
+
app = FastAPI(title="Perplexity Clone API", version="8.0 - Production LangGraph")

# Wide-open CORS so any front-end origin (e.g. the Streamlit UI) can reach the API.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is unsafe
# for public deployment — restrict origins before shipping.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
)

# =======================================================
# Global Components
# =======================================================
# Shared singletons used by every endpoint in this module.
llm = Config.get_llm()
router = RouterAgent()

memory = MemoryTool()
name_tool = NameTool()
followup = FollowUpGenerator()
search_tool = SearchTool()
browse_tool = BrowseTool()
reranker = Reranker()
image_search = TavilyImageSearch()
knowledge_panel = KnowledgePanel()
summarizer = SummarizerTool()

# RAG demo vectorstore
processor = DocumentProcessor(
    chunk_size=Config.CHUNK_SIZE,
    chunk_overlap=Config.CHUNK_OVERLAP,
)
# NOTE(review): this fetches a remote URL at import time with no error handling —
# a network failure here prevents the whole API from starting. Consider lazy init.
demo_docs = processor.load_url("https://lilianweng.github.io/posts/2023-06-23-agent/")
demo_splits = processor.split(demo_docs)

vector = VectorStore()
vector.create(demo_splits)

# File manager for per-workspace document RAG
file_manager = FileManager(base_dir="workspace_data")

# =======================================================
# Initialize All LangGraph Pipelines
# =======================================================
# One pre-built graph per answer mode; endpoints pick among them.
deep_graph = DeepResearchGraph(vector)
web_graph = WebSearchGraph()
rag_graph = RAGOnlyGraph(file_manager)
agentic_graph = AgenticRAGGraph(file_manager, vector, image_search)
analysis_graph = AnalysisGraph()
summarize_graph = SummarizeGraph()

print("✅ All LangGraph pipelines initialized!")
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# =======================================================
|
| 99 |
+
# Models
|
| 100 |
+
# =======================================================
|
| 101 |
+
class ChatRequest(BaseModel):
    """Incoming chat payload: the user's message plus the workspace it belongs to."""
    message: str
    # Conversations and uploaded files are partitioned per workspace id.
    workspace_id: str = "default"
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class ChatResponse(BaseModel):
    """Envelope returned by every answer-producing endpoint in this module."""
    answer: str
    sources: List[Dict[str, str]] = []  # cited documents/pages ({title, url})
    links: List[Dict[str, str]] = []  # web results for the Links tab ({title, url, snippet})
    images: List[Dict[str, str]] = []  # image results for the Images tab
    followups: List[str] = []  # suggested follow-up questions
    default_tab: str = "answer"  # "answer" | "links" | "images"
    workspace_id: str
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# =======================================================
|
| 117 |
+
# Utils
|
| 118 |
+
# =======================================================
|
| 119 |
+
def build_context(ws: str, new_msg: str):
    """Inject full workspace chat history + system prompt."""
    # System prompt first, then every stored turn, then the new user message.
    convo = [{"role": "system", "content": PPLX_SYSTEM_PROMPT}]
    convo.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in memory.get_long_chat(ws)
    )
    convo.append({"role": "user", "content": new_msg})
    return convo
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def guess_default_tab(query: str, mode: str) -> str:
    """Decide which UI tab should be first (Answer / Links / Images)."""
    lowered = query.lower()
    # Visual-intent keywords take precedence over the routing mode.
    visual_terms = (
        "image", "images", "photo", "photos", "picture", "pictures",
        "wallpaper", "logo", "flag", "screenshot", "pic",
    )
    for term in visual_terms:
        if term in lowered:
            return "images"
    # Web-search mode surfaces links first; everything else leads with the answer.
    return "links" if mode == "web" else "answer"
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def tavily_images_safe(query: str) -> List[Dict[str, str]]:
    """Safe wrapper around Tavily image search."""
    try:
        hits = image_search.search(query, count=6)
    except Exception as err:
        # Images are decorative — a search failure must never break the response.
        print("Tavily image search error:", err)
        return []
    return hits
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def convert_links(results: List[Dict]) -> List[Dict[str, str]]:
    """Convert Tavily web search results to link objects."""
    normalized: List[Dict[str, str]] = []
    for hit in results:
        target = hit.get("url")
        # Entries without a URL cannot be rendered as links — drop them.
        if not target:
            continue
        snippet_source = hit.get("content") or ""
        normalized.append({
            "title": hit.get("title", "Result"),
            "url": target,
            "snippet": snippet_source[:200],
        })
    return normalized
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# =======================================================
|
| 169 |
+
# Chat Endpoint
|
| 170 |
+
# =======================================================
|
| 171 |
+
@app.post("/api/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    """Main chat endpoint: route the message to a mode (llm / image / rag / web),
    generate an answer, and assemble links, images, sources and follow-ups."""

    q = req.message.strip()
    ws = req.workspace_id

    memory.add(ws, "user", q)

    # -------- Name memory special cases --------
    # Short-circuit: "my name is X" style messages are handled without the LLM.
    extracted = name_tool.extract_name(q)
    if extracted:
        memory.set_name(ws, extracted)
        reply = f"Nice to meet you, {extracted}! I’ll remember your name."
        memory.add(ws, "assistant", reply)
        return ChatResponse(answer=reply, workspace_id=ws)

    if q.lower() in ["tell me my name", "what is my name"]:
        nm = memory.get_name(ws)
        ans = f"Your name is {nm} 😊" if nm else "You haven’t told me your name yet."
        memory.add(ws, "assistant", ans)
        return ChatResponse(answer=ans, workspace_id=ws)

    # -------- Routing --------
    mode = router.route(q)
    default_tab = guess_default_tab(q, mode)

    answer = ""
    links: List[Dict[str, str]] = []
    sources: List[Dict[str, str]] = []
    follow: List[str] = []

    # -------- LLM Mode (chat / creative / small talk) --------
    if mode == "llm":
        msgs = build_context(ws, q)
        answer = llm.invoke(msgs).content
        follow = followup.generate(answer, q)

        # Optional small set of links
        try:
            res = search_tool.search(q, num_results=3)
            links = convert_links(res)
        except Exception as e:
            print("search error (llm mode):", e)

    # -------- Image Mode (image search queries) --------
    elif mode == "image":
        # For image queries, provide brief context + focus on images tab
        try:
            res = search_tool.search(q, num_results=3)
            # NOTE(review): convert_links reads "content" from search results —
            # verify that "snippet" is actually a key in search_tool output.
            ctx = res[0].get("snippet", "") if res else ""
            answer = f"Here are images related to '{q}'."
            if ctx:
                answer += f"\n\n{ctx}"
            links = convert_links(res)
        except Exception as e:
            print("search error (image mode):", e)
            answer = f"Showing images for: {q}"

        follow = []

    # -------- AGENTIC RAG Mode (files + web + images + knowledge) --------
    elif mode == "rag":
        # PLANNER AGENT: Decide which agents to activate
        q_lower = q.lower()

        use_file_rag = any(
            w in q_lower for w in [
                "summarize", "according to", "in this pdf", "in the document",
                "based on the file", "read my", "extract from", "uploaded",
                "this file", "the file", "my file", "from file"
            ]
        ) or len(q.split()) > 2  # multi-word questions likely need file RAG

        use_web = any(
            w in q_lower for w in [
                "today", "latest", "current", "news", "stock", "price",
                "real-time", "weather", "who is", "what is", "where is",
                "when", "how much", "compare"
            ]
        )

        use_images = any(
            w in q_lower for w in [
                "image", "images", "logo", "flag", "photos", "look like",
                "picture", "show me", "wallpaper", "screenshot"
            ]
        )

        # FILE AGENT: Retrieve from workspace uploaded docs
        ws_obj = file_manager.get_workspace(ws)
        file_chunks = []
        if use_file_rag and ws_obj.initialized:
            file_chunks = ws_obj.retrieve(q, k=6)

        # REFERENCE AGENT: Retrieve from base vector store (demo docs)
        base_chunks = vector.retrieve(q, k=4)
        base_chunks = reranker.rerank(q, base_chunks, top_k=3)

        # WEB AGENT: Fetch live web content
        web_pages = []
        web_results = []
        if use_web:
            try:
                web_results = search_tool.search(q, num_results=4)
                for r in web_results:
                    url = r.get("url")
                    if not url:
                        continue
                    text = browse_tool.fetch_clean(url)
                    if text:
                        web_pages.append({
                            "title": r.get("title", ""),
                            "url": url,
                            "content": text[:1500]  # Speed optimization
                        })
            except Exception as e:
                print(f"Web agent error: {e}")

        # IMAGE AGENT: Fetch relevant images
        images_result = tavily_images_safe(q) if use_images else []

        # BUILD COMBINED CONTEXT
        contexts = []

        if file_chunks:
            file_ctx = "\n\n".join(d.page_content for d in file_chunks)
            contexts.append(f"📄 FILE CONTEXT (from uploaded documents):\n{file_ctx}")

        if base_chunks:
            ref_ctx = "\n\n".join(d.page_content for d in base_chunks)
            contexts.append(f"📚 REFERENCE CONTEXT:\n{ref_ctx}")

        if web_pages:
            web_ctx = "\n\n".join(f"[{p['title']}]: {p['content']}" for p in web_pages)
            contexts.append(f"🌐 WEB CONTEXT (live web data):\n{web_ctx}")

        full_context = "\n\n-----\n\n".join(contexts) if contexts else "No context available."

        # SYNTHESIZER AGENT: Generate final answer
        synth_prompt = f"""You are an AGENTIC RAG synthesis model like Perplexity AI.
Combine information from FILE CONTEXT, REFERENCE CONTEXT and WEB CONTEXT.

RULES:
1. PRIORITIZE info from FILE CONTEXT (user's uploaded documents) when available.
2. Use WEB CONTEXT to add current/live information.
3. Use REFERENCE CONTEXT for background knowledge.
4. Cite sources using [1], [2], etc. when referencing specific info.
5. If answering from a file, say "According to your uploaded document..."
6. Do NOT hallucinate - only use info from the provided contexts.
7. Be concise but comprehensive.

AVAILABLE CONTEXT:
{full_context}

USER QUESTION: {q}

FINAL ANSWER:"""

        msgs = build_context(ws, synth_prompt)
        answer = llm.invoke(msgs).content
        follow = followup.generate(answer, q)

        # BUILD SOURCES
        sources = []
        if file_chunks:
            for d in file_chunks:
                sources.append({
                    "title": d.metadata.get("source", "📄 Uploaded File"),
                    "url": d.metadata.get("file_path", "")
                })

        if web_pages:
            for p in web_pages:
                sources.append({"title": p["title"], "url": p["url"]})

        # BUILD LINKS
        links = convert_links(web_results)

        # Set images from image agent
        # NOTE(review): images_result is never used — images are re-fetched below
        # via tavily_images_safe(q), making this branch dead code.
        if images_result:
            # Will be set at the end with tavily_images_safe
            pass

    # -------- Web Mode (real-time / entities / news) --------
    elif mode == "web":
        res = search_tool.search(q, num_results=5)

        pages = []
        for r in res:
            url = r.get("url")
            if not url:
                continue
            text = browse_tool.fetch_clean(url)
            if not text:
                continue
            pages.append(
                {
                    "title": r.get("title", "Webpage"),
                    "url": url,
                    "content": text[:2000],
                }
            )

        ctx = "\n\n".join(p["content"] for p in pages)
        prompt = (
            "Use ONLY the following web content to answer. "
            "Cite sources using [1], [2], etc.\n\n"
            f"{ctx}\n\nQuestion: {q}"
        )

        msgs = build_context(ws, prompt)
        answer = llm.invoke(msgs).content
        follow = followup.generate(answer, q)

        links = [
            {
                "title": p["title"],
                "url": p["url"],
                "snippet": p["content"][:200],
            }
            for p in pages
        ]

        sources = [{"title": p["title"], "url": p["url"]} for p in pages]

    # -------- Fallback → LLM --------
    else:
        msgs = build_context(ws, q)
        answer = llm.invoke(msgs).content
        follow = followup.generate(answer, q)

    # -------- Images (for Images tab) --------
    # Fetched unconditionally for every mode so the Images tab is always populated.
    images = tavily_images_safe(q)

    # Debug logging
    print(f"\n=== API Response Debug ===")
    print(f"Mode: {mode}")
    print(f"Links count: {len(links)}")
    print(f"Images count: {len(images)}")
    print(f"Sources count: {len(sources)}")
    if links:
        print(f"First link: {links[0]}")
    if images:
        print(f"First image: {images[0]}")
    print(f"========================\n")

    memory.add(ws, "assistant", answer)

    return ChatResponse(
        answer=answer,
        sources=sources,
        links=links,
        images=images,
        followups=follow,
        default_tab=default_tab,
        workspace_id=ws,
    )
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
# =======================================================
|
| 431 |
+
# Streaming Endpoint
|
| 432 |
+
# =======================================================
|
| 433 |
+
@app.post("/api/chat/stream")
def chat_stream(req: ChatRequest):
    """Stream raw LLM tokens for a chat turn as plain text."""
    query = req.message
    workspace = req.workspace_id
    memory.add(workspace, "user", query)

    context_msgs = build_context(workspace, query)

    def token_stream():
        collected = []
        for part in llm.stream(context_msgs):
            piece = getattr(part, "content", "")
            if piece:
                collected.append(piece)
                yield piece
        # Persist the full reply only once streaming has finished.
        memory.add(workspace, "assistant", "".join(collected))

    return StreamingResponse(token_stream(), media_type="text/plain")
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
# =======================================================
|
| 455 |
+
# Deep Research Endpoint
|
| 456 |
+
# =======================================================
|
| 457 |
+
@app.post("/api/deep_research", response_model=ChatResponse)
def deep_research(req: ChatRequest):
    """Run the multi-step DeepResearch graph and wrap its result as a ChatResponse."""
    query = req.message
    workspace = req.workspace_id

    memory.add(workspace, "user", query)

    try:
        result = deep_graph.run(query)
        # The graph state is a plain mapping (TypedDict), hence .get() lookups.
        answer = result.get("final_answer", "No answer generated.")
        sources = result.get("sources", [])
    except Exception as err:
        print("Deep research error:", err)
        answer = "Something went wrong in deep research mode."
        sources = []

    memory.add(workspace, "assistant", answer)
    images = tavily_images_safe(query)
    suggestions = followup.generate(answer, query)

    return ChatResponse(
        answer=answer,
        sources=sources,
        links=[],
        images=images,
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
# =======================================================
|
| 491 |
+
# Knowledge Panel Endpoint
|
| 492 |
+
# =======================================================
|
| 493 |
+
@app.get("/api/knowledge_panel")
def get_knowledge_panel(q: str):
    """
    Returns Wikipedia-style infobox + AI-generated facts.
    Used by UI to render a sidebar knowledge card.
    """
    try:
        return knowledge_panel.build_panel(q)
    except Exception as err:
        # An empty panel is a valid UI state; never surface a 500 for this widget.
        print("Knowledge panel error:", err)
        return {"wiki": {}, "facts": []}
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
# =======================================================
|
| 508 |
+
# FILE UPLOAD (PDF / TXT / PPTX) - Perplexity Spaces Feature
|
| 509 |
+
# =======================================================
|
| 510 |
+
@app.post("/api/upload_docs")
async def upload_docs(
    workspace_id: str = Form("default"),
    files: List[UploadFile] = File(...)
):
    """
    Upload one or more documents and index them for this workspace.
    Supports PDF, TXT, MD, PPT, PPTX files.

    Unsupported extensions are silently skipped; the response reports how
    many files were actually indexed.
    """
    ws = file_manager.get_workspace(workspace_id)
    saved_paths = []

    for f in files:
        # SECURITY: use only the basename — client-supplied filenames can contain
        # path separators ("../../evil.pdf") and must not escape the workspace dir.
        safe_name = Path(f.filename or "").name
        ext = Path(safe_name).suffix.lower()
        if not safe_name or ext not in [".pdf", ".txt", ".md", ".ppt", ".pptx"]:
            continue  # skip unsupported types

        dest = Path(ws.base_dir) / safe_name
        content = await f.read()
        with open(dest, "wb") as out:
            out.write(content)
        saved_paths.append(dest)

    if saved_paths:
        ws.add_files(saved_paths)
        print(f"✅ Indexed {len(saved_paths)} files for workspace '{workspace_id}'")

    return {
        "workspace_id": workspace_id,
        "files": ws.files,
        "count": len(ws.files),
        "message": f"Successfully indexed {len(saved_paths)} files"
    }
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
@app.get("/api/workspace_files/{workspace_id}")
def get_workspace_files(workspace_id: str):
    """Get list of files uploaded to a workspace."""
    workspace = file_manager.get_workspace(workspace_id)
    payload = {
        "workspace_id": workspace_id,
        "files": workspace.files,
        "initialized": workspace.initialized,
    }
    return payload
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
@app.delete("/api/workspace/{workspace_id}")
def clear_workspace(workspace_id: str):
    """Clear all files from a workspace."""
    # Delegates deletion/re-init entirely to the FileManager.
    file_manager.clear_workspace(workspace_id)
    return {"message": f"Workspace '{workspace_id}' cleared"}
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
# =======================================================
|
| 564 |
+
# MODE-SPECIFIC ENDPOINTS
|
| 565 |
+
# =======================================================
|
| 566 |
+
|
| 567 |
+
class ModeRequest(BaseModel):
    """Payload for the mode-specific endpoints (/api/focus, /api/writing, /api/math, /api/code)."""
    message: str
    workspace_id: str = "default"
    # Mode selector; not consulted by the handlers visible in this module —
    # each endpoint implies its own mode. Presumably reserved for a unified route.
    mode: str = "auto"
|
| 571 |
+
|
| 572 |
+
|
| 573 |
+
@app.post("/api/focus", response_model=ChatResponse)
def focus_mode(req: ModeRequest):
    """Focus mode - concise, direct answers without web search."""
    query = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", query)

    prompt = f"""You are in FOCUS mode. Provide a concise, direct answer.
- No unnecessary elaboration
- Get straight to the point
- Use bullet points if helpful
- Be accurate and helpful

Question: {query}

Answer:"""

    # No search/retrieval here — pure LLM over workspace history.
    reply = llm.invoke(build_context(workspace, prompt)).content
    suggestions = followup.generate(reply, query)

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=[],
        links=[],
        images=[],
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
@app.post("/api/writing", response_model=ChatResponse)
def writing_mode(req: ModeRequest):
    """Writing mode - creative writing, essays, content generation."""
    query = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", query)

    prompt = f"""You are in WRITING mode - a creative writing assistant.
Help with:
- Essays, articles, blog posts
- Creative writing, stories
- Professional emails and documents
- Content improvement and editing
- Grammar and style suggestions

Be creative, engaging, and helpful. Format your response well.

Request: {query}

Response:"""

    # Same shape as the other mode handlers: prompt → LLM → follow-ups → memory.
    reply = llm.invoke(build_context(workspace, prompt)).content
    suggestions = followup.generate(reply, query)

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=[],
        links=[],
        images=[],
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
@app.post("/api/math", response_model=ChatResponse)
def math_mode(req: ModeRequest):
    """Math mode - mathematical calculations and explanations."""
    query = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", query)

    prompt = f"""You are in MATH mode - a mathematical assistant.
- Solve mathematical problems step by step
- Show all work and calculations
- Explain the reasoning
- Use proper mathematical notation
- Handle algebra, calculus, statistics, geometry, etc.

Problem: {query}

Solution:"""

    # Prompt-wrapped LLM call over the workspace's chat history.
    reply = llm.invoke(build_context(workspace, prompt)).content
    suggestions = followup.generate(reply, query)

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=[],
        links=[],
        images=[],
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 681 |
+
|
| 682 |
+
|
| 683 |
+
@app.post("/api/code", response_model=ChatResponse)
def code_mode(req: ModeRequest):
    """Code mode - programming help and code generation.

    Same request/response flow as the other persona endpoints; only the
    system persona differs.
    """
    request_text = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", request_text)

    # Persona prompt kept verbatim; it defines the behavior of this mode.
    coding_prompt = f"""You are in CODE mode - an expert programming assistant.
- Write clean, efficient, well-commented code
- Explain the code logic
- Follow best practices
- Handle any programming language
- Debug and fix code issues
- Suggest improvements

Request: {request_text}

Response:"""

    reply = llm.invoke(build_context(workspace, coding_prompt)).content
    suggestions = followup.generate(reply, request_text)

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=[],
        links=[],
        images=[],
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 718 |
+
|
| 719 |
+
|
| 720 |
+
@app.post("/api/analyze", response_model=ChatResponse)
def analyze_mode(req: ModeRequest):
    """
    Analysis mode - deep analysis with web research.
    Production-level LangGraph implementation.
    """
    question = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", question)

    # Delegate the research/analysis work to the AnalysisGraph pipeline.
    result = analysis_graph.run(question)

    report = result.get("answer", "No analysis generated.")
    cited_sources = result.get("sources", [])
    related_links = result.get("links", [])
    suggestions = result.get("followups", [])

    # Related images are fetched outside the graph (best-effort helper).
    related_images = tavily_images_safe(question)

    memory.add(workspace, "assistant", report)

    return ChatResponse(
        answer=report,
        sources=cited_sources,
        links=related_links,
        images=related_images,
        followups=suggestions,
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 753 |
+
|
| 754 |
+
|
| 755 |
+
@app.post("/api/summarize", response_model=ChatResponse)
def summarize_mode(req: ModeRequest):
    """
    Summarize mode - summarize uploaded documents OR web content.
    Prioritizes uploaded files, then falls back to web search.

    Fallback order (each failed step logs and falls through to the next):
      1. Uploaded workspace files (vector-store retrieval + summarizer).
      2. The message itself, when it looks like a URL.
      3. A web search over the query text.
    """
    q = req.message.strip()
    ws = req.workspace_id

    memory.add(ws, "user", q)

    # STEP 1: Check for uploaded files first
    ws_obj = file_manager.get_workspace(ws)

    if ws_obj.initialized and ws_obj.files:
        # Summarize from uploaded files
        print(f"📝 SUMMARIZE MODE: Using uploaded files")
        try:
            # Retrieve relevant chunks from files
            chunks = ws_obj.retrieve(q, k=10)
            if chunks:
                # Combine chunk content for summarization
                content = "\n\n".join([c.page_content for c in chunks])

                # Generate summary
                summary = summarizer.summarize(content, max_words=400)

                # Build sources from files: one entry per distinct filename
                seen_files = set()
                sources = []
                for c in chunks:
                    fname = c.metadata.get("source", "Document")
                    if fname not in seen_files:
                        sources.append({"title": f"📄 {fname}", "url": ""})
                        seen_files.add(fname)

                follow = followup.generate(summary, q)

                memory.add(ws, "assistant", summary)

                return ChatResponse(
                    answer=summary,
                    sources=sources,
                    links=[],
                    images=[],
                    followups=follow,
                    default_tab="answer",
                    workspace_id=ws
                )
        except Exception as e:
            # Deliberate best-effort: log and fall through to STEP 2/3.
            print(f" ❌ File summarize error: {e}")

    # STEP 2: Check if it's a URL
    if q.startswith("http"):
        print(f"📝 SUMMARIZE MODE: URL detected")
        try:
            content = browse_tool.fetch_clean(q)
            if content:
                summary = summarizer.summarize(content, max_words=400)
                sources = [{"title": "Source URL", "url": q}]
                links = [{"title": "Source", "url": q, "snippet": content[:200]}]
                follow = followup.generate(summary, q)

                memory.add(ws, "assistant", summary)

                return ChatResponse(
                    answer=summary,
                    sources=sources,
                    links=links,
                    images=[],
                    followups=follow,
                    default_tab="answer",
                    workspace_id=ws
                )
        except Exception as e:
            # Non-fatal: fall through to the web-search fallback below.
            print(f" ❌ URL fetch error: {e}")

    # STEP 3: Fall back to web search and summarize
    print(f"📝 SUMMARIZE MODE: Web search fallback")
    try:
        results = search_tool.search(q, num_results=3)
        content_parts = []
        links = []

        for r in results:
            url = r.get("url", "")
            title = r.get("title", "")
            text = browse_tool.fetch_clean(url)
            if text:
                # Cap each page at 1500 chars to bound summarizer input size.
                content_parts.append(text[:1500])
                links.append({"title": title, "url": url, "snippet": text[:150]})

        if content_parts:
            combined = "\n\n".join(content_parts)
            summary = summarizer.summarize(combined, max_words=400)
        else:
            summary = "Could not find content to summarize."

        sources = [{"title": l["title"], "url": l["url"]} for l in links]
        follow = followup.generate(summary, q)

        memory.add(ws, "assistant", summary)

        return ChatResponse(
            answer=summary,
            sources=sources,
            links=links,
            images=[],
            followups=follow,
            default_tab="answer",
            workspace_id=ws
        )
    except Exception as e:
        # Final safety net: surface the error in the answer payload.
        print(f" ❌ Summarize error: {e}")
        return ChatResponse(
            answer=f"Error generating summary: {str(e)}",
            sources=[],
            links=[],
            images=[],
            followups=[],
            default_tab="answer",
            workspace_id=ws
        )
|
| 878 |
+
|
| 879 |
+
|
| 880 |
+
# =======================================================
|
| 881 |
+
# PRODUCTION-LEVEL MODE ENDPOINTS
|
| 882 |
+
# =======================================================
|
| 883 |
+
|
| 884 |
+
@app.post("/api/web", response_model=ChatResponse)
def web_search_mode(req: ModeRequest):
    """
    Web Search Mode - Real-time web search with source citations.
    Production-level LangGraph implementation.
    """
    question = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", question)

    # The WebSearchGraph pipeline handles search, fetch, and synthesis.
    result = web_graph.run(question)

    reply = result.get("answer", "No answer generated.")

    # Images are gathered separately; the graph does not produce them.
    related_images = tavily_images_safe(question)

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=result.get("sources", []),
        links=result.get("links", []),
        images=related_images,
        followups=result.get("followups", []),
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 917 |
+
|
| 918 |
+
|
| 919 |
+
@app.post("/api/rag", response_model=ChatResponse)
def rag_mode(req: ModeRequest):
    """
    RAG Mode - Search uploaded documents only.
    Production-level LangGraph implementation.
    """
    question = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", question)

    # RAGOnlyGraph retrieves and answers from the workspace's documents only.
    result = rag_graph.run(question, workspace)

    reply = result.get("answer", "No answer generated.")

    memory.add(workspace, "assistant", reply)

    return ChatResponse(
        answer=reply,
        sources=result.get("sources", []),
        links=[],
        images=[],
        followups=result.get("followups", []),
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 948 |
+
|
| 949 |
+
|
| 950 |
+
@app.post("/api/agentic", response_model=ChatResponse)
def agentic_mode(req: ModeRequest):
    """
    Agentic Mode - Multi-agent RAG with Planner, File, Web, Knowledge, Image, Synthesizer.
    Production-level LangGraph implementation.
    """
    question = req.message.strip()
    workspace = req.workspace_id

    memory.add(workspace, "user", question)
    print(f"\n🤖 AGENTIC MODE (LangGraph): {question}")

    # The entire multi-agent pipeline runs inside the AgenticRAGGraph.
    result = agentic_graph.run(question, workspace)

    reply = result.get("answer", "No answer generated.")
    cited_sources = result.get("sources", [])

    memory.add(workspace, "assistant", reply)
    print(f" ✅ AgenticGraph: Completed with {len(cited_sources)} sources")

    return ChatResponse(
        answer=reply,
        sources=cited_sources,
        links=result.get("links", []),
        images=result.get("images", []),
        followups=result.get("followups", []),
        default_tab="answer",
        workspace_id=workspace,
    )
|
| 983 |
+
|
| 984 |
+
|
config/__init__.py
ADDED
|
File without changes
|
config/config.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from langchain.chat_models import init_chat_model
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
class Config:
    """Central configuration: API keys, LLM model id, and chunking parameters.

    Values are read from the environment (populated by ``load_dotenv()`` at
    module import time).
    """

    GROQ_API_KEY = os.getenv("GROQ_API_KEY")      # required for get_llm()
    TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")  # used by search/image tools

    LLM_MODEL = "groq:openai/gpt-oss-120b"

    CHUNK_SIZE = 400
    CHUNK_OVERLAP = 80

    @classmethod
    def get_llm(cls):
        """Return a chat LLM instance with tool calling disabled.

        Raises:
            RuntimeError: if GROQ_API_KEY is not configured.
        """
        if not cls.GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY missing in .env")
        # init_chat_model reads the key from the environment.
        os.environ["GROQ_API_KEY"] = cls.GROQ_API_KEY
        llm = init_chat_model(cls.LLM_MODEL)
        # Disable tool calling by binding an empty tools list.
        try:
            return llm.bind(tools=[])
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Fallback if bind doesn't work.
            return llm
|
| 29 |
+
|
| 30 |
+
|
config/system_prompt.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PPLX_SYSTEM_PROMPT = """
|
| 2 |
+
You are Perplexity AI.
|
| 3 |
+
|
| 4 |
+
When user greets (hi, hello, hey), respond like a friendly assistant:
|
| 5 |
+
Short, conversational, natural.
|
| 6 |
+
|
| 7 |
+
Do NOT give definitions or grammar explanations unless user asks.
|
| 8 |
+
Your tone: concise, helpful, modern.
|
| 9 |
+
|
| 10 |
+
Always adapt style based on question.
|
| 11 |
+
"""
|
document_processing/__init__.py
ADDED
|
File without changes
|
document_processing/processor.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader, TextLoader
|
| 3 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 4 |
+
from langchain.schema import Document
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class DocumentProcessor:
    """Loads and splits documents into chunks for RAG."""

    def __init__(self, chunk_size: int = 400, chunk_overlap: int = 80) -> None:
        # Recursive splitter keeps chunks near chunk_size characters with
        # chunk_overlap characters of shared context between neighbors.
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

    def load_url(self, url: str) -> List[Document]:
        """Load a web page as LangChain documents."""
        loader = WebBaseLoader(url)
        return loader.load()

    def load_pdf(self, file_path: str) -> List[Document]:
        """Load a PDF file as LangChain documents (one per page)."""
        loader = PyPDFLoader(file_path)
        return loader.load()

    def load_txt(self, file_path: str) -> List[Document]:
        """Load a UTF-8 text file as LangChain documents."""
        loader = TextLoader(file_path, encoding="utf-8")
        return loader.load()

    def split(self, docs: List[Document]) -> List[Document]:
        """Split loaded documents into overlapping chunks for indexing."""
        return self.splitter.split_documents(docs)
|
embeddings/__init__.py
ADDED
|
File without changes
|
embeddings/embedder.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Embedding module using SentenceTransformer (free)."""
|
| 2 |
+
|
| 3 |
+
from typing import List
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
from langchain_core.embeddings import Embeddings
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Embedder(Embeddings):
    """LangChain-compatible wrapper around a SentenceTransformer model."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents into dense vectors."""
        vectors = self.model.encode(texts)
        return vectors.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text."""
        encoded = self.model.encode([text])
        return encoded[0].tolist()

    def embed(self, texts: List[str]) -> List[List[float]]:
        """Convenience alias for embed_documents."""
        return self.embed_documents(texts)
|
files/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# files/__init__.py
|
files/file_manager.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# files/file_manager.py
|
| 2 |
+
|
| 3 |
+
from typing import Dict, List
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import shutil
|
| 6 |
+
|
| 7 |
+
from document_processing.processor import DocumentProcessor
|
| 8 |
+
from vectorstore.store import VectorStore
|
| 9 |
+
from langchain.schema import Document
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FileWorkspace:
    """
    Holds vector store + metadata for one workspace's uploaded docs.
    """
    def __init__(self, workspace_id: str, base_dir: str = "workspace_data"):
        self.workspace_id = workspace_id
        self.base_dir = Path(base_dir) / workspace_id
        self.base_dir.mkdir(parents=True, exist_ok=True)

        self.processor = DocumentProcessor()
        self.vector = VectorStore()
        self.initialized = False       # True once the vector store was created
        self.files: List[str] = []     # names of successfully loaded files

    def add_files(self, uploaded_paths: List[Path]):
        """
        Index newly uploaded files into the workspace vector store.

        Supported: .pdf, .txt, .md, .ppt/.pptx (if the Unstructured loader is
        installed). Files that fail to load are logged and skipped.
        """
        docs: List[Document] = []

        for p in uploaded_paths:
            try:
                file_docs: List[Document] = []
                suffix = p.suffix.lower()
                if suffix == ".pdf":
                    file_docs = self.processor.load_pdf(str(p))
                elif suffix in [".txt", ".md"]:
                    file_docs = self.processor.load_txt(str(p))
                elif suffix in [".ppt", ".pptx"]:
                    # Use UnstructuredPowerPointLoader if available
                    try:
                        from langchain_community.document_loaders import UnstructuredPowerPointLoader
                        loader = UnstructuredPowerPointLoader(str(p))
                        file_docs = loader.load()
                    except ImportError:
                        print(f"UnstructuredPowerPointLoader not available for {p.name}")
                        continue
                else:
                    # BUG FIX: unknown extensions previously fell through and
                    # were appended to self.files without ever being indexed.
                    continue

                # BUG FIX: tag metadata per file INSIDE the loop. The original
                # tagged every doc with the LAST file's path/name after the
                # loop ended (stale loop variable `p`).
                for doc in file_docs:
                    doc.metadata["file_path"] = str(p)
                    doc.metadata["source"] = p.name

                docs.extend(file_docs)
                self.files.append(p.name)
            except Exception as e:
                print(f"Error loading file {p.name}: {e}")
                continue

        if not docs:
            return

        chunks = self.processor.split(docs)

        if not self.initialized:
            self.vector.create(chunks)
            self.initialized = True
        else:
            self.vector.store.add_documents(chunks)

    def retrieve(self, query: str, k: int = 6):
        """Return top-k chunks for the query, or [] before any upload."""
        if not self.initialized:
            return []
        return self.vector.retrieve(query, k=k)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class FileManager:
    """
    Keeps a map: workspace_id -> FileWorkspace
    """
    def __init__(self, base_dir: str = "workspace_data"):
        self.base_dir = base_dir
        self._workspaces: Dict[str, FileWorkspace] = {}

    def get_workspace(self, workspace_id: str) -> FileWorkspace:
        """Return the workspace for this id, creating it lazily on first use."""
        existing = self._workspaces.get(workspace_id)
        if existing is None:
            existing = FileWorkspace(workspace_id, self.base_dir)
            self._workspaces[workspace_id] = existing
        return existing

    def clear_workspace(self, workspace_id: str):
        """Delete the workspace's on-disk data and drop its in-memory entry."""
        ws_dir = Path(self.base_dir) / workspace_id
        if ws_dir.exists():
            shutil.rmtree(ws_dir)
        self._workspaces.pop(workspace_id, None)

    def get_files(self, workspace_id: str) -> List[str]:
        """Return filenames for the workspace, or [] if it was never created."""
        ws = self._workspaces.get(workspace_id)
        return ws.files if ws is not None else []
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main():
    """Script entry point: emits a greeting to stdout."""
    greeting = "Hello from ai-clone!"
    print(greeting)


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "ai-clone"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"beautifulsoup4>=4.14.3",
|
| 9 |
+
"faiss-cpu>=1.13.0",
|
| 10 |
+
"fastapi>=0.123.0",
|
| 11 |
+
"langchain>=0.3.0",
|
| 12 |
+
"langchain-community>=0.3.0",
|
| 13 |
+
"langchain-core>=0.3.0",
|
| 14 |
+
"langchain-groq>=0.2.0",
|
| 15 |
+
"langchain-openai>=0.2.0",
|
| 16 |
+
"langchain-text-splitters>=0.3.0",
|
| 17 |
+
"langgraph>=0.2.0",
|
| 18 |
+
"numpy>=2.3.5",
|
| 19 |
+
"pdfminer-six>=20251107",
|
| 20 |
+
"pinecone-client>=6.0.0",
|
| 21 |
+
"pydantic>=2.12.5",
|
| 22 |
+
"pypdf>=6.4.0",
|
| 23 |
+
"pypdf2>=3.0.1",
|
| 24 |
+
"python-dotenv>=1.2.1",
|
| 25 |
+
"requests>=2.32.5",
|
| 26 |
+
"sentence-transformers>=5.1.2",
|
| 27 |
+
"tavily-python>=0.7.13",
|
| 28 |
+
"tqdm>=4.67.1",
|
| 29 |
+
"trafilatura>=2.0.0",
|
| 30 |
+
"uvicorn[standard]>=0.38.0",
|
| 31 |
+
"wikipedia>=1.4.0",
|
| 32 |
+
]
|
rag/__init__.py
ADDED
|
File without changes
|
rag/agents.py
ADDED
|
@@ -0,0 +1,803 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Production-Level Agents for Perplexity Clone
|
| 3 |
+
=============================================
|
| 4 |
+
Each agent is a node in LangGraph pipelines.
|
| 5 |
+
Agents handle specific tasks and pass state to next nodes.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import List, Dict, Any
|
| 9 |
+
from rag.rag_state import (
|
| 10 |
+
RAGState,
|
| 11 |
+
WebSearchState,
|
| 12 |
+
RAGOnlyState,
|
| 13 |
+
AgenticState,
|
| 14 |
+
AnalysisState,
|
| 15 |
+
SummarizeState
|
| 16 |
+
)
|
| 17 |
+
from config.config import Config
|
| 18 |
+
from config.system_prompt import PPLX_SYSTEM_PROMPT
|
| 19 |
+
from vectorstore.store import VectorStore
|
| 20 |
+
from tools.search_tool import SearchTool
|
| 21 |
+
from tools.browse_tool import BrowseTool
|
| 22 |
+
from tools.reranker_tool import Reranker
|
| 23 |
+
from tools.citation_tool import CitationTool
|
| 24 |
+
from tools.summarizer_tool import SummarizerTool
|
| 25 |
+
from tools.followup_tool import FollowUpGenerator
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# =============================================================================
|
| 29 |
+
# DEEP RESEARCH AGENTS (Original)
|
| 30 |
+
# =============================================================================
|
| 31 |
+
|
| 32 |
+
class PlannerAgent:
    """Decomposes question into 3-5 sub-questions via the LLM."""

    def __init__(self) -> None:
        self.llm = Config.get_llm()

    def plan(self, state: RAGState) -> RAGState:
        """Ask for a numbered list of sub-questions; store up to 5 on state."""
        prompt = (
            "Break the following question into 3-5 clear sub-questions.\n"
            "Return them as a numbered list.\n\n"
            f"{state['question']}"
        )
        resp = self.llm.invoke([
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ])
        lines = [l.strip("-• ").strip() for l in resp.content.splitlines() if l.strip()]
        subqs: List[str] = []
        for l in lines:
            # BUG FIX: a separator line such as "---" strips down to "" and
            # the original `l[0]` raised IndexError; skip empty entries.
            if not l:
                continue
            # Keep numbered entries; if the reply is short (<=5 lines), keep
            # every line since it is likely already just the sub-questions.
            if l[0].isdigit() or len(lines) <= 5:
                subqs.append(l)
        state["sub_questions"] = subqs[:5]
        return state
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class ResearchAgent:
    """Collects evidence from local RAG + web search."""

    def __init__(self, vector_store: VectorStore) -> None:
        self.vs = vector_store
        self.search_tool = SearchTool()
        self.browse_tool = BrowseTool()
        self.reranker = Reranker()

    def research(self, state: RAGState) -> RAGState:
        """For each sub-question: gather reranked local chunks plus cleaned web pages."""
        collected: List[str] = []
        fetched_pages: List[Dict] = []

        for sub_q in state.get("sub_questions", []):
            # Local vector-store retrieval, narrowed by the reranker.
            candidates = self.vs.retrieve(sub_q, k=8)
            ranked = self.reranker.rerank(sub_q, candidates, top_k=4)
            collected.extend(doc.page_content for doc in ranked)

            # Web search, then fetch readable text from each hit.
            for hit in self.search_tool.search(sub_q, num_results=3):
                page_url = hit.get("url")
                page_title = hit.get("title", "Web result")
                if not page_url:
                    continue
                text = self.browse_tool.fetch_clean(page_url)
                if not text:
                    continue
                fetched_pages.append({"title": page_title, "url": page_url, "content": text})
                # Cap per-page evidence to keep LLM context bounded.
                collected.append(text[:1500])

        state["web_pages"] = fetched_pages
        state["evidence"] = collected
        return state
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
class AggregatorAgent:
    """Writes draft answers per sub-question from evidence."""

    def __init__(self) -> None:
        self.llm = Config.get_llm()

    def aggregate(self, state: RAGState) -> RAGState:
        """Draft a brief answer for each sub-question using shared evidence.

        Returns the state with ``draft_answers`` populated.
        """
        # PERF: the evidence context is identical for every sub-question,
        # so build the joined string once instead of inside the loop
        # (the original re-joined it per iteration).
        context = "\n\n".join(state.get("evidence", [])[:12])

        drafts: List[str] = []
        for sq in state.get("sub_questions", []):
            prompt = (
                "Using the evidence below, answer the sub-question briefly and clearly.\n\n"
                f"Evidence:\n{context}\n\nSub-question: {sq}"
            )
            resp = self.llm.invoke([
                {"role": "system", "content": PPLX_SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ])
            drafts.append(f"Sub-question: {sq}\n{resp.content}")

        state["draft_answers"] = drafts
        return state
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class WriterAgent:
    """Writes final structured deep-research report."""

    def __init__(self) -> None:
        self.llm = Config.get_llm()

    def write(self, state: RAGState) -> RAGState:
        """Compose the final report from the per-sub-question drafts."""
        findings = "\n\n".join(state.get("draft_answers", []))
        report_prompt = (
            "You are Perplexity in deep research mode.\n"
            "Write a structured answer with sections: Overview, Key Points, Details, Conclusion.\n"
            "Use inline citations like [1], [2] where appropriate.\n\n"
            f"Original question:\n{state['question']}\n\nFindings:\n{findings}"
        )
        messages = [
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": report_prompt},
        ]
        state["final_answer"] = self.llm.invoke(messages).content
        return state
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class ValidatorAgent:
    """Builds the source list & (optionally) validates citations.

    Converts up to 10 fetched web pages into ``{"title", "url"}`` records,
    then lets the citation tool keep only the sources actually referenced
    by the final answer.
    """

    def __init__(self) -> None:
        self.citation_tool = CitationTool()

    def validate_and_attach(self, state: RAGState) -> RAGState:
        sources: List[Dict] = []
        for p in state.get("web_pages", [])[:10]:
            # Pages come from the open web and may be missing fields; use
            # .get() so a malformed entry cannot raise KeyError here.
            sources.append({"title": p.get("title", ""), "url": p.get("url", "")})

        used_sources = self.citation_tool.attach_sources(state.get("final_answer", ""), sources)

        state["sources"] = used_sources
        return state
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# =============================================================================
|
| 158 |
+
# WEB SEARCH AGENTS
|
| 159 |
+
# =============================================================================
|
| 160 |
+
|
| 161 |
+
class WebSearchNode:
    """Node 1: Execute web search query."""

    def __init__(self):
        self.search_tool = SearchTool()

    def search(self, state: WebSearchState) -> WebSearchState:
        q = state.get("query", "")
        print(f" 🔍 WebSearchNode: Searching for '{q[:50]}...'")

        # Best-effort: a failed search leaves an empty result list so the
        # rest of the pipeline can still run.
        try:
            state["search_results"] = self.search_tool.search(q, num_results=6)
        except Exception as exc:
            print(f" ❌ WebSearchNode error: {exc}")
            state["search_results"] = []

        return state
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
class WebFetchNode:
    """Node 2: Fetch and parse web pages.

    Populates ``web_pages`` (truncated page text for the LLM context) and
    ``links`` (short snippets for the UI) from the search results.
    """

    def __init__(self):
        self.browse_tool = BrowseTool()

    def fetch(self, state: WebSearchState) -> WebSearchState:
        pages = []
        links = []

        for r in state.get("search_results", []):
            url = r.get("url")
            if not url:
                continue

            try:
                content = self.browse_tool.fetch_clean(url)
            except Exception:
                # Was a bare ``except:`` — narrowed so KeyboardInterrupt and
                # SystemExit still propagate instead of being swallowed.
                continue

            if content:
                title = r.get("title", "")
                pages.append({
                    "title": title,
                    "url": url,
                    "content": content[:2500]
                })
                links.append({
                    "title": title,
                    "url": url,
                    "snippet": content[:200]
                })

        print(f" 📄 WebFetchNode: Fetched {len(pages)} pages")
        state["web_pages"] = pages
        state["links"] = links
        return state
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
class WebContextNode:
    """Node 3: Build context from fetched pages."""

    def build_context(self, state: WebSearchState) -> WebSearchState:
        pages = state.get("web_pages", [])

        # Number each source ([1], [2], ...) so the answer node can cite it.
        numbered = [
            f"[{idx}] {page['title']}:\n{page['content']}"
            for idx, page in enumerate(pages, start=1)
        ]
        state["context"] = "\n\n---\n\n".join(numbered) if numbered else ""

        print(f" 📝 WebContextNode: Built context from {len(pages)} sources")
        return state
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
class WebAnswerNode:
    """Node 4: Generate answer from context."""

    def __init__(self):
        self.llm = Config.get_llm()
        self.followup = FollowUpGenerator()

    def answer(self, state: WebSearchState) -> WebSearchState:
        query = state.get("query", "")
        context = state.get("context", "")

        # Without web context, fall back to a plain direct-answer prompt.
        if not context:
            prompt = f"Answer this question: {query}"
        else:
            prompt = f"""You are a web search assistant like Perplexity AI.
Use ONLY the following web sources to answer. Cite sources using [1], [2], etc.

WEB SOURCES:
{context}

QUESTION: {query}

Provide a comprehensive, well-cited answer:"""

        messages = [
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ]
        answer = self.llm.invoke(messages).content

        state["answer"] = answer
        state["followups"] = self.followup.generate(answer, query)

        # Expose the fetched pages as citation sources.
        state["sources"] = [
            {"title": page["title"], "url": page["url"]}
            for page in state.get("web_pages", [])
        ]

        print(f" ✅ WebAnswerNode: Generated answer")
        return state
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# =============================================================================
|
| 278 |
+
# RAG-ONLY AGENTS
|
| 279 |
+
# =============================================================================
|
| 280 |
+
|
| 281 |
+
class RAGRetrieveNode:
    """Node 1: Retrieve from uploaded documents."""

    def __init__(self, file_manager):
        self.file_manager = file_manager
        # NOTE(review): instantiated but not used by retrieve() below;
        # kept so any external code touching this attribute still works.
        self.reranker = Reranker()

    def retrieve(self, state: RAGOnlyState) -> RAGOnlyState:
        query = state.get("query", "")
        workspace = self.file_manager.get_workspace(state.get("workspace_id", "default"))

        # Nothing indexed yet — hand an empty chunk list downstream.
        if not (workspace.initialized and workspace.files):
            state["file_chunks"] = []
            print(f" 📁 RAGRetrieveNode: No files in workspace")
            return state

        try:
            docs = workspace.retrieve(query, k=8)
        except Exception as exc:
            print(f" ❌ RAGRetrieveNode error: {exc}")
            state["file_chunks"] = []
            return state

        # Convert Document objects into plain dicts for the graph state.
        state["file_chunks"] = [
            {"content": d.page_content, "source": d.metadata.get("source", "Document")}
            for d in docs
        ]
        print(f" 📁 RAGRetrieveNode: Retrieved {len(docs)} chunks")
        return state
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
class RAGContextNode:
    """Node 2: Build context from retrieved chunks."""

    def build_context(self, state: RAGOnlyState) -> RAGOnlyState:
        chunks = state.get("file_chunks", [])

        # Label each chunk ([DOC 1], [DOC 2], ...) with its originating file.
        parts = [
            f"[DOC {n}] {chunk['source']}:\n{chunk['content']}"
            for n, chunk in enumerate(chunks, start=1)
        ]
        # join() of an empty list is "" — matches the no-chunks case.
        state["context"] = "\n\n---\n\n".join(parts)

        print(f" 📝 RAGContextNode: Built context from {len(chunks)} chunks")
        return state
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
class RAGAnswerNode:
    """Node 3: Generate answer from document context."""

    def __init__(self):
        self.llm = Config.get_llm()
        self.followup = FollowUpGenerator()

    def answer(self, state: RAGOnlyState) -> RAGOnlyState:
        query = state.get("query", "")
        context = state.get("context", "")
        chunks = state.get("file_chunks", [])

        # Guard clause: nothing retrieved means nothing to answer from.
        if not context:
            state["answer"] = "📚 No documents found. Please upload files first using the 📎 button."
            state["sources"] = []
            state["followups"] = []
            return state

        prompt = f"""You are a document analysis assistant.
Answer ONLY based on the provided documents. Do NOT use external knowledge.

DOCUMENTS:
{context}

QUESTION: {query}

Instructions:
- Answer based ONLY on document content
- Say "According to your documents..." when citing
- Quote relevant parts when helpful
- If info is not in documents, say so

ANSWER:"""

        answer = self.llm.invoke([
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ]).content

        state["answer"] = answer
        state["followups"] = self.followup.generate(answer, query)

        # One source entry per distinct document; dict.fromkeys both
        # de-duplicates and preserves first-seen order.
        unique = dict.fromkeys(c.get("source", "Document") for c in chunks)
        sources = [{"title": f"📄 {src}", "url": ""} for src in unique]
        state["sources"] = sources

        print(f" ✅ RAGAnswerNode: Generated answer from {len(sources)} sources")
        return state
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
# =============================================================================
|
| 390 |
+
# AGENTIC RAG AGENTS (Multi-Agent Pipeline)
|
| 391 |
+
# =============================================================================
|
| 392 |
+
|
| 393 |
+
class AgenticPlannerNode:
    """Node 1: Planner agent decides which sub-agents to activate."""

    # Keyword triggers per sub-agent, kept as class-level constants so the
    # routing vocabulary is visible in one place.
    _FILE_WORDS = (
        "document", "file", "pdf", "uploaded", "summarize my",
        "according to", "in the file", "extract", "my notes",
    )
    _WEB_WORDS = (
        "today", "current", "latest", "news", "weather", "stock",
        "who is", "what is", "where", "when", "price", "live",
        "recent", "update",
    )
    _IMAGE_WORDS = (
        "image", "photo", "picture", "logo", "show me", "look like",
        "flag", "screenshot",
    )
    _KNOWLEDGE_WORDS = (
        "explain", "define", "concept", "theory", "how does",
        "what is", "meaning of",
    )

    def plan(self, state: AgenticState) -> AgenticState:
        query = state.get("query", "").lower()

        def triggered(words) -> bool:
            return any(w in query for w in words)

        state["use_file"] = triggered(self._FILE_WORDS)
        # Very short queries default to a web lookup as well.
        state["use_web"] = triggered(self._WEB_WORDS) or len(query.split()) <= 4
        state["use_images"] = triggered(self._IMAGE_WORDS)
        state["use_knowledge"] = triggered(self._KNOWLEDGE_WORDS)

        print(f" 📋 AgenticPlannerNode: file={state['use_file']}, web={state['use_web']}, images={state['use_images']}")
        return state
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
class AgenticFileNode:
    """Node 2: File agent retrieves from uploaded documents.

    Populates ``file_context`` (joined chunk text) and ``file_sources``
    (one citation per distinct document) when the planner enabled the
    file agent and the workspace has an index.
    """

    def __init__(self, file_manager):
        self.file_manager = file_manager

    def retrieve(self, state: AgenticState) -> AgenticState:
        # Defaults first, so every early return leaves the keys populated.
        state["file_context"] = ""
        state["file_sources"] = []

        if not state.get("use_file", False):
            return state

        query = state.get("query", "")
        ws = self.file_manager.get_workspace(state.get("workspace_id", "default"))
        if not ws.initialized:
            return state

        try:
            chunks = ws.retrieve(query, k=6)
        except Exception as e:
            print(f" ❌ AgenticFileNode error: {e}")
            return state

        if chunks:
            state["file_context"] = "\n\n".join(c.page_content for c in chunks)
            # De-duplicate by document name (consistent with RAGAnswerNode)
            # so one file is no longer listed once per retrieved chunk.
            unique = dict.fromkeys(c.metadata.get("source", "Document") for c in chunks)
            state["file_sources"] = [{"title": f"📄 {src}", "url": ""} for src in unique]
            print(f" 📁 AgenticFileNode: Found {len(chunks)} chunks")

        return state
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
class AgenticWebNode:
    """Node 3: Web agent fetches real-time information.

    Populates ``web_context`` (LLM context), ``web_sources`` (citations)
    and ``links`` (UI snippets) when the planner enabled the web agent.
    """

    def __init__(self):
        self.search_tool = SearchTool()
        self.browse_tool = BrowseTool()

    def search(self, state: AgenticState) -> AgenticState:
        # Defaults first, so every exit path leaves the keys populated.
        state["web_context"] = ""
        state["web_sources"] = []
        state["links"] = []

        if not state.get("use_web", False):
            return state

        query = state.get("query", "")
        try:
            results = self.search_tool.search(query, num_results=4)
        except Exception as e:
            print(f" ❌ AgenticWebNode error: {e}")
            return state

        web_parts = []
        sources = []
        links = []

        for r in results:
            url = r.get("url")
            title = r.get("title", "")
            if not url:
                continue

            # Fetch each page independently: previously one failing URL
            # aborted the whole loop and discarded every fetched result.
            try:
                content = self.browse_tool.fetch_clean(url)
            except Exception:
                continue

            if content:
                web_parts.append(f"[{title}]: {content[:1500]}")
                sources.append({"title": title, "url": url})
                links.append({"title": title, "url": url, "snippet": content[:150]})

        state["web_context"] = "\n\n".join(web_parts)
        state["web_sources"] = sources
        state["links"] = links
        print(f" 🌐 AgenticWebNode: Found {len(sources)} sources")

        return state
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
class AgenticKnowledgeNode:
    """Node 4: Knowledge agent retrieves from base vector store."""

    def __init__(self, vector_store: VectorStore):
        self.vs = vector_store
        self.reranker = Reranker()

    def retrieve(self, state: AgenticState) -> AgenticState:
        state["knowledge_context"] = ""
        if not state.get("use_knowledge", False):
            return state

        query = state.get("query", "")
        try:
            # Over-fetch 4 candidates, then rerank down to the top 3.
            candidates = self.vs.retrieve(query, k=4)
            top = self.reranker.rerank(query, candidates, top_k=3)
        except Exception as exc:
            print(f" ❌ AgenticKnowledgeNode error: {exc}")
            return state

        if top:
            state["knowledge_context"] = "\n\n".join(c.page_content for c in top)
            print(f" 📚 AgenticKnowledgeNode: Found {len(top)} chunks")

        return state
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
class AgenticImageNode:
    """Node 5: Image agent fetches relevant images."""

    def __init__(self, image_search):
        self.image_search = image_search

    def search(self, state: AgenticState) -> AgenticState:
        state["images"] = []
        if not state.get("use_images", False):
            return state

        try:
            found = self.image_search.search(state.get("query", ""), count=6)
        except Exception as exc:
            print(f" ❌ AgenticImageNode error: {exc}")
            return state

        state["images"] = found
        print(f" 🖼️ AgenticImageNode: Found {len(found)} images")
        return state
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
class AgenticSynthesizerNode:
    """Node 6: Synthesizer agent combines all contexts and generates final answer."""

    def __init__(self):
        self.llm = Config.get_llm()
        self.followup = FollowUpGenerator()

    def synthesize(self, state: AgenticState) -> AgenticState:
        query = state.get("query", "")

        # Assemble whichever contexts the upstream agents produced, each
        # truncated to keep the prompt within budget.
        sections = []
        if state.get("file_context"):
            sections.append(f"📄 FROM YOUR DOCUMENTS:\n{state['file_context'][:2500]}")
        if state.get("web_context"):
            sections.append(f"🌐 FROM THE WEB:\n{state['web_context'][:2500]}")
        if state.get("knowledge_context"):
            sections.append(f"📚 KNOWLEDGE BASE:\n{state['knowledge_context'][:1500]}")
        if not sections:
            sections.append("No specific context found. Using general knowledge.")

        combined = "\n\n---\n\n".join(sections)
        state["combined_context"] = combined

        prompt = f"""You are an AGENTIC AI assistant that synthesizes information from multiple sources.

AVAILABLE CONTEXT:
{combined}

USER QUESTION: {query}

INSTRUCTIONS:
1. Prioritize user's documents (📄) if relevant
2. Add real-time info from web (🌐) when available
3. Use knowledge base (📚) for background
4. Cite sources appropriately
5. Be comprehensive but concise

SYNTHESIZED ANSWER:"""

        answer = self.llm.invoke([
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ]).content

        state["answer"] = answer
        state["followups"] = self.followup.generate(answer, query)

        # Merge citations from the file and web agents.
        all_sources = state.get("file_sources", []) + state.get("web_sources", [])
        state["sources"] = all_sources

        print(f" ✅ AgenticSynthesizerNode: Generated answer with {len(all_sources)} sources")
        return state
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
# =============================================================================
|
| 628 |
+
# ANALYSIS AGENTS
|
| 629 |
+
# =============================================================================
|
| 630 |
+
|
| 631 |
+
class AnalysisSearchNode:
    """Node 1: Search for analysis data.

    Populates ``web_results`` (raw search hits), ``web_context`` (fetched
    page text), ``links`` and ``sources``.
    """

    def __init__(self):
        self.search_tool = SearchTool()
        self.browse_tool = BrowseTool()

    def search(self, state: AnalysisState) -> AnalysisState:
        query = state.get("query", "")
        print(f" 🔍 AnalysisSearchNode: Searching for analysis data")

        # Defaults first: previously ``web_results`` was left unset when the
        # search itself raised.
        state["web_results"] = []
        state["web_context"] = ""
        state["links"] = []
        state["sources"] = []

        try:
            results = self.search_tool.search(query, num_results=6)
        except Exception as e:
            print(f" ❌ AnalysisSearchNode error: {e}")
            return state

        state["web_results"] = results

        web_parts = []
        links = []
        for r in results:
            url = r.get("url")
            title = r.get("title", "")
            if not url:
                continue

            # Per-URL guard: one unreachable page previously aborted the
            # loop and wiped all fetched context.
            try:
                content = self.browse_tool.fetch_clean(url)
            except Exception:
                continue

            if content:
                web_parts.append(f"[{title}]:\n{content[:2000]}")
                links.append({"title": title, "url": url, "snippet": content[:200]})

        state["web_context"] = "\n\n".join(web_parts)
        state["links"] = links
        state["sources"] = [{"title": l["title"], "url": l["url"]} for l in links]

        return state
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
class AnalysisProcessNode:
    """Node 2: Generate structured analysis."""

    def __init__(self):
        self.llm = Config.get_llm()
        self.followup = FollowUpGenerator()

    def analyze(self, state: AnalysisState) -> AnalysisState:
        query = state.get("query", "")
        context = state.get("web_context", "")

        prompt = f"""You are an expert analyst. Provide deep, comprehensive analysis.

RESEARCH DATA:
{context if context else "No external data available."}

ANALYSIS REQUEST: {query}

Provide structured analysis with:

## Executive Summary
(2-3 sentence overview)

## Key Findings
(Bullet points of main discoveries)

## Detailed Analysis
(In-depth examination with evidence)

## Data & Statistics
(Numbers, trends, comparisons if available)

## Conclusions
(Main takeaways)

## Recommendations
(Actionable suggestions)

Use citations [1], [2] when referencing sources.

ANALYSIS:"""

        reply = self.llm.invoke([
            {"role": "system", "content": PPLX_SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ])
        analysis_text = reply.content

        state["answer"] = analysis_text
        state["followups"] = self.followup.generate(analysis_text, query)

        print(f" ✅ AnalysisProcessNode: Generated analysis")
        return state
|
| 725 |
+
|
| 726 |
+
|
| 727 |
+
# =============================================================================
|
| 728 |
+
# SUMMARIZE AGENTS
|
| 729 |
+
# =============================================================================
|
| 730 |
+
|
| 731 |
+
class SummarizeInputNode:
    """Node 1: Determine input type and fetch content.

    If the query is a URL, fetch that page directly; otherwise run a web
    search and fetch the top results. Populates ``content``, ``links``,
    ``sources`` and ``is_url``.
    """

    def __init__(self):
        self.browse_tool = BrowseTool()
        self.search_tool = SearchTool()

    def process_input(self, state: SummarizeState) -> SummarizeState:
        query = state.get("query", "")

        # Defaults first: previously ``links``/``sources`` were never set
        # when a URL fetch failed, leaving the state keys missing.
        state["content"] = ""
        state["links"] = []
        state["sources"] = []

        if query.startswith("http"):
            state["is_url"] = True
            try:
                content = self.browse_tool.fetch_clean(query)
            except Exception as e:
                print(f" ❌ Error fetching URL: {e}")
                return state

            state["content"] = content or ""
            state["links"] = [{"title": "Source", "url": query, "snippet": content[:200] if content else ""}]
            state["sources"] = [{"title": "Source URL", "url": query}]
            print(f" 🔗 SummarizeInputNode: Fetched URL content")
            return state

        state["is_url"] = False
        try:
            results = self.search_tool.search(query, num_results=3)
        except Exception as e:
            print(f" ❌ Error searching: {e}")
            state["content"] = query  # Use query as content
            return state

        content_parts = []
        links = []
        for r in results:
            url = r.get("url")
            title = r.get("title", "")
            if not url:
                continue
            # Per-URL guard so one unreachable page does not discard the
            # content already collected from the other results.
            try:
                text = self.browse_tool.fetch_clean(url)
            except Exception:
                continue
            if text:
                content_parts.append(text[:1500])
                links.append({"title": title, "url": url, "snippet": text[:150]})

        state["content"] = "\n\n".join(content_parts)
        state["links"] = links
        state["sources"] = [{"title": l["title"], "url": l["url"]} for l in links]
        print(f" 🔍 SummarizeInputNode: Fetched {len(links)} sources")

        return state
|
| 780 |
+
|
| 781 |
+
|
| 782 |
+
class SummarizeProcessNode:
    """Node 2: Generate summary."""

    def __init__(self):
        self.summarizer = SummarizerTool()
        self.followup = FollowUpGenerator()

    def summarize(self, state: SummarizeState) -> SummarizeState:
        content = state.get("content", "")
        query = state.get("query", "")

        if not content:
            summary = "Could not find content to summarize."
        else:
            summary = self.summarizer.summarize(content, max_words=300)

        state["answer"] = summary
        state["followups"] = self.followup.generate(summary, query)

        print(f" ✅ SummarizeProcessNode: Generated summary")
        return state
|
| 803 |
+
|
rag/graph_deep.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Production-Level LangGraph Pipelines for Perplexity Clone
|
| 3 |
+
==========================================================
|
| 4 |
+
Each mode has its own graph with proper node structure.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from langgraph.graph import StateGraph, END
|
| 8 |
+
from rag.rag_state import (
|
| 9 |
+
RAGState,
|
| 10 |
+
WebSearchState,
|
| 11 |
+
RAGOnlyState,
|
| 12 |
+
AgenticState,
|
| 13 |
+
AnalysisState,
|
| 14 |
+
SummarizeState
|
| 15 |
+
)
|
| 16 |
+
from rag.agents import (
|
| 17 |
+
# Deep Research agents
|
| 18 |
+
PlannerAgent,
|
| 19 |
+
ResearchAgent,
|
| 20 |
+
AggregatorAgent,
|
| 21 |
+
WriterAgent,
|
| 22 |
+
ValidatorAgent,
|
| 23 |
+
# Web Search agents
|
| 24 |
+
WebSearchNode,
|
| 25 |
+
WebFetchNode,
|
| 26 |
+
WebContextNode,
|
| 27 |
+
WebAnswerNode,
|
| 28 |
+
# RAG agents
|
| 29 |
+
RAGRetrieveNode,
|
| 30 |
+
RAGContextNode,
|
| 31 |
+
RAGAnswerNode,
|
| 32 |
+
# Agentic agents
|
| 33 |
+
AgenticPlannerNode,
|
| 34 |
+
AgenticFileNode,
|
| 35 |
+
AgenticWebNode,
|
| 36 |
+
AgenticKnowledgeNode,
|
| 37 |
+
AgenticImageNode,
|
| 38 |
+
AgenticSynthesizerNode,
|
| 39 |
+
# Analysis agents
|
| 40 |
+
AnalysisSearchNode,
|
| 41 |
+
AnalysisProcessNode,
|
| 42 |
+
# Summarize agents
|
| 43 |
+
SummarizeInputNode,
|
| 44 |
+
SummarizeProcessNode,
|
| 45 |
+
)
|
| 46 |
+
from vectorstore.store import VectorStore
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class DeepResearchGraph:
    """
    Deep Research Mode Graph
    ========================
    Pipeline: Planner → Research → Aggregate → Write → Validate

    Used for complex queries requiring multi-step analysis.
    """

    def __init__(self, vector_store: VectorStore) -> None:
        self.vs = vector_store
        self.planner = PlannerAgent()
        self.researcher = ResearchAgent(self.vs)
        self.aggregator = AggregatorAgent()
        self.writer = WriterAgent()
        self.validator = ValidatorAgent()
        self.graph = None

    def build(self):
        """Compile the linear five-stage pipeline and cache it on self.graph."""
        builder = StateGraph(RAGState)

        # Ordered stage list drives both node registration and edge wiring.
        stages = [
            ("plan", self.planner.plan),
            ("research", self.researcher.research),
            ("aggregate", self.aggregator.aggregate),
            ("write", self.writer.write),
            ("validate", self.validator.validate_and_attach),
        ]
        for node_name, handler in stages:
            builder.add_node(node_name, handler)

        builder.set_entry_point("plan")
        for (src, _), (dst, _) in zip(stages, stages[1:]):
            builder.add_edge(src, dst)
        builder.add_edge("validate", END)

        self.graph = builder.compile()
        return self.graph

    def run(self, question: str) -> RAGState:
        """Build the graph lazily, then execute it on *question*."""
        if self.graph is None:
            self.build()
        print(f"\n🧠 DEEP RESEARCH GRAPH: {question[:50]}...")
        return self.graph.invoke({"question": question})
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class WebSearchGraph:
    """
    Web Search Mode Graph
    =====================
    Pipeline: Search → Fetch → Context → Answer

    Used for real-time web queries with citations.
    """

    def __init__(self):
        self.search_node = WebSearchNode()
        self.fetch_node = WebFetchNode()
        self.context_node = WebContextNode()
        self.answer_node = WebAnswerNode()
        self.graph = None

    def build(self):
        """Compile the four-stage search pipeline and cache it."""
        builder = StateGraph(WebSearchState)

        stages = [
            ("search", self.search_node.search),
            ("fetch", self.fetch_node.fetch),
            ("context", self.context_node.build_context),
            ("answer", self.answer_node.answer),
        ]
        for node_name, handler in stages:
            builder.add_node(node_name, handler)

        builder.set_entry_point("search")
        for (src, _), (dst, _) in zip(stages, stages[1:]):
            builder.add_edge(src, dst)
        builder.add_edge("answer", END)

        self.graph = builder.compile()
        return self.graph

    def run(self, query: str) -> WebSearchState:
        """Build the graph lazily, then execute it on *query*."""
        if self.graph is None:
            self.build()
        print(f"\n🌐 WEB SEARCH GRAPH: {query[:50]}...")
        return self.graph.invoke({"query": query})
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class RAGOnlyGraph:
    """
    RAG-Only Mode Graph
    ===================
    Pipeline: Retrieve → Context → Answer

    Used for searching uploaded documents only.
    """

    def __init__(self, file_manager):
        self.retrieve_node = RAGRetrieveNode(file_manager)
        self.context_node = RAGContextNode()
        self.answer_node = RAGAnswerNode()
        self.graph = None

    def build(self):
        """Compile the three-stage document pipeline and cache it."""
        builder = StateGraph(RAGOnlyState)

        stages = [
            ("retrieve", self.retrieve_node.retrieve),
            ("context", self.context_node.build_context),
            ("answer", self.answer_node.answer),
        ]
        for node_name, handler in stages:
            builder.add_node(node_name, handler)

        builder.set_entry_point("retrieve")
        for (src, _), (dst, _) in zip(stages, stages[1:]):
            builder.add_edge(src, dst)
        builder.add_edge("answer", END)

        self.graph = builder.compile()
        return self.graph

    def run(self, query: str, workspace_id: str = "default") -> RAGOnlyState:
        """Build the graph lazily, then execute it against one workspace."""
        if self.graph is None:
            self.build()
        print(f"\n📚 RAG ONLY GRAPH: {query[:50]}...")
        return self.graph.invoke({"query": query, "workspace_id": workspace_id})
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
class AgenticRAGGraph:
    """
    Agentic RAG Mode Graph
    ======================
    Pipeline: Planner → [File, Web, Knowledge, Image] → Synthesizer

    Multi-agent collaboration for comprehensive answers.
    Planner decides which agents to activate.
    """

    def __init__(self, file_manager, vector_store: VectorStore, image_search):
        # One node per agent; the compiled graph is created lazily by build().
        self.planner_node = AgenticPlannerNode()
        self.file_node = AgenticFileNode(file_manager)
        self.web_node = AgenticWebNode()
        self.knowledge_node = AgenticKnowledgeNode(vector_store)
        self.image_node = AgenticImageNode(image_search)
        self.synthesizer_node = AgenticSynthesizerNode()
        self.graph = None

    def build(self):
        """Compile the planner → agents → synthesizer pipeline and cache it."""
        builder = StateGraph(AgenticState)

        # Declaration order doubles as execution order. Agent nodes are run
        # unconditionally after the planner; each checks its planner flag
        # internally and no-ops when it was not activated.
        node_table = (
            ("planner", self.planner_node.plan),
            ("file_agent", self.file_node.retrieve),
            ("web_agent", self.web_node.search),
            ("knowledge_agent", self.knowledge_node.retrieve),
            ("image_agent", self.image_node.search),
            ("synthesizer", self.synthesizer_node.synthesize),
        )
        for name, handler in node_table:
            builder.add_node(name, handler)

        # Chain the nodes sequentially, terminating at END.
        names = [name for name, _ in node_table]
        builder.set_entry_point(names[0])
        for src, dst in zip(names, names[1:]):
            builder.add_edge(src, dst)
        builder.add_edge(names[-1], END)

        self.graph = builder.compile()
        return self.graph

    def run(self, query: str, workspace_id: str = "default") -> AgenticState:
        """Run the agentic pipeline for *query*, lazy-building the graph."""
        if self.graph is None:
            self.build()
        print(f"\n🤖 AGENTIC RAG GRAPH: {query[:50]}...")
        initial_state = {"query": query, "workspace_id": workspace_id}
        return self.graph.invoke(initial_state)
| 219 |
+
|
| 220 |
+
class AnalysisGraph:
    """
    Analysis Mode Graph
    ===================
    Pipeline: Search → Analyze

    Deep analysis with structured output format.
    """

    def __init__(self):
        # Two stages; compiled graph is created lazily by build().
        self.search_node = AnalysisSearchNode()
        self.process_node = AnalysisProcessNode()
        self.graph = None

    def build(self):
        """Compile the two-stage search → analyze pipeline and cache it."""
        builder = StateGraph(AnalysisState)

        builder.add_node("search", self.search_node.search)
        builder.add_node("analyze", self.process_node.analyze)

        # search feeds analyze, which terminates the graph.
        builder.set_entry_point("search")
        for src, dst in (("search", "analyze"), ("analyze", END)):
            builder.add_edge(src, dst)

        self.graph = builder.compile()
        return self.graph

    def run(self, query: str) -> AnalysisState:
        """Run an analysis for *query*, compiling the graph on first use."""
        if self.graph is None:
            self.build()
        print(f"\n📊 ANALYSIS GRAPH: {query[:50]}...")
        initial_state = {"query": query}
        return self.graph.invoke(initial_state)
|
| 253 |
+
|
| 254 |
+
class SummarizeGraph:
    """
    Summarize Mode Graph
    ====================
    Pipeline: Input → Summarize

    Handles URL or search-based summarization.
    """

    def __init__(self):
        # Two stages; compiled graph is created lazily by build().
        self.input_node = SummarizeInputNode()
        self.process_node = SummarizeProcessNode()
        self.graph = None

    def build(self):
        """Compile the two-stage input → summarize pipeline and cache it."""
        builder = StateGraph(SummarizeState)

        builder.add_node("input", self.input_node.process_input)
        builder.add_node("summarize", self.process_node.summarize)

        # input feeds summarize, which terminates the graph.
        builder.set_entry_point("input")
        for src, dst in (("input", "summarize"), ("summarize", END)):
            builder.add_edge(src, dst)

        self.graph = builder.compile()
        return self.graph

    def run(self, query: str) -> SummarizeState:
        """Summarize *query* (a URL or a search topic), lazy-building the graph."""
        if self.graph is None:
            self.build()
        print(f"\n📝 SUMMARIZE GRAPH: {query[:50]}...")
        initial_state = {"query": query}
        return self.graph.invoke(initial_state)
rag/rag_state.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any, Optional
|
| 2 |
+
from typing_extensions import TypedDict
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class RAGState(TypedDict, total=False):
    """State object for deep research pipeline.

    total=False: every key is optional — nodes fill fields in incrementally
    as the pipeline runs.
    """

    question: str                     # original user question
    sub_questions: List[str]          # question broken into research sub-questions
    local_docs: List[Dict[str, Any]]  # Serializable version of docs
    web_pages: List[Dict]             # {"title","url","content"}
    evidence: List[str]               # text snippets
    draft_answers: List[str]          # per sub-question
    final_answer: str                 # synthesized answer text
    sources: List[Dict]               # {"title","url"}
| 17 |
+
|
| 18 |
+
class WebSearchState(TypedDict, total=False):
    """State for Web Search mode graph.

    total=False: all keys optional; populated as the graph's
    search → fetch → context → answer nodes run.
    """
    query: str                  # user query
    search_results: List[Dict]  # raw search hits (pre-fetch)
    web_pages: List[Dict]       # fetched page contents
    context: str                # assembled context fed to the answer node
    answer: str                 # final answer text
    sources: List[Dict]         # cited sources
    links: List[Dict]           # related links
    images: List[Dict]          # image results
    followups: List[str]        # suggested follow-up questions
| 30 |
+
|
| 31 |
+
class RAGOnlyState(TypedDict, total=False):
    """State for RAG-only mode graph.

    total=False: all keys optional; populated by the
    retrieve → context → answer nodes.
    """
    query: str               # user query
    workspace_id: str        # document workspace to search
    file_chunks: List[Dict]  # retrieved chunks from uploaded files
    base_chunks: List[Dict]  # NOTE(review): relation to file_chunks not visible here — confirm in RAGRetrieveNode
    context: str             # assembled context fed to the answer node
    answer: str              # final answer text
    sources: List[Dict]      # cited sources
    followups: List[str]     # suggested follow-up questions
| 42 |
+
|
| 43 |
+
class AgenticState(TypedDict, total=False):
    """State for Agentic RAG mode graph.

    total=False: all keys optional. The planner sets the use_* flags; each
    agent node fills its own fields only when its flag is set; the
    synthesizer produces the combined output.
    """
    query: str         # user query
    workspace_id: str  # document workspace for the file agent

    # Planner outputs
    use_file: bool       # activate the file (document) agent
    use_web: bool        # activate the web-search agent
    use_images: bool     # activate the image-search agent
    use_knowledge: bool  # activate the knowledge-base agent

    # Agent outputs
    file_context: str          # context from uploaded documents
    file_sources: List[Dict]   # document sources
    web_context: str           # context from web search
    web_sources: List[Dict]    # web sources
    links: List[Dict]          # related links
    knowledge_context: str     # context from the vector-store knowledge base
    images: List[Dict]         # image results

    # Synthesizer output
    combined_context: str  # merged context from all active agents
    answer: str            # final answer text
    sources: List[Dict]    # merged source list
    followups: List[str]   # suggested follow-up questions
| 69 |
+
|
| 70 |
+
class AnalysisState(TypedDict, total=False):
    """State for Analysis mode graph.

    total=False: all keys optional; populated by the search → analyze nodes.
    """
    query: str                  # user query
    web_results: List[Dict]     # raw web search results
    web_context: str            # assembled web context for analysis
    analysis: str               # full analysis text
    executive_summary: str      # short high-level summary
    key_findings: List[str]     # bullet-point findings
    answer: str                 # final answer text
    sources: List[Dict]         # cited sources
    links: List[Dict]           # related links
    images: List[Dict]          # image results
    followups: List[str]        # suggested follow-up questions
| 84 |
+
|
| 85 |
+
class SummarizeState(TypedDict, total=False):
    """State for Summarize mode graph.

    total=False: all keys optional; populated by the input → summarize nodes.
    """
    query: str            # user input: either a URL or a search topic
    is_url: bool          # whether query was detected as a URL
    content: str          # raw content to summarize
    summary: str          # produced summary text
    answer: str           # final answer text
    sources: List[Dict]   # cited sources
    links: List[Dict]     # related links
    followups: List[str]  # suggested follow-up questions
| 95 |
+
|
rag/router.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from config.config import Config
|
| 3 |
+
|
| 4 |
+
class RouterAgent:
    """
    Production-grade router exactly like Perplexity:
    1. Rule-based fast routing
    2. NER-based entity detection
    3. Real-time classifier
    4. LLM semantic classifier (handles ANY query)

    route() returns one of: "llm", "image", "web", "rag", "deep_research".
    """

    # Modes the LLM fallback classifier may return, in match priority.
    # "deep_research" first so its "web"/"rag" substrings never shadow it.
    _LLM_MODES = ("deep_research", "web", "rag", "llm")

    def __init__(self):
        # LLM handle used only by the semantic fallback classifier.
        self.llm = Config.get_llm()

    # ---------------- FAST RULES ----------------
    def contains(self, q, words):
        """Case-insensitive substring test: True if any of *words* occurs in *q*."""
        q = q.lower()
        return any(w in q for w in words)

    def is_greeting(self, q):
        """Exact match against a small set of greeting phrases."""
        q_low = q.lower().strip()
        return q_low in ["hi", "hello", "hey", "yo", "sup", "hi there", "hello there"]

    def is_image_query(self, q):
        """True when the query asks for pictures or other visual media."""
        image_words = ["image", "photo", "pic", "picture", "logo", "wallpaper", "screenshot"]
        return self.contains(q, image_words)

    def is_realtime(self, q):
        """True for queries that need fresh, time-sensitive data."""
        realtime = [
            "today", "now", "latest", "current",
            "price", "stock", "weather", "news",
            "update", "live", "score", "match", "schedule"
        ]
        return self.contains(q, realtime)

    def is_world_fact(self, q):
        """True for common world-knowledge fact patterns (politics, geography, ...)."""
        patterns = [
            "prime minister", "president", "capital of",
            "ceo", "founder", "population", "richest",
            "oldest", "largest", "smallest", "currency",
            "country", "state", "city", "minister",
            "government", "party"
        ]
        return self.contains(q, patterns)

    def is_ai_model(self, q):
        """True when the query mentions a well-known AI model family."""
        ai_models = ["gpt", "gemini", "llama", "claude", "grok", "mistral", "phi"]
        return self.contains(q, ai_models)

    def is_definition(self, q):
        """True for definition/explanation style questions."""
        q = q.lower()
        return q.startswith(("what is", "define", "explain"))

    def is_deep(self, q):
        """True for analytical queries that warrant multi-step research."""
        q = q.lower()
        return any(x in q for x in [
            "compare", "analysis", "impact", "advantages", "disadvantages",
            "evaluate", "future", "strategy", "risk"
        ])

    def is_entity(self, q):
        """Detects entities by uppercase words (crude NER stand-in)."""
        words = q.split()
        caps = [w for w in words if w[:1].isupper()]
        return len(caps) >= 1

    # ---------------- LLM CLASSIFIER ----------------
    def llm_decide(self, q):
        """
        FINAL DECISION MAKER.
        If rules fail or query is unusual → LLM decides mode.
        """
        system = {
            "role": "system",
            "content": """
Classify this query into exactly one mode:

- "web" → real-time facts, entities, news, people, companies, trending topics
- "rag" → definitions, conceptual explanations, structured factual info
- "llm" → normal chat, creative tasks, responses without external info
- "deep_research" → multi-step analysis, long reports, deep comparisons

Return ONLY one word: web, rag, llm, or deep_research.
"""
        }

        user = {"role": "user", "content": q}

        raw = self.llm.invoke([system, user]).content.strip().lower()
        return self._parse_mode(raw)

    @staticmethod
    def _parse_mode(raw: str) -> str:
        """Extract a valid mode from a possibly-noisy LLM reply.

        FIX: the previous exact-string check rejected replies such as
        '"web".' or 'mode: rag' and silently fell back to "llm". We first
        try the stripped reply, then scan for a known mode word, and only
        then default to "llm".
        """
        cleaned = raw.strip().strip("\"'`.!:").strip()
        if cleaned in RouterAgent._LLM_MODES:
            return cleaned
        for mode in RouterAgent._LLM_MODES:
            if re.search(rf"\b{mode}\b", raw):
                return mode
        return "llm"

    # ---------------- FINAL ROUTER ----------------
    def route(self, q: str) -> str:
        """Route *q* to a mode: fast rules first, LLM classifier as fallback."""
        q = q.strip()

        # LAYER 1 — FAST RULES
        if self.is_greeting(q): return "llm"
        if self.is_image_query(q): return "image"
        if self.is_realtime(q): return "web"
        if self.is_world_fact(q): return "web"
        if self.is_ai_model(q): return "web"

        # Short entity queries (1-2 words) → web
        if len(q.split()) <= 2 and self.is_entity(q): return "web"

        if self.is_deep(q): return "deep_research"
        if self.is_definition(q): return "rag"

        # LAYER 2 — LLM SEMANTIC CLASSIFICATION
        return self.llm_decide(q)
requirements.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core LLM + tools
|
| 2 |
+
langchain
|
| 3 |
+
langchain-core
|
| 4 |
+
langchain-community
|
| 5 |
+
langgraph
|
| 6 |
+
|
| 7 |
+
# LLM providers (Groq via LangChain)
|
| 8 |
+
langchain-groq
|
| 9 |
+
|
| 10 |
+
# Web API backend
|
| 11 |
+
fastapi
|
| 12 |
+
uvicorn[standard]
|
| 13 |
+
pydantic
|
| 14 |
+
python-dotenv
|
| 15 |
+
|
| 16 |
+
# Embeddings + vector search
|
| 17 |
+
sentence-transformers
|
| 18 |
+
faiss-cpu
|
| 19 |
+
|
| 20 |
+
# Web search + HTTP
|
| 21 |
+
requests
|
| 22 |
+
tavily-python
|
| 23 |
+
|
| 24 |
+
# Scraping
|
| 25 |
+
trafilatura
|
| 26 |
+
beautifulsoup4
|
| 27 |
+
|
| 28 |
+
# Wikipedia tool dependency
|
| 29 |
+
wikipedia
|
| 30 |
+
|
| 31 |
+
# PDF/text load support
|
| 32 |
+
pypdf
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import requests
from urllib.parse import urlparse

# =====================================
# PAGE CONFIG
# =====================================
# Must be the first Streamlit command executed in the script.
st.set_page_config(
    page_title="Perplexity AI Clone",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# =====================================
# SESSION STATE
# =====================================
# Streamlit re-runs the whole script on every interaction; these guards
# initialize each key exactly once per browser session.
if "messages" not in st.session_state:
    st.session_state.messages = []          # conversation history (presumably chat transcript)
if "mode" not in st.session_state:
    st.session_state.mode = "Automatic"     # currently selected mode (a MODES key)
if "current_result" not in st.session_state:
    st.session_state.current_result = None  # last API response to render
if "theme" not in st.session_state:
    st.session_state.theme = "dark"         # "dark" or "light"
if "uploaded_files" not in st.session_state:
    st.session_state.uploaded_files = []    # files already sent to the backend
if "show_upload" not in st.session_state:
    st.session_state.show_upload = False    # whether the uploader UI is shown

# =====================================
# CONFIGURATION
# =====================================
API_URL = "http://localhost:8000"  # FastAPI backend base URL
WORKSPACE = "default"              # single shared workspace id sent with every request

# MODE MAPPING - All 8 modes with correct backend endpoints
MODES = {
    "Automatic": {
        "icon": "🔍",
        "desc": "Auto-routes to best mode",
        "endpoint": "/api/chat"
    },
    "Web Search": {
        "icon": "🌐",
        "desc": "Real-time web search",
        "endpoint": "/api/web"
    },
    "RAG": {
        "icon": "📚",
        "desc": "Search uploaded documents",
        "endpoint": "/api/rag"
    },
    "Agentic": {
        "icon": "🤖",
        "desc": "Multi-agent collaboration",
        "endpoint": "/api/agentic"
    },
    "Deep Research": {
        "icon": "🧠",
        "desc": "In-depth research",
        "endpoint": "/api/deep_research"
    },
    "Analysis": {
        "icon": "📊",
        "desc": "Deep data analysis",
        "endpoint": "/api/analyze"
    },
    "Summarize": {
        "icon": "📝",
        "desc": "Summarize content",
        "endpoint": "/api/summarize"
    },
    "Chat": {
        "icon": "💬",
        "desc": "Direct AI chat",
        "endpoint": "/api/focus"
    },
}
|
| 80 |
+
|
| 81 |
+
# =====================================
|
| 82 |
+
# CSS - PERPLEXITY EXACT STYLE
|
| 83 |
+
# =====================================
|
| 84 |
+
def get_css():
    """Build the full page CSS for the current theme.

    Returns an HTML <style> block (as a string) to be injected with
    st.markdown(..., unsafe_allow_html=True). Colors come from one of two
    palettes selected by st.session_state.theme.
    """
    is_dark = st.session_state.theme == "dark"

    # Theme palettes: bg/bg2/bg3 are layered backgrounds, text/text2/muted
    # are progressively dimmer foregrounds.
    if is_dark:
        colors = {
            "bg": "#191A1A",
            "bg2": "#1F2020",
            "bg3": "#2A2B2B",
            "text": "#ECECEC",
            "text2": "#A1A1A1",
            "muted": "#6B6B6B",
            "accent": "#20B8CD",
            "border": "#3A3B3B",
            "success": "#22C55E"
        }
    else:
        colors = {
            "bg": "#FFFFFF",
            "bg2": "#F7F7F8",
            "bg3": "#EEEEEF",
            "text": "#1A1A1A",
            "text2": "#666666",
            "muted": "#999999",
            "accent": "#0EA5E9",
            "border": "#E5E5E5",
            "success": "#22C55E"
        }

    # NOTE: doubled braces ({{ }}) are literal braces in this f-string.
    return f"""
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');

    * {{ font-family: 'Inter', sans-serif !important; }}

    #MainMenu, footer, header, [data-testid="stToolbar"], .stDeployButton {{ display: none !important; }}

    .stApp {{ background: {colors['bg']} !important; }}

    [data-testid="stSidebar"] {{
        background: {colors['bg']} !important;
        border-right: 1px solid {colors['border']} !important;
    }}

    /* Hero */
    .hero {{
        text-align: center;
        padding: 30px 0 15px;
    }}
    .hero-compact {{
        text-align: center;
        padding: 15px 0 10px;
    }}
    .hero-compact .logo {{
        font-size: 28px;
    }}
    .hero-compact .tagline {{
        display: none;
    }}
    .logo {{
        font-size: 40px;
        font-weight: 600;
        color: {colors['text']};
        letter-spacing: -1px;
    }}
    .logo span {{
        background: linear-gradient(135deg, {colors['accent']}, #14B8A6);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
    }}
    .tagline {{
        color: {colors['muted']};
        font-size: 14px;
        margin-top: 5px;
    }}

    /* UNIFIED SEARCH BOX - All elements inside */
    .search-wrapper {{
        max-width: 800px;
        margin: 0 auto;
        padding: 0 20px;
    }}

    /* Hide streamlit defaults */
    .stTextInput > div > div {{
        background: {colors['bg2']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 25px !important;
    }}
    .stTextInput input {{
        background: transparent !important;
        border: none !important;
        color: {colors['text']} !important;
        font-size: 15px !important;
        padding: 12px 16px !important;
    }}
    .stTextInput input::placeholder {{
        color: {colors['muted']} !important;
    }}
    .stTextInput label {{ display: none !important; }}

    .stSelectbox > div > div {{
        background: {colors['bg3']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 18px !important;
    }}
    .stSelectbox [data-baseweb="select"] > div {{
        background: {colors['bg3']} !important;
        border: none !important;
    }}
    .stSelectbox [data-baseweb="select"] > div > div {{
        color: {colors['text']} !important;
    }}
    /* Dropdown menu styling */
    [data-baseweb="popover"] {{
        background: {colors['bg2']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 12px !important;
    }}
    [data-baseweb="menu"] {{
        background: {colors['bg2']} !important;
    }}
    [data-baseweb="menu"] li {{
        background: {colors['bg2']} !important;
        color: {colors['text']} !important;
    }}
    [data-baseweb="menu"] li:hover {{
        background: {colors['bg3']} !important;
    }}
    .stSelectbox label {{ display: none !important; }}

    /* Buttons - theme aware */
    .stButton > button {{
        background: {colors['bg2']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 12px !important;
        color: {colors['text']} !important;
        font-size: 16px !important;
        padding: 8px 16px !important;
        transition: all 0.2s !important;
    }}
    .stButton > button:hover {{
        background: {colors['accent']} !important;
        color: white !important;
        border-color: {colors['accent']} !important;
    }}
    .stButton > button:active {{
        background: {colors['accent']} !important;
    }}

    /* Form submit button */
    .stFormSubmitButton > button {{
        background: {colors['bg3']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 20px !important;
        color: {colors['text']} !important;
    }}
    .stFormSubmitButton > button:hover {{
        background: {colors['accent']} !important;
        color: white !important;
        border-color: {colors['accent']} !important;
    }}

    /* File uploader styling - COMPLETE FIX */
    .stFileUploader {{
        max-width: 600px;
        margin: 10px auto;
    }}
    .stFileUploader > div {{
        background: transparent !important;
    }}
    .stFileUploader > div > div {{
        background: transparent !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] {{
        background: {colors['bg2']} !important;
        border: 2px dashed {colors['border']} !important;
        border-radius: 12px !important;
        padding: 20px !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"]:hover {{
        border-color: {colors['accent']} !important;
    }}
    /* All text inside dropzone */
    .stFileUploader [data-testid="stFileUploaderDropzone"] * {{
        color: {colors['text']} !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] span {{
        color: {colors['text']} !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] p {{
        color: {colors['text']} !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] small {{
        color: {colors['text2']} !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] svg {{
        fill: {colors['text2']} !important;
        stroke: {colors['text2']} !important;
    }}
    .stFileUploader [data-testid="stFileUploaderDropzone"] button {{
        background: {colors['accent']} !important;
        color: white !important;
        border: none !important;
        border-radius: 8px !important;
    }}
    .stFileUploader label {{
        color: {colors['text']} !important;
        font-size: 14px !important;
    }}
    .stFileUploader > section {{
        background: transparent !important;
        border: none !important;
    }}
    .stFileUploader > section > div {{
        background: transparent !important;
    }}

    /* Answer box */
    .answer-box {{
        background: {colors['bg2']};
        border: 1px solid {colors['border']};
        border-radius: 16px;
        padding: 24px;
        color: {colors['text']};
        font-size: 15px;
        line-height: 1.8;
    }}

    /* Source cards */
    .source-card {{
        background: {colors['bg3']};
        border: 1px solid {colors['border']};
        border-radius: 10px;
        padding: 12px;
        margin-bottom: 8px;
        transition: all 0.2s;
    }}
    .source-card:hover {{
        border-color: {colors['accent']};
    }}
    .source-title {{
        color: {colors['accent']};
        font-size: 13px;
        font-weight: 500;
        text-decoration: none;
    }}
    .source-domain {{
        color: {colors['muted']};
        font-size: 11px;
    }}

    /* Query display */
    .query-box {{
        background: {colors['bg2']};
        border: 1px solid {colors['border']};
        border-radius: 12px;
        padding: 16px;
        margin: 15px 0;
    }}
    .query-text {{
        color: {colors['text']};
        font-size: 17px;
        font-weight: 500;
    }}
    .query-mode {{
        color: {colors['accent']};
        font-size: 12px;
        margin-top: 6px;
    }}

    /* Tabs */
    .stTabs [data-baseweb="tab-list"] {{
        background: transparent !important;
        border-bottom: 1px solid {colors['border']} !important;
        gap: 0 !important;
    }}
    .stTabs [data-baseweb="tab"] {{
        background: transparent !important;
        color: {colors['text2']} !important;
    }}
    .stTabs [data-baseweb="tab"][aria-selected="true"] {{
        color: {colors['accent']} !important;
        border-bottom-color: {colors['accent']} !important;
    }}
    .stTabs [data-baseweb="tab-panel"] {{
        padding-top: 1rem !important;
    }}

    /* Answer text styling */
    .stTabs [data-testid="stMarkdownContainer"] {{
        color: {colors['text']} !important;
        font-size: 15px !important;
        line-height: 1.7 !important;
    }}

    /* Mode desc text */
    .mode-desc {{
        text-align: center;
        color: {colors['muted']};
        font-size: 12px;
        margin-top: 8px;
    }}

    /* Column spacing fix */
    [data-testid="column"] {{ padding: 0 2px !important; }}

    /* Expander styling */
    .streamlit-expanderHeader {{
        background: {colors['bg3']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 8px !important;
        color: {colors['text']} !important;
    }}
    .streamlit-expanderContent {{
        background: {colors['bg2']} !important;
        border: 1px solid {colors['border']} !important;
        border-top: none !important;
        border-radius: 0 0 8px 8px !important;
        color: {colors['text']} !important;
    }}
    [data-testid="stExpander"] {{
        background: {colors['bg2']} !important;
        border: 1px solid {colors['border']} !important;
        border-radius: 8px !important;
    }}
    [data-testid="stExpander"] summary {{
        color: {colors['text']} !important;
    }}
    [data-testid="stExpander"] [data-testid="stMarkdownContainer"] {{
        color: {colors['text']} !important;
    }}

    /* Spinner and alerts */
    .stSpinner > div {{
        border-color: {colors['accent']} !important;
    }}
    .stAlert {{
        background: {colors['bg2']} !important;
        color: {colors['text']} !important;
        border: 1px solid {colors['border']} !important;
    }}

    /* Caption text */
    .stCaption, [data-testid="stCaptionContainer"] {{
        color: {colors['text2']} !important;
    }}

    /* Divider */
    hr {{
        border-color: {colors['border']} !important;
    }}
    </style>
    """
| 437 |
+
|
| 438 |
+
st.markdown(get_css(), unsafe_allow_html=True)
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
# =====================================
|
| 442 |
+
# HELPER FUNCTIONS
|
| 443 |
+
# =====================================
|
| 444 |
+
def call_api(query: str, mode: str):
    """Call backend API based on selected mode.

    Args:
        query: The user's question.
        mode: Display name of a mode key in MODES.

    Returns:
        The backend's JSON payload, or an error-shaped dict with the
        same keys the UI expects ("answer", "sources", "links",
        "images", "followups") when the request fails.
    """
    cfg = MODES.get(mode, MODES["Automatic"])
    body = {
        "message": query,
        "workspace_id": WORKSPACE,
        "mode": mode.lower().replace(" ", "_"),
    }
    try:
        resp = requests.post(f"{API_URL}{cfg['endpoint']}", json=body, timeout=180)
        return resp.json()
    except Exception as exc:
        # Degrade gracefully: surface the failure in the answer field so the
        # UI renders it instead of crashing.
        return {
            "answer": f"Error: {str(exc)}",
            "sources": [],
            "links": [],
            "images": [],
            "followups": [],
        }
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
def upload_files(files):
    """Upload files to the backend's document-ingestion endpoint.

    Args:
        files: Iterable of Streamlit UploadedFile objects (may be empty).

    Returns:
        True if the backend accepted the upload, False otherwise
        (including on any network failure).
    """
    if not files:
        return False

    files_payload = [
        ("files", (f.name, f.getvalue(), f.type or "application/octet-stream"))
        for f in files
    ]

    try:
        r = requests.post(
            f"{API_URL}/api/upload_docs",
            data={"workspace_id": WORKSPACE},
            files=files_payload,
            timeout=60,
        )
        return r.ok
    except requests.RequestException:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Only network/HTTP errors should map to False.
        return False
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
def get_domain(url: str) -> str:
    """Return the host of *url* with any leading 'www.' removed.

    Falls back to the first 30 characters of the input when the URL
    cannot be parsed at all.
    """
    try:
        return urlparse(url).netloc.replace('www.', '')
    except Exception:
        # Fix: was a bare `except:`; Exception still covers malformed input
        # without also trapping KeyboardInterrupt/SystemExit.
        return url[:30]
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
# =====================================
# THEME TOGGLE
# =====================================
# A small button in the top-right corner that flips dark <-> light.
col_spacer, col_theme = st.columns([12, 1])
with col_theme:
    # The icon shows the CURRENT theme; clicking switches to the other one.
    theme_icon = "🌙" if st.session_state.theme == "dark" else "☀️"
    if st.button(theme_icon, key="theme_toggle"):
        st.session_state.theme = "light" if st.session_state.theme == "dark" else "dark"
        st.rerun()  # re-render immediately so the new CSS palette applies
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
# =====================================
# HERO - Always show
# =====================================
if st.session_state.current_result:
    # Compact version when showing results (logo only, no tagline)
    st.markdown("""
    <div class="hero-compact">
        <div class="logo">perplexity<span>clone</span></div>
    </div>
    """, unsafe_allow_html=True)
else:
    # Full version on home (logo + tagline)
    st.markdown("""
    <div class="hero">
        <div class="logo">perplexity<span>clone</span></div>
        <div class="tagline">Where knowledge begins</div>
    </div>
    """, unsafe_allow_html=True)
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
# =====================================
# UNIFIED SEARCH BOX (All elements inside)
# =====================================
st.markdown('<div class="search-wrapper">', unsafe_allow_html=True)

# Single row with everything inside: mode picker | query | attach | submit
col1, col2, col3, col4 = st.columns([2, 8, 1, 1])

with col1:
    # Mode selector dropdown
    mode_list = list(MODES.keys())
    current_idx = mode_list.index(st.session_state.mode)
    selected = st.selectbox(
        "mode",
        mode_list,
        index=current_idx,
        format_func=lambda x: f"{MODES[x]['icon']} {x}",
        label_visibility="collapsed",
        key="mode_select"
    )
    if selected != st.session_state.mode:
        # Persist the new mode and rerun so the description line updates.
        st.session_state.mode = selected
        st.rerun()

with col2:
    # Search input
    query = st.text_input(
        "search",
        placeholder="Ask anything...",
        label_visibility="collapsed",
        key="query_input"
    )

with col3:
    # File upload icon button - toggles the file picker below
    if st.button("📎", key="attach_btn", help="Upload files"):
        st.session_state.show_upload = not st.session_state.show_upload

with col4:
    # Submit button
    submit = st.button("→", key="submit_btn", help="Search")

st.markdown('</div>', unsafe_allow_html=True)

# Mode description under the search row
st.markdown(f'<div class="mode-desc">{MODES[st.session_state.mode]["icon"]} {st.session_state.mode}: {MODES[st.session_state.mode]["desc"]}</div>', unsafe_allow_html=True)

# Show file uploader when the attach icon is toggled on
if st.session_state.show_upload:
    uploaded = st.file_uploader(
        "Upload documents (PDF, TXT, MD, PPTX)",
        type=["pdf", "txt", "md", "pptx"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if uploaded:
        with st.spinner("📤 Uploading..."):
            if upload_files(uploaded):
                # Track only file names we have not seen before.
                new_files = [f.name for f in uploaded if f.name not in st.session_state.uploaded_files]
                if new_files:
                    st.session_state.uploaded_files.extend(new_files)
                    st.success(f"✅ {len(new_files)} file(s) uploaded!")
                    st.session_state.show_upload = False
                    st.rerun()

# Show uploaded files count
if st.session_state.uploaded_files:
    st.caption(f"📁 {len(st.session_state.uploaded_files)} file(s) ready for RAG")
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
# =====================================
# HANDLE SEARCH
# =====================================
# Only fire when the submit button was clicked AND the query is non-blank.
if submit and query.strip():
    with st.spinner(f"🔄 {st.session_state.mode}..."):
        result = call_api(query.strip(), st.session_state.mode)
        # Persist the full result so it survives the rerun below.
        st.session_state.current_result = {
            "query": query.strip(),
            "mode": st.session_state.mode,
            "data": result
        }
        st.rerun()
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
# =====================================
# DISPLAY RESULTS
# =====================================
if st.session_state.current_result:
    result = st.session_state.current_result
    data = result["data"]

    st.divider()

    # Query box: echo the question and the mode it was answered in.
    mode_info = MODES.get(result['mode'], MODES['Automatic'])
    st.markdown(f"""
    <div class="query-box">
        <div class="query-text">{result['query']}</div>
        <div class="query-mode">{mode_info['icon']} {result['mode']}</div>
    </div>
    """, unsafe_allow_html=True)

    # Sources count (either key may be populated depending on the endpoint)
    sources = data.get("sources", []) or data.get("links", [])
    if sources:
        st.success(f"✓ {len(sources)} sources")

    # Layout - Full width (removed duplicate sidebar sources)
    tabs = st.tabs(["✨ Answer", "🔗 Sources", "🖼️ Images"])

    with tabs[0]:
        answer = data.get("answer", "No answer.")

        # Display answer directly with markdown
        st.markdown(answer)

        # Follow-up suggestions: clicking one re-queries in the same mode.
        followups = data.get("followups", [])
        if followups:
            st.markdown("**Related:**")
            for i, fu in enumerate(followups[:3]):
                if st.button(f"→ {fu}", key=f"fu_{i}"):
                    with st.spinner("..."):
                        new_result = call_api(fu, st.session_state.mode)
                        st.session_state.current_result = {
                            "query": fu,
                            "mode": st.session_state.mode,
                            "data": new_result
                        }
                        st.rerun()

    with tabs[1]:
        # Source cards with title linking out and the bare domain below.
        links = data.get("links", [])
        if links:
            for link in links:
                st.markdown(f"""
                <div class="source-card">
                    <a href="{link.get('url','#')}" target="_blank" class="source-title">{link.get('title','Source')}</a>
                    <div class="source-domain">{get_domain(link.get('url',''))}</div>
                </div>
                """, unsafe_allow_html=True)
        else:
            st.info("No sources")

    with tabs[2]:
        # 3-column image grid, capped at 9 images.
        images = data.get("images", [])
        if images:
            cols = st.columns(3)
            for i, img in enumerate(images[:9]):
                url = img.get("url") or img.get("thumbnail_url")
                if url:
                    with cols[i % 3]:
                        st.image(url, use_container_width=True)
        else:
            st.info("No images")
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
# =====================================
# SIDEBAR (for settings)
# =====================================
with st.sidebar:
    st.markdown("### ⚙️ Settings")
    st.divider()

    # Reset the conversation (result + message history) and re-render.
    if st.button("🗑️ Clear Chat", use_container_width=True):
        st.session_state.current_result = None
        st.session_state.messages = []
        st.rerun()

    # Forget locally-tracked uploads. NOTE(review): this only clears the
    # client-side list — the backend workspace is presumably untouched; verify.
    if st.button("🗑️ Clear Files", use_container_width=True):
        st.session_state.uploaded_files = []
        st.info("Files cleared")

    st.divider()
    st.caption(f"Theme: {'🌙 Dark' if st.session_state.theme == 'dark' else '☀️ Light'}")
    st.caption(f"Mode: {st.session_state.mode}")

    # List the uploaded file names, if any.
    if st.session_state.uploaded_files:
        st.divider()
        st.markdown("### 📁 Files")
        for f in st.session_state.uploaded_files:
            st.caption(f"📄 {f}")
|
tools/__init__.py
ADDED
|
File without changes
|
tools/browse_tool.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import trafilatura
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class BrowseTool:
    """Downloads and cleans web pages."""

    def fetch_clean(self, url: str) -> str:
        """Fetch *url* and return its extracted main text.

        Comments and tables are stripped by the extractor. Returns an
        empty string on any network or extraction failure.
        """
        try:
            page = requests.get(url, timeout=20)
            page.raise_for_status()
            cleaned = trafilatura.extract(
                page.text, include_comments=False, include_tables=False
            )
        except Exception:
            # Best-effort fetch: any failure degrades to "no content".
            return ""
        # trafilatura returns None when it cannot extract anything useful.
        return cleaned if cleaned else ""
|
tools/citation_tool.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class CitationTool:
    """Extracts [1], [2]… indices from answer and maps to sources."""

    _pattern = re.compile(r"\[(\d+)\]")

    def extract_indices(self, answer: str) -> List[int]:
        """Return the sorted, de-duplicated citation numbers in *answer*."""
        seen = set()
        for match in self._pattern.finditer(answer):
            seen.add(int(match.group(1)))
        return sorted(seen)

    def attach_sources(self, answer: str, sources: List[Dict]) -> List[Dict]:
        """Map each cited 1-based index to its source dict.

        Out-of-range indices (e.g. [7] with only 3 sources) are ignored.
        """
        total = len(sources)
        return [
            sources[idx - 1]
            for idx in self.extract_indices(answer)
            if 1 <= idx <= total
        ]
|
tools/followup_tool.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from config.config import Config
|
| 2 |
+
|
| 3 |
+
class FollowUpGenerator:
    """
    Generate 3–5 follow-up suggestions like Perplexity.

    (In practice the return is capped at 4 — see `generate`.)
    """

    def __init__(self):
        # Shared LLM instance from central config.
        self.llm = Config.get_llm()

    def generate(self, answer: str, question: str) -> list:
        """Return up to 4 short follow-up questions for the exchange.

        The LLM is asked for bullet points starting with "•"; any line
        without a bullet marker is discarded during parsing.
        """
        prompt = f"""
Given the user question and the assistant answer, generate 3 short follow-up questions the user might ask next.

Rules:
- Keep them brief (max 8–12 words)
- No numbered list
- No explanations
- Only return bullet points starting with "•"
- Must be relevant and helpful

User question: {question}
Assistant answer: {answer}

Generate follow-ups:
"""

        resp = self.llm.invoke(prompt).content
        lines = resp.strip().split("\n")

        # Only keep bullet lines; strip the bullet marker itself.
        suggestions = [l.replace("•", "").strip() for l in lines if "•" in l]
        return suggestions[:4]
|
tools/image_tavily.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/image_tavily.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from tavily import TavilyClient
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class TavilyImageSearch:
    """
    Tavily image search API wrapper.

    Requires TAVILY_API_KEY in the environment; raises at construction
    time so a missing key surfaces early rather than on first search.
    """

    def __init__(self):
        api_key = os.getenv("TAVILY_API_KEY")
        if not api_key:
            raise RuntimeError("Missing TAVILY_API_KEY in environment")
        self.client = TavilyClient(api_key=api_key)

    def search(self, query: str, count: int = 6) -> List[Dict]:
        """
        Fetch images for a query.

        Returns a list of dicts with "title", "thumbnail_url" and
        "content_url" keys; [] on any API error.
        """

        try:
            # Plain Tavily web search with include_images=True — the
            # response then carries an "images" array we harvest below.
            resp = self.client.search(
                query=query,
                max_results=count,
                include_images=True,
                include_answer=False
            )
        except Exception as e:
            print("Tavily image search error:", e)
            return []

        images = []
        raw_images = resp.get("images", [])

        for item in raw_images:
            # Handle both dict and direct response formats
            # (the API has returned both plain URL strings and dicts).
            if isinstance(item, dict):
                images.append({
                    "title": item.get("title", item.get("description", "")),
                    "thumbnail_url": item.get("thumbnail", item.get("thumbnail_url", item.get("url", ""))),
                    "content_url": item.get("url", item.get("content_url", "")),
                })
            else:
                # Fallback for string URLs
                images.append({
                    "title": "",
                    "thumbnail_url": str(item),
                    "content_url": str(item),
                })

        return images
|
tools/knowledge_panel.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/knowledge_panel.py
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
from tavily import TavilyClient
|
| 5 |
+
from typing import Dict, List
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class KnowledgePanel:
    """
    Builds an entity knowledge panel similar to Perplexity:
    - Top image
    - Summary
    - Basic facts
    - Wikipedia link
    """

    def __init__(self):
        self.client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

    def get_wikipedia_extract(self, query: str) -> Dict:
        """
        Returns summary + infobox data from Wikipedia's REST summary API.

        Returns {} on any network or parse failure.
        """
        try:
            url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{query.replace(' ', '_')}"
            r = requests.get(url, timeout=10)
            data = r.json()

            return {
                "title": data.get("title", ""),
                "description": data.get("description", ""),
                "summary": data.get("extract", ""),
                # `thumbnail` may be absent or null — guard both cases.
                "thumbnail": (data.get("thumbnail") or {}).get("source", ""),
                "url": data.get("content_urls", {}).get("desktop", {}).get("page", "")
            }
        except Exception:
            # Fix: was a bare `except:`; Exception keeps the best-effort
            # behavior without trapping KeyboardInterrupt/SystemExit.
            return {}

    def get_fast_facts(self, query: str) -> List[str]:
        """
        Uses Tavily qna to extract AI-generated facts.

        Returns up to 8 fact strings; [] on failure.
        """
        try:
            resp = self.client.qna(
                query=f"List 8 short bullet facts about {query}. No explanation, only facts.",
                n_tokens=150
            )
            answer = resp.get("answer", "")
            # Parse bullet points: drop blank lines, strip bullet/dash markers.
            fact_lines = [line.strip("-• ").strip() for line in answer.split("\n") if line.strip()]
            return fact_lines[:8]  # Return max 8 facts
        except Exception:
            # Fix: was a bare `except:` (see note above).
            return []

    def build_panel(self, query: str) -> Dict:
        """
        Builds the full knowledge panel (Wikipedia extract + quick facts).
        """
        wiki = self.get_wikipedia_extract(query)
        facts = self.get_fast_facts(query)

        return {
            "wiki": wiki,
            "facts": facts
        }
|
tools/memory_tool.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class MemoryTool:
|
| 5 |
+
"""Simple in-memory workspace chat history."""
|
| 6 |
+
|
| 7 |
+
def __init__(self) -> None:
|
| 8 |
+
self.store: Dict[str, List[Dict[str, str]]] = {}
|
| 9 |
+
self.profile: Dict[str, Dict[str, str]] = {} # Store user metadata like name
|
| 10 |
+
|
| 11 |
+
def add(self, workspace_id: str, role: str, content: str) -> None:
|
| 12 |
+
self.store.setdefault(workspace_id, []).append(
|
| 13 |
+
{"role": role, "content": content}
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
def get_context(self, workspace_id: str, max_messages: int = 10) -> str:
|
| 17 |
+
msgs = self.store.get(workspace_id, [])[-max_messages:]
|
| 18 |
+
return "\n".join(f"{m['role'].upper()}: {m['content']}" for m in msgs)
|
| 19 |
+
|
| 20 |
+
def get_recent_messages(self, workspace_id: str, limit: int = 6) -> List[Dict[str, str]]:
|
| 21 |
+
"""Get recent messages for LLM context (default last 6 messages)."""
|
| 22 |
+
return self.store.get(workspace_id, [])[-limit:]
|
| 23 |
+
|
| 24 |
+
def get_long_chat(self, workspace_id: str) -> List[Dict[str, str]]:
|
| 25 |
+
"""Get entire chat history for long-term memory context."""
|
| 26 |
+
return self.store.get(workspace_id, [])
|
| 27 |
+
|
| 28 |
+
def set_name(self, workspace_id: str, name: str) -> None:
|
| 29 |
+
"""Store user's name in profile."""
|
| 30 |
+
self.profile[workspace_id] = {"name": name}
|
| 31 |
+
|
| 32 |
+
def get_name(self, workspace_id: str) -> str:
|
| 33 |
+
"""Retrieve user's name from profile."""
|
| 34 |
+
return self.profile.get(workspace_id, {}).get("name")
|
tools/name_extractor.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
class NameExtractor:
    """Extract a user's first name from self-introduction phrases."""

    # \b guards fix false positives where "i am" appeared inside other
    # words (e.g. "miami amsterdam" used to match the embedded "i am").
    # Text is lowercased before matching, so [a-z] is sufficient.
    _pattern = re.compile(r"\b(i am|my name is)\s+([a-z]+)")

    def extract(self, text: str):
        """Return the capitalized name from phrases like "i am naveen"
        or "my name is naveen"; None if no such phrase is present.
        """
        match = self._pattern.search(text.lower())
        if match:
            return match.group(2).title()
        return None
|
tools/name_tool.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class NameTool:
    """Extract user names from natural language messages."""

    # Word boundaries fix false positives where a phrase appeared inside
    # another word (e.g. "miami amsterdam" used to match the embedded "i am").
    _PATTERNS = [
        r"\bi am ([a-zA-Z]+)",
        r"\bi'm ([a-zA-Z]+)",
        r"\bmy name is ([a-zA-Z]+)",
    ]

    def extract_name(self, text: str):
        """
        Extract name from sentences like:
        - i am naveen
        - I'm Naveen
        - my name is naveen

        Returns:
            The name with its first letter capitalized, or None if no
            introduction pattern matched.
        """
        text = text.lower()

        for p in self._PATTERNS:
            m = re.search(p, text)
            if m:
                return m.group(1).strip().title()

        return None
|
tools/reranker_tool.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from sentence_transformers import CrossEncoder
|
| 3 |
+
from langchain.schema import Document
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Reranker:
    """Cross-encoder reranker for retrieved docs."""

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2") -> None:
        # Cross-encoder scores (query, passage) pairs jointly.
        self.model = CrossEncoder(model_name)

    def rerank(self, query: str, docs: List[Document], top_k: int = 5) -> List[Document]:
        """Score each doc against *query* and return the top_k best.

        Returns [] when given no documents.
        """
        if not docs:
            return []
        pair_inputs = [[query, doc.page_content] for doc in docs]
        relevance = self.model.predict(pair_inputs)
        ranked = sorted(zip(docs, relevance), key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _score in ranked[:top_k]]
|
tools/search_tool.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
import requests
|
| 4 |
+
from config.config import Config
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class SearchTool:
    """Tavily web search wrapper."""

    def __init__(self) -> None:
        # Prefer the environment variable; fall back to the value in Config.
        self.api_key = os.getenv("TAVILY_API_KEY") or Config.TAVILY_API_KEY
        if not self.api_key:
            raise RuntimeError("TAVILY_API_KEY missing in .env")

    def search(self, query: str, num_results: int = 5) -> List[Dict]:
        """POST *query* to Tavily and return its 'results' list ([] if absent).

        Raises:
            requests.HTTPError: on a non-2xx response (via raise_for_status).
        """
        url = "https://api.tavily.com/search"
        # NOTE(review): Tavily's REST API documents `max_results` and a
        # `Bearer <key>` Authorization header — confirm that this raw-key
        # header and the `num_results` field are actually honored.
        payload = {"query": query, "num_results": num_results}
        headers = {"Authorization": self.api_key}
        resp = requests.post(url, json=payload, headers=headers, timeout=20)
        resp.raise_for_status()
        data = resp.json()
        return data.get("results", [])
|
tools/summarizer_tool.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Summarization helper using the main LLM."""
|
| 2 |
+
|
| 3 |
+
from config.config import Config
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class SummarizerTool:
    """Summarizes long texts using the LLM."""

    def __init__(self) -> None:
        # Shared LLM instance from central config.
        self.llm = Config.get_llm()

    def summarize(self, text: str, max_words: int = 300) -> str:
        """
        Summarize the provided text.

        Args:
            text: Input text.
            max_words: Target summary length.

        Returns:
            Summary string.
        """
        instruction = (
            f"Summarize the following text in about {max_words} words:\n\n{text}"
        )
        return self.llm.invoke(instruction).content
|
tools/wiki_tool.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Wikipedia search tool."""
|
| 2 |
+
|
| 3 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class WikiTool:
    """Wrapper for Wikipedia-based QA."""

    def __init__(self) -> None:
        # Limit lookups to the top 3 English-language results.
        self.api = WikipediaAPIWrapper(top_k_results=3, lang="en")

    def query(self, query: str) -> str:
        """Search Wikipedia and return a summarized answer.

        Args:
            query: Free-text search query.

        Returns:
            A text answer produced by the Wikipedia API wrapper.
        """
        return self.api.run(query)
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vectorstore/__init__.py
ADDED
|
File without changes
|
vectorstore/store.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from langchain.schema import Document
|
| 3 |
+
from langchain_community.vectorstores import FAISS
|
| 4 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class VectorStore:
    """FAISS vector store wrapper over MiniLM sentence embeddings."""

    def __init__(self) -> None:
        # Lightweight general-purpose sentence embedding model.
        self.embedding = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        self.store: FAISS | None = None
        # Kept for backward compatibility with callers that use it directly.
        self.retriever = None

    def create(self, docs: List[Document]) -> None:
        """Create FAISS index from documents (and a default retriever)."""
        self.store = FAISS.from_documents(docs, self.embedding)
        self.retriever = self.store.as_retriever()

    def retrieve(self, query: str, k: int = 8) -> List[Document]:
        """Return the *k* documents most similar to *query*.

        Raises:
            RuntimeError: if create() has not been called yet.
        """
        if self.store is None:
            raise RuntimeError("Vector store not initialized.")
        # Fix: `k` was previously accepted but ignored (the default
        # retriever used its own top-k). Query the store directly so the
        # caller-supplied k is honored.
        return self.store.similarity_search(query, k=k)
|