Commit 1dc0474
Committed by GitHub Actions
Parent(s): 18448bc
Sync from GitHub commit: bbc03771

Files changed:
- .gitattributes                    +0  -35
- .gitignore                        +4  -0
- Dockerfile                        +19 -0
- README.md                         +6  -9
- app/api/v1/api_router.py          +8  -0
- app/api/v1/endpoints/chat.py      +37 -0
- app/api/v1/endpoints/document.py  +43 -0
- app/api/v1/endpoints/history.py   +46 -0
- app/core/config.py                +15 -0
- app/db/session.py                 +20 -0
- app/main.py                       +31 -0
- app/services/agent_service.py     +69 -0
- app/services/rag_service.py       +42 -0
- app/services/state_service.py     +16 -0
- app/services/tool_service.py      +54 -0
- requirements.txt                  +32 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+.env
+__pycache__
+chat_history.db
+temp_uploads
Dockerfile ADDED
@@ -0,0 +1,19 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libsqlite3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+ENV PORT=7860
+EXPOSE 7860
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,8 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: AI Knowledge Agent
+emoji: 🧠
+colorFrom: blue
+colorTo: indigo
 sdk: docker
-
-
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+app_port: 7860
+---
app/api/v1/api_router.py ADDED
@@ -0,0 +1,8 @@
+from fastapi import APIRouter
+from app.api.v1.endpoints import chat, document, history  # Ensure these exist
+
+api_router = APIRouter()  # <--- This name MUST match exactly
+
+api_router.include_router(chat.router, prefix="/chat", tags=["Chat"])
+api_router.include_router(document.router, prefix="/documents", tags=["Documents"])
+api_router.include_router(history.router, prefix="/history", tags=["History"])
app/api/v1/endpoints/chat.py ADDED
@@ -0,0 +1,37 @@
+import os
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from app.services.agent_service import AgentService
+from app.services.state_service import brain_state
+
+router = APIRouter()
+
+class ChatRequest(BaseModel):
+    message: str
+    model: str = "Google Gemini"
+
+@router.post("/")
+async def chat(request: ChatRequest):
+    try:
+        # 1. Initialize the agent with the persistent brain state
+        agent = AgentService.get_agent(
+            vectordb=brain_state.vectordb,
+            dataframes=brain_state.dataframes,
+            model_choice=request.model
+        )
+
+        # 2. Run the agent logic
+        response = agent.run(input=request.message)
+
+        # 3. Check for generated visualizations (from ToolService.analyze_data)
+        image_path = None
+        if os.path.exists("visual.png"):
+            image_path = "visual.png"
+            # Note: the path is relative to the backend root
+
+        return {
+            "response": response,
+            "image_path": image_path
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
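For illustration, a minimal client sketch against the new chat route; the base URL and port (localhost:7860, matching the Dockerfile) and the example prompt are assumptions, not part of this commit.

import requests

# Hypothetical call to POST /api/v1/chat/ (the chat router is mounted under /api/v1).
# Assumes the backend is running locally on port 7860, as configured in the Dockerfile.
resp = requests.post(
    "http://localhost:7860/api/v1/chat/",
    json={"message": "Summarize the uploaded report", "model": "Google Gemini"},
    timeout=120,
)
resp.raise_for_status()
data = resp.json()
print(data["response"])    # the agent's text answer
print(data["image_path"])  # "visual.png" if a plot was generated, otherwise None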
app/api/v1/endpoints/document.py ADDED
@@ -0,0 +1,43 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException
+from typing import List
+import os
+import shutil
+from app.services.rag_service import RAGService
+from app.services.state_service import brain_state
+from app.core.config import settings
+
+router = APIRouter()
+
+@router.get("/files")  # New endpoint to sync state on refresh
+async def get_files():
+    return {"filenames": brain_state.filenames}
+
+@router.post("/upload")
+async def upload_documents(files: List[UploadFile] = File(...)):
+    if not os.path.exists(settings.UPLOAD_DIR):
+        os.makedirs(settings.UPLOAD_DIR)
+
+    saved_paths = []
+    try:
+        for file in files:
+            path = os.path.join(settings.UPLOAD_DIR, file.filename)
+            with open(path, "wb") as buffer:
+                shutil.copyfileobj(file.file, buffer)
+            saved_paths.append(path)
+
+        docs, dfs = RAGService.load_files(saved_paths)
+
+        # Update global state
+        brain_state.vectordb = RAGService.create_vector_store(docs)
+        brain_state.dataframes.extend(dfs)
+
+        # Prevent duplicate names in the sidebar
+        new_names = [f.filename for f in files]
+        brain_state.filenames = list(set(brain_state.filenames + new_names))
+
+        return {
+            "message": "Files processed",
+            "filenames": brain_state.filenames
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
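Similarly, a hypothetical client sketch for the new documents routes; the file name and base URL below are placeholders, not part of the commit.

import requests

# Hypothetical upload to POST /api/v1/documents/upload; report.pdf is a placeholder file.
files = [("files", ("report.pdf", open("report.pdf", "rb"), "application/pdf"))]
resp = requests.post("http://localhost:7860/api/v1/documents/upload", files=files, timeout=300)
print(resp.json())  # {"message": "Files processed", "filenames": [...]}

# Re-sync the file list after a page refresh (GET /api/v1/documents/files).
print(requests.get("http://localhost:7860/api/v1/documents/files").json())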
app/api/v1/endpoints/history.py ADDED
@@ -0,0 +1,46 @@
+from fastapi import APIRouter, HTTPException
+from app.db.session import get_db_connection
+import os
+import shutil
+import glob
+from fastapi import APIRouter
+from app.services.state_service import brain_state
+from app.core.config import settings
+
+router = APIRouter()
+
+@router.get("/")
+async def get_chat_history():
+    try:
+        conn = get_db_connection()
+        cursor = conn.cursor()
+        cursor.execute('SELECT role, content, timestamp FROM messages ORDER BY id ASC')
+        rows = cursor.fetchall()
+        conn.close()
+
+        return [{"role": row["role"], "content": row["content"], "timestamp": row["timestamp"]} for row in rows]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")
+
+@router.delete("/clear")
+async def clear_history():
+    # 1. Reset the AI's internal state
+    brain_state.reset()
+
+    # 2. Delete all uploaded files
+    if os.path.exists(settings.UPLOAD_DIR):
+        shutil.rmtree(settings.UPLOAD_DIR)
+        os.makedirs(settings.UPLOAD_DIR)  # Recreate empty folder
+
+    # 3. Delete generated visualizations (*.png)
+    for img in glob.glob("*.png"):
+        try:
+            os.remove(img)
+        except Exception:
+            pass
+
+    # 4. Optional: Clear Chroma DB persistent storage
+    if os.path.exists(settings.CHROMA_PERSIST_DIR):
+        shutil.rmtree(settings.CHROMA_PERSIST_DIR)
+
+    return {"message": "Memory, files, and plots have been wiped clean."}
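A sketch of exercising the history routes under the same assumed local base URL.

import requests

# Fetch the stored conversation (GET /api/v1/history/).
for msg in requests.get("http://localhost:7860/api/v1/history/").json():
    print(msg["timestamp"], msg["role"], msg["content"][:80])

# Wipe memory, uploaded files, generated plots, and the Chroma store (DELETE /api/v1/history/clear).
requests.delete("http://localhost:7860/api/v1/history/clear")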
app/core/config.py ADDED
@@ -0,0 +1,15 @@
+import os
+from pydantic_settings import BaseSettings
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class Settings(BaseSettings):
+    PROJECT_NAME: str = "AI Brain API"
+    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
+    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
+    DATABASE_URL: str = "chat_history.db"
+    CHROMA_PERSIST_DIR: str = "./chroma_db"
+    UPLOAD_DIR: str = "./temp_uploads"
+
+settings = Settings()
app/db/session.py ADDED
@@ -0,0 +1,20 @@
+import sqlite3
+from app.core.config import settings
+
+def get_db_connection():
+    conn = sqlite3.connect(settings.DATABASE_URL, check_same_thread=False)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def init_db():
+    conn = get_db_connection()
+    conn.execute('''
+        CREATE TABLE IF NOT EXISTS messages (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            role TEXT NOT NULL,
+            content TEXT NOT NULL,
+            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
+        )
+    ''')
+    conn.commit()
+    conn.close()
app/main.py ADDED
@@ -0,0 +1,31 @@
+__import__('pysqlite3')
+import sys
+sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from app.api.v1.api_router import api_router
+from app.db.session import init_db
+from fastapi.staticfiles import StaticFiles
+
+app = FastAPI(title="AI Brain Backend")
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.mount("/outputs", StaticFiles(directory="."), name="outputs")
+
+@app.on_event("startup")
+def on_startup():
+    init_db()
+
+app.include_router(api_router, prefix="/api/v1")
+
+if __name__ == "__main__":
+    import uvicorn
+    import os
+    port = int(os.environ.get("PORT", 8000))
+    uvicorn.run(app, host="0.0.0.0", port=port)
app/services/agent_service.py ADDED
@@ -0,0 +1,69 @@
+from datetime import datetime
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_openai import ChatOpenAI
+from langchain_classic.agents import initialize_agent, AgentType
+from langchain_classic.chains import RetrievalQA
+from langchain_classic.memory import ConversationBufferMemory
+from langchain_core.tools import Tool
+
+from app.core.config import settings
+from app.services.tool_service import ToolService
+
+class AgentService:
+    @classmethod
+    def get_agent(cls, vectordb=None, dataframes=None, model_choice="Google Gemini"):
+        # 1. Initialize LLM
+        if model_choice == "Google Gemini":
+            llm = ChatGoogleGenerativeAI(
+                model="gemini-1.5-flash",
+                google_api_key=settings.GOOGLE_API_KEY,
+                temperature=0,
+                convert_system_message_to_human=True
+            )
+        else:
+            llm = ChatOpenAI(
+                model_name="gpt-4o",
+                openai_api_key=settings.OPENAI_API_KEY,
+                temperature=0
+            )
+
+        # 2. Base Tools
+        tools = [
+            ToolService.get_web_search_tool(),
+            Tool(
+                name="YouTube Analyzer",
+                func=ToolService.get_youtube_transcript,
+                description="Useful for summarizing YouTube videos. Input: full URL."
+            )
+        ]
+
+        # 3. Add Dynamic Tools (RAG & CSV)
+        if vectordb:
+            retriever = vectordb.as_retriever(search_kwargs={"k": 3})
+            qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+            tools.append(Tool(
+                name="Personal Knowledge Base",
+                func=qa_chain.run,
+                description="Useful for answering questions based on uploaded documents."
+            ))
+
+        if dataframes and len(dataframes) > 0:
+            csv_tool = ToolService.get_csv_tool(dataframes[0], llm)
+            if csv_tool:
+                tools.append(csv_tool)
+
+        # 4. Memory & Context
+        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+        today = datetime.now().strftime("%A, %B %d, %Y")
+
+        agent_kwargs = {
+            "prefix": f"You are a helpful AI assistant. Today is {today}.\nReturn valid JSON blobs. Escape quotes."
+        }
+
+        # 5. Initialize Agent
+        return initialize_agent(
+            tools, llm,
+            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
+            verbose=True, memory=memory, agent_kwargs=agent_kwargs,
+            handle_parsing_errors=True, max_iterations=3
+        )
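The agent can also be exercised directly, outside FastAPI; a rough sketch follows, assuming GOOGLE_API_KEY is set in .env and that the brain state may still be empty.

from app.services.agent_service import AgentService
from app.services.state_service import brain_state

# Rough sketch: build the agent from the current (possibly empty) brain state and ask a question.
# With no uploads yet, only the web-search and YouTube tools are registered.
agent = AgentService.get_agent(
    vectordb=brain_state.vectordb,
    dataframes=brain_state.dataframes,
    model_choice="Google Gemini",
)
print(agent.run(input="What is today's date, and what is in the news?"))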
app/services/rag_service.py ADDED
@@ -0,0 +1,42 @@
+import pandas as pd
+from langchain_community.document_loaders import PyPDFLoader, TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import Chroma
+from app.core.config import settings
+
+class RAGService:
+    @staticmethod
+    def load_files(file_paths: list):
+        docs = []
+        dataframes = []
+
+        for path in file_paths:
+            if path.endswith(".pdf"):
+                docs.extend(PyPDFLoader(path).load())
+            elif path.endswith(".txt"):
+                try:
+                    docs.extend(TextLoader(path, encoding='utf-8').load())
+                except UnicodeDecodeError:
+                    docs.extend(TextLoader(path, encoding='latin-1').load())
+            elif path.endswith(".csv"):
+                try:
+                    df = pd.read_csv(path, encoding='utf-8')
+                except UnicodeDecodeError:
+                    df = pd.read_csv(path, encoding='latin-1')
+                dataframes.append(df)
+            elif path.endswith(".xlsx"):
+                dataframes.append(pd.read_excel(path))
+
+        return docs, dataframes
+
+    @staticmethod
+    def create_vector_store(docs):
+        if not docs: return None
+        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        splits = splitter.split_documents(docs)
+        return Chroma.from_documents(
+            documents=splits,
+            embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
+            persist_directory=settings.CHROMA_PERSIST_DIR
+        )
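RAGService can likewise be tried in isolation; the file paths and query below are placeholders.

from app.services.rag_service import RAGService

# Placeholder paths; load_files routes PDF/TXT into docs and CSV/XLSX into dataframes.
docs, dataframes = RAGService.load_files(["./temp_uploads/notes.pdf", "./temp_uploads/sales.csv"])

vectordb = RAGService.create_vector_store(docs)  # returns None when no text documents were loaded
if vectordb:
    hits = vectordb.similarity_search("quarterly revenue", k=3)
    print([h.page_content[:100] for h in hits])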
app/services/state_service.py ADDED
@@ -0,0 +1,16 @@
+class StateService:
+    _instance = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super(StateService, cls).__new__(cls)
+            cls.reset(cls._instance)
+        return cls._instance
+
+    def reset(self):
+        """Clears all in-memory references to data."""
+        self.vectordb = None
+        self.dataframes = []
+        self.filenames = []  # Added to track names for the UI
+
+brain_state = StateService()
app/services/tool_service.py ADDED
@@ -0,0 +1,54 @@
+import os
+from langchain_core.tools import Tool
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.document_loaders import YoutubeLoader
+from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+
+class ToolService:
+    @staticmethod
+    def get_web_search_tool():
+        search = DuckDuckGoSearchRun()
+        return Tool(
+            name="Web Search",
+            func=search.run,
+            description="Useful for finding current information, news, or general knowledge."
+        )
+
+    @staticmethod
+    def get_youtube_transcript(video_url: str):
+        try:
+            loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=False, language=["en", "hi"])
+            docs = loader.load()
+            return docs[0].page_content[:4000] if docs else "No transcript found."
+        except Exception as e:
+            return f"Error fetching YouTube transcript: {str(e)}"
+
+    @staticmethod
+    def get_csv_tool(df, llm):
+        if df is None:
+            return None
+
+        prefix = """
+        You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
+        IMPORTANT RULES FOR PLOTTING:
+        1. If asked to visualize, use 'matplotlib.pyplot'.
+        2. ALWAYS save the plot to a file named 'visual.png'.
+        3. DO NOT use plt.show().
+        4. WHEN FINISHED, YOU MUST RESPOND WITH: "Final Answer: I have saved the plot to visual.png"
+        """
+
+        pandas_agent = create_pandas_dataframe_agent(
+            llm, df, verbose=True, allow_dangerous_code=True,
+            prefix=prefix, handle_parsing_errors=True
+        )
+
+        def analyze_data(query):
+            if os.path.exists("visual.png"):
+                os.remove("visual.png")
+            return pandas_agent.run(query)
+
+        return Tool(
+            name="Data Analyst",
+            func=analyze_data,
+            description="Useful for analyzing structured data (CSV/Excel). Input the math or plotting question directly."
+        )
requirements.txt ADDED
@@ -0,0 +1,32 @@
+fastapi
+uvicorn
+python-multipart
+
+pydantic
+pydantic-settings
+python-dotenv
+
+langchain
+langchain-community
+langchain-core
+langchain-google-genai
+langchain-openai
+langchain-experimental
+
+chromadb
+sentence-transformers
+pysqlite3-binary
+
+pypdf
+pandas
+openpyxl
+tabulate
+
+google-generativeai
+duckduckgo-search
+ddgs
+youtube-transcript-api
+pytube
+
+matplotlib
+seaborn