GitHub Actions committed on
Commit
1dc0474
·
1 Parent(s): 18448bc

Sync from GitHub commit: bbc03771

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ __pycache__
3
+ chat_history.db
4
+ temp_uploads
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal Python runtime image for the FastAPI backend.
FROM python:3.10-slim

WORKDIR /app

# build-essential for compiling wheels; libsqlite3-dev supports the
# pysqlite3-binary package pinned in requirements.txt.
RUN apt-get update && apt-get install -y \
    build-essential \
    libsqlite3-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy and install dependencies first so this layer caches across code edits.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# 7860 matches the app_port declared in README.md front matter
# (the port Hugging Face Spaces expects for Docker apps).
ENV PORT=7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,8 @@
1
  ---
2
- title: RAG APP
3
- emoji: 🚀
4
- colorFrom: gray
5
- colorTo: green
6
  sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: AI Knowledge Agent
3
+ emoji: 🧠
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
+ ---
 
 
 
app/api/v1/api_router.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter

from app.api.v1.endpoints import chat, document, history

# Aggregate v1 router; app.main mounts this under the /api/v1 prefix.
api_router = APIRouter()

api_router.include_router(chat.router, prefix="/chat", tags=["Chat"])
api_router.include_router(document.router, prefix="/documents", tags=["Documents"])
api_router.include_router(history.router, prefix="/history", tags=["History"])
app/api/v1/endpoints/chat.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from app.services.agent_service import AgentService
from app.services.state_service import brain_state

router = APIRouter()


class ChatRequest(BaseModel):
    """Payload for one chat turn."""
    message: str
    model: str = "Google Gemini"


@router.post("/")
async def chat(request: ChatRequest):
    """Run a single agent turn and report any plot the tools produced."""
    try:
        # Wire the agent to the current in-memory knowledge state.
        agent = AgentService.get_agent(
            vectordb=brain_state.vectordb,
            dataframes=brain_state.dataframes,
            model_choice=request.model,
        )

        answer = agent.run(input=request.message)

        # ToolService.analyze_data saves plots as visual.png relative to
        # the backend root; surface the path when one exists.
        plot_path = "visual.png" if os.path.exists("visual.png") else None

        return {
            "response": answer,
            "image_path": plot_path,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
app/api/v1/endpoints/document.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
from typing import List

from fastapi import APIRouter, UploadFile, File, HTTPException

from app.services.rag_service import RAGService
from app.services.state_service import brain_state
from app.core.config import settings

router = APIRouter()


@router.get("/files")
async def get_files():
    """Return filenames already ingested so the UI can re-sync on refresh."""
    return {"filenames": brain_state.filenames}


@router.post("/upload")
async def upload_documents(files: List[UploadFile] = File(...)):
    """Save uploads to disk, index them, and update the shared brain state.

    Returns the deduplicated list of all known filenames; raises HTTP 500
    with the underlying error message on any processing failure.
    """
    # exist_ok avoids a race between the existence check and the mkdir.
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    saved_paths = []
    try:
        for file in files:
            # basename() strips any client-supplied directory components so a
            # crafted filename like "../../x" cannot escape the upload dir.
            path = os.path.join(settings.UPLOAD_DIR, os.path.basename(file.filename))
            with open(path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
            saved_paths.append(path)

        docs, dfs = RAGService.load_files(saved_paths)

        # Only replace the vector store when this batch actually produced
        # documents: a CSV/XLSX-only upload yields no docs, and
        # create_vector_store then returns None, which previously clobbered
        # an existing store and wiped prior document knowledge.
        vectordb = RAGService.create_vector_store(docs)
        if vectordb is not None:
            brain_state.vectordb = vectordb
        brain_state.dataframes.extend(dfs)

        # Deduplicate names shown in the sidebar.
        new_names = [f.filename for f in files]
        brain_state.filenames = list(set(brain_state.filenames + new_names))

        return {
            "message": "Files processed",
            "filenames": brain_state.filenames,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
app/api/v1/endpoints/history.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import glob
import os
import shutil

from fastapi import APIRouter, HTTPException

from app.db.session import get_db_connection
from app.services.state_service import brain_state
from app.core.config import settings

router = APIRouter()


@router.get("/")
async def get_chat_history():
    """Return every stored message in chronological (insertion) order."""
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT role, content, timestamp FROM messages ORDER BY id ASC')
            rows = cursor.fetchall()
        finally:
            # Close even when the query raises so the handle is never leaked.
            conn.close()

        return [
            {"role": row["role"], "content": row["content"], "timestamp": row["timestamp"]}
            for row in rows
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")


@router.delete("/clear")
async def clear_history():
    """Wipe in-memory state, uploaded files, generated plots, and the vector store."""
    # 1. Reset the AI's internal state.
    brain_state.reset()

    # 2. Delete all uploaded files and always leave an empty folder behind
    #    (previously the folder was only recreated if it already existed).
    if os.path.exists(settings.UPLOAD_DIR):
        shutil.rmtree(settings.UPLOAD_DIR)
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    # 3. Best-effort delete of generated visualizations (*.png).
    for img in glob.glob("*.png"):
        try:
            os.remove(img)
        except OSError:
            pass

    # 4. Clear Chroma's persistent storage.
    if os.path.exists(settings.CHROMA_PERSIST_DIR):
        shutil.rmtree(settings.CHROMA_PERSIST_DIR)

    return {"message": "Memory, files, and plots have been wiped clean."}
app/core/config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic_settings import BaseSettings
from dotenv import load_dotenv

# Pull a local .env file into the process environment before Settings reads it.
load_dotenv()

class Settings(BaseSettings):
    """Central application configuration, sourced from environment variables."""
    PROJECT_NAME: str = "AI Brain API"
    # API keys default to "" so the app can boot without them configured;
    # calls to the corresponding provider will fail until they are set.
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
    # SQLite file path used by app.db.session (not a SQLAlchemy-style URL).
    DATABASE_URL: str = "chat_history.db"
    # Chroma vector-store persistence directory (wiped by /history/clear).
    CHROMA_PERSIST_DIR: str = "./chroma_db"
    # Where uploads are written before ingestion (wiped by /history/clear).
    UPLOAD_DIR: str = "./temp_uploads"

settings = Settings()
app/db/session.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sqlite3

from app.core.config import settings


def get_db_connection():
    """Open a connection to the chat-history SQLite database.

    Rows are returned as sqlite3.Row so callers can index by column name.
    check_same_thread=False allows FastAPI worker threads to use the handle.
    """
    conn = sqlite3.connect(settings.DATABASE_URL, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn


def init_db():
    """Create the messages table if it does not already exist."""
    conn = get_db_connection()
    try:
        conn.execute('''
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            role TEXT NOT NULL,
            content TEXT NOT NULL,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
        ''')
        conn.commit()
    finally:
        # Previously the connection leaked if execute/commit raised.
        conn.close()
app/main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Swap the stdlib sqlite3 module for pysqlite3-binary (pinned in
# requirements.txt) before anything else imports sqlite3 — presumably to
# satisfy chromadb's minimum SQLite version on the slim base image; confirm.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.v1.api_router import api_router
from app.db.session import init_db
from fastapi.staticfiles import StaticFiles

app = FastAPI(title="AI Brain Backend")

# Wide-open CORS: any origin may call the API with any method/header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serves generated plots (e.g. visual.png) to the frontend.
# NOTE(review): mounting "." exposes the entire backend working directory
# over HTTP, including chat_history.db and any local .env — consider
# serving a dedicated outputs/ folder instead.
app.mount("/outputs", StaticFiles(directory="."), name="outputs")

@app.on_event("startup")
def on_startup():
    # Ensure the messages table exists before the first request.
    init_db()

app.include_router(api_router, prefix="/api/v1")

if __name__ == "__main__":
    import uvicorn
    import os
    # PORT is set to 7860 by the Dockerfile; defaults to 8000 for local runs.
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
app/services/agent_service.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_classic.agents import initialize_agent, AgentType
from langchain_classic.chains import RetrievalQA
from langchain_classic.memory import ConversationBufferMemory
from langchain_core.tools import Tool

from app.core.config import settings
from app.services.tool_service import ToolService


class AgentService:
    """Assembles the conversational agent from an LLM, tools, and memory."""

    @classmethod
    def get_agent(cls, vectordb=None, dataframes=None, model_choice="Google Gemini"):
        """Build a chat-conversational ReAct agent.

        Args:
            vectordb: optional vector store; when present a RAG tool is added.
            dataframes: optional list of dataframes; the first backs a CSV tool.
            model_choice: "Google Gemini" (default) selects Gemini; any other
                value selects GPT-4o.
        """
        # 1. Initialize the LLM for the requested provider.
        if model_choice == "Google Gemini":
            llm = ChatGoogleGenerativeAI(
                model="gemini-1.5-flash",
                google_api_key=settings.GOOGLE_API_KEY,
                temperature=0,
                convert_system_message_to_human=True
            )
        else:
            llm = ChatOpenAI(
                model_name="gpt-4o",
                openai_api_key=settings.OPENAI_API_KEY,
                temperature=0
            )

        # 2. Always-available tools.
        tools = [
            ToolService.get_web_search_tool(),
            Tool(
                name="YouTube Analyzer",
                func=ToolService.get_youtube_transcript,
                description="Useful for summarizing YouTube videos. Input: full URL."
            )
        ]

        # 3. Dynamic tools depending on what the user has uploaded.
        if vectordb:
            retriever = vectordb.as_retriever(search_kwargs={"k": 3})
            # FIX: from_chain_type is a classmethod on the RetrievalQA class;
            # the previously-imported retrieval_qa *module* has no such
            # attribute, so this raised AttributeError at request time.
            qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
            tools.append(Tool(
                name="Personal Knowledge Base",
                func=qa_chain.run,
                description="Useful for answering questions based on uploaded documents."
            ))

        if dataframes and len(dataframes) > 0:
            csv_tool = ToolService.get_csv_tool(dataframes[0], llm)
            if csv_tool:
                tools.append(csv_tool)

        # 4. Conversation memory plus a dated system prefix.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        today = datetime.now().strftime("%A, %B %d, %Y")

        agent_kwargs = {
            "prefix": f"You are a helpful AI assistant. Today is {today}.\nReturn valid JSON blobs. Escape quotes."
        }

        # 5. Assemble the agent; max_iterations bounds runaway tool loops.
        return initialize_agent(
            tools, llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            verbose=True, memory=memory, agent_kwargs=agent_kwargs,
            handle_parsing_errors=True, max_iterations=3
        )
app/services/rag_service.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from app.core.config import settings


class RAGService:
    """Loads uploaded files and turns text documents into a Chroma vector store."""

    @staticmethod
    def load_files(file_paths: list):
        """Split paths into LangChain documents (pdf/txt) and dataframes (csv/xlsx).

        Extension matching is now case-insensitive, so files like
        "REPORT.PDF" or "Data.CSV" are no longer silently skipped.
        Unrecognized extensions are ignored.
        Returns a (docs, dataframes) tuple.
        """
        docs = []
        dataframes = []

        for path in file_paths:
            lowered = path.lower()
            if lowered.endswith(".pdf"):
                docs.extend(PyPDFLoader(path).load())
            elif lowered.endswith(".txt"):
                try:
                    docs.extend(TextLoader(path, encoding='utf-8').load())
                except UnicodeDecodeError:
                    # Fall back for legacy single-byte encodings.
                    docs.extend(TextLoader(path, encoding='latin-1').load())
            elif lowered.endswith(".csv"):
                try:
                    df = pd.read_csv(path, encoding='utf-8')
                except UnicodeDecodeError:
                    df = pd.read_csv(path, encoding='latin-1')
                dataframes.append(df)
            elif lowered.endswith(".xlsx"):
                dataframes.append(pd.read_excel(path))

        return docs, dataframes

    @staticmethod
    def create_vector_store(docs):
        """Chunk docs and embed them into a persisted Chroma store.

        Returns None when docs is empty — callers must handle that case.
        """
        if not docs:
            return None
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = splitter.split_documents(docs)
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
            persist_directory=settings.CHROMA_PERSIST_DIR
        )
app/services/state_service.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class StateService:
    """Process-wide singleton holding the data the agent can currently use."""

    _instance = None

    def __new__(cls):
        # Lazily create the one shared instance and start it out empty.
        if cls._instance is None:
            instance = super().__new__(cls)
            instance.reset()
            cls._instance = instance
        return cls._instance

    def reset(self):
        """Clears all in-memory references to data."""
        self.vectordb = None    # active vector store, or None until docs are indexed
        self.dataframes = []    # dataframes collected from csv/xlsx uploads
        self.filenames = []     # uploaded file names shown in the UI


brain_state = StateService()
app/services/tool_service.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain_core.tools import Tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import YoutubeLoader
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent


class ToolService:
    """Factory helpers for the tools handed to the agent."""

    @staticmethod
    def get_web_search_tool():
        """Wrap DuckDuckGo search as a LangChain Tool."""
        searcher = DuckDuckGoSearchRun()
        return Tool(
            name="Web Search",
            func=searcher.run,
            description="Useful for finding current information, news, or general knowledge."
        )

    @staticmethod
    def get_youtube_transcript(video_url: str):
        """Fetch an English/Hindi transcript, truncated to 4000 characters."""
        try:
            loaded = YoutubeLoader.from_youtube_url(
                video_url, add_video_info=False, language=["en", "hi"]
            ).load()
            if not loaded:
                return "No transcript found."
            return loaded[0].page_content[:4000]
        except Exception as e:
            return f"Error fetching YouTube transcript: {str(e)}"

    @staticmethod
    def get_csv_tool(df, llm):
        """Build a pandas-agent Tool over *df*; returns None when df is None."""
        if df is None:
            return None

        prefix = """
    You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
    IMPORTANT RULES FOR PLOTTING:
    1. If asked to visualize, use 'matplotlib.pyplot'.
    2. ALWAYS save the plot to a file named 'visual.png'.
    3. DO NOT use plt.show().
    4. WHEN FINISHED, YOU MUST RESPOND WITH: "Final Answer: I have saved the plot to visual.png"
    """

        pandas_agent = create_pandas_dataframe_agent(
            llm, df, verbose=True, allow_dangerous_code=True,
            prefix=prefix, handle_parsing_errors=True
        )

        def analyze_data(query):
            # Drop any stale plot so the chat response never points at an
            # image produced by an earlier question.
            if os.path.exists("visual.png"):
                os.remove("visual.png")
            return pandas_agent.run(query)

        return Tool(
            name="Data Analyst",
            func=analyze_data,
            description="Useful for analyzing structured data (CSV/Excel). Input the math or plotting question directly."
        )
requirements.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+
5
+ pydantic
6
+ pydantic-settings
7
+ python-dotenv
8
+
9
+ langchain
10
+ langchain-community
11
+ langchain-core
12
+ langchain-google-genai
13
+ langchain-openai
14
+ langchain-experimental
15
+
16
+ chromadb
17
+ sentence-transformers
18
+ pysqlite3-binary
19
+
20
+ pypdf
21
+ pandas
22
+ openpyxl
23
+ tabulate
24
+
25
+ google-generativeai
26
+ duckduckgo-search
27
+ ddgs
28
+ youtube-transcript-api
29
+ pytube
30
+
31
+ matplotlib
32
+ seaborn