GitHub Actions committed on
Commit
1dc0474
·
1 Parent(s): 18448bc

Sync from GitHub commit: bbc03771

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ __pycache__
3
+ chat_history.db
4
+ temp_uploads
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal Python runtime image for the FastAPI backend.
FROM python:3.10-slim

WORKDIR /app

# build-essential for compiling wheels; libsqlite3-dev supports the
# pysqlite3-binary package pinned in requirements.txt.
RUN apt-get update && apt-get install -y \
    build-essential \
    libsqlite3-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy and install dependencies first so this layer caches across code edits.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# 7860 matches the app_port declared in README.md front matter
# (the port Hugging Face Spaces expects for Docker apps).
ENV PORT=7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,8 @@
1
  ---
2
- title: RAG APP
3
- emoji: 🚀
4
- colorFrom: gray
5
- colorTo: green
6
  sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: AI Knowledge Agent
3
+ emoji: 🧠
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
+ ---
 
 
 
app/api/v1/api_router.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter

from app.api.v1.endpoints import chat, document, history

# Aggregate v1 router; app.main mounts this under the /api/v1 prefix.
api_router = APIRouter()

api_router.include_router(chat.router, prefix="/chat", tags=["Chat"])
api_router.include_router(document.router, prefix="/documents", tags=["Documents"])
api_router.include_router(history.router, prefix="/history", tags=["History"])
app/api/v1/endpoints/chat.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from app.services.agent_service import AgentService
from app.services.state_service import brain_state

router = APIRouter()


class ChatRequest(BaseModel):
    """Payload for one chat turn."""
    message: str
    model: str = "Google Gemini"


@router.post("/")
async def chat(request: ChatRequest):
    """Run a single agent turn and report any plot the tools produced."""
    try:
        # Wire the agent to the current in-memory knowledge state.
        agent = AgentService.get_agent(
            vectordb=brain_state.vectordb,
            dataframes=brain_state.dataframes,
            model_choice=request.model,
        )

        answer = agent.run(input=request.message)

        # ToolService.analyze_data saves plots as visual.png relative to
        # the backend root; surface the path when one exists.
        plot_path = "visual.png" if os.path.exists("visual.png") else None

        return {
            "response": answer,
            "image_path": plot_path,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
app/api/v1/endpoints/document.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
from typing import List

from fastapi import APIRouter, UploadFile, File, HTTPException

from app.services.rag_service import RAGService
from app.services.state_service import brain_state
from app.core.config import settings

router = APIRouter()


@router.get("/files")
async def get_files():
    """Return filenames already ingested so the UI can re-sync on refresh."""
    return {"filenames": brain_state.filenames}


@router.post("/upload")
async def upload_documents(files: List[UploadFile] = File(...)):
    """Save uploads to disk, index them, and update the shared brain state.

    Returns the deduplicated list of all known filenames; raises HTTP 500
    with the underlying error message on any processing failure.
    """
    # exist_ok avoids a race between the existence check and the mkdir.
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    saved_paths = []
    try:
        for file in files:
            # basename() strips any client-supplied directory components so a
            # crafted filename like "../../x" cannot escape the upload dir.
            path = os.path.join(settings.UPLOAD_DIR, os.path.basename(file.filename))
            with open(path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
            saved_paths.append(path)

        docs, dfs = RAGService.load_files(saved_paths)

        # Only replace the vector store when this batch actually produced
        # documents: a CSV/XLSX-only upload yields no docs, and
        # create_vector_store then returns None, which previously clobbered
        # an existing store and wiped prior document knowledge.
        vectordb = RAGService.create_vector_store(docs)
        if vectordb is not None:
            brain_state.vectordb = vectordb
        brain_state.dataframes.extend(dfs)

        # Deduplicate names shown in the sidebar.
        new_names = [f.filename for f in files]
        brain_state.filenames = list(set(brain_state.filenames + new_names))

        return {
            "message": "Files processed",
            "filenames": brain_state.filenames,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
app/api/v1/endpoints/history.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import glob
import os
import shutil

from fastapi import APIRouter, HTTPException

from app.db.session import get_db_connection
from app.services.state_service import brain_state
from app.core.config import settings

router = APIRouter()


@router.get("/")
async def get_chat_history():
    """Return every stored message in chronological (insertion) order."""
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT role, content, timestamp FROM messages ORDER BY id ASC')
            rows = cursor.fetchall()
        finally:
            # Close even when the query raises so the handle is never leaked.
            conn.close()

        return [
            {"role": row["role"], "content": row["content"], "timestamp": row["timestamp"]}
            for row in rows
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")


@router.delete("/clear")
async def clear_history():
    """Wipe in-memory state, uploaded files, generated plots, and the vector store."""
    # 1. Reset the AI's internal state.
    brain_state.reset()

    # 2. Delete all uploaded files and always leave an empty folder behind
    #    (previously the folder was only recreated if it already existed).
    if os.path.exists(settings.UPLOAD_DIR):
        shutil.rmtree(settings.UPLOAD_DIR)
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    # 3. Best-effort delete of generated visualizations (*.png).
    for img in glob.glob("*.png"):
        try:
            os.remove(img)
        except OSError:
            pass

    # 4. Clear Chroma's persistent storage.
    if os.path.exists(settings.CHROMA_PERSIST_DIR):
        shutil.rmtree(settings.CHROMA_PERSIST_DIR)

    return {"message": "Memory, files, and plots have been wiped clean."}
app/core/config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic_settings import BaseSettings
from dotenv import load_dotenv

# Pull a local .env file into the process environment before Settings reads it.
load_dotenv()

class Settings(BaseSettings):
    """Central application configuration, sourced from environment variables."""
    PROJECT_NAME: str = "AI Brain API"
    # API keys default to "" so the app can boot without them configured;
    # calls to the corresponding provider will fail until they are set.
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
    # SQLite file path used by app.db.session (not a SQLAlchemy-style URL).
    DATABASE_URL: str = "chat_history.db"
    # Chroma vector-store persistence directory (wiped by /history/clear).
    CHROMA_PERSIST_DIR: str = "./chroma_db"
    # Where uploads are written before ingestion (wiped by /history/clear).
    UPLOAD_DIR: str = "./temp_uploads"

settings = Settings()
app/db/session.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sqlite3

from app.core.config import settings


def get_db_connection():
    """Open a connection to the chat-history SQLite database.

    Rows are returned as sqlite3.Row so callers can index by column name.
    check_same_thread=False allows FastAPI worker threads to use the handle.
    """
    conn = sqlite3.connect(settings.DATABASE_URL, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn


def init_db():
    """Create the messages table if it does not already exist."""
    conn = get_db_connection()
    try:
        conn.execute('''
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            role TEXT NOT NULL,
            content TEXT NOT NULL,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
        ''')
        conn.commit()
    finally:
        # Previously the connection leaked if execute/commit raised.
        conn.close()
app/main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Swap the stdlib sqlite3 module for pysqlite3-binary (pinned in
# requirements.txt) before anything else imports sqlite3 — presumably to
# satisfy chromadb's minimum SQLite version on the slim base image; confirm.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.v1.api_router import api_router
from app.db.session import init_db
from fastapi.staticfiles import StaticFiles

app = FastAPI(title="AI Brain Backend")

# Wide-open CORS: any origin may call the API with any method/header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serves generated plots (e.g. visual.png) to the frontend.
# NOTE(review): mounting "." exposes the entire backend working directory
# over HTTP, including chat_history.db and any local .env — consider
# serving a dedicated outputs/ folder instead.
app.mount("/outputs", StaticFiles(directory="."), name="outputs")

@app.on_event("startup")
def on_startup():
    # Ensure the messages table exists before the first request.
    init_db()

app.include_router(api_router, prefix="/api/v1")

if __name__ == "__main__":
    import uvicorn
    import os
    # PORT is set to 7860 by the Dockerfile; defaults to 8000 for local runs.
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
app/services/agent_service.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_classic.agents import initialize_agent, AgentType
from langchain_classic.chains import RetrievalQA
from langchain_classic.memory import ConversationBufferMemory
from langchain_core.tools import Tool

from app.core.config import settings
from app.services.tool_service import ToolService


class AgentService:
    """Assembles the conversational agent from an LLM, tools, and memory."""

    @classmethod
    def get_agent(cls, vectordb=None, dataframes=None, model_choice="Google Gemini"):
        """Build a chat-conversational ReAct agent.

        Args:
            vectordb: optional vector store; when present a RAG tool is added.
            dataframes: optional list of dataframes; the first backs a CSV tool.
            model_choice: "Google Gemini" (default) selects Gemini; any other
                value selects GPT-4o.
        """
        # 1. Initialize the LLM for the requested provider.
        if model_choice == "Google Gemini":
            llm = ChatGoogleGenerativeAI(
                model="gemini-1.5-flash",
                google_api_key=settings.GOOGLE_API_KEY,
                temperature=0,
                convert_system_message_to_human=True
            )
        else:
            llm = ChatOpenAI(
                model_name="gpt-4o",
                openai_api_key=settings.OPENAI_API_KEY,
                temperature=0
            )

        # 2. Always-available tools.
        tools = [
            ToolService.get_web_search_tool(),
            Tool(
                name="YouTube Analyzer",
                func=ToolService.get_youtube_transcript,
                description="Useful for summarizing YouTube videos. Input: full URL."
            )
        ]

        # 3. Dynamic tools depending on what the user has uploaded.
        if vectordb:
            retriever = vectordb.as_retriever(search_kwargs={"k": 3})
            # FIX: from_chain_type is a classmethod on the RetrievalQA class;
            # the previously-imported retrieval_qa *module* has no such
            # attribute, so this raised AttributeError at request time.
            qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
            tools.append(Tool(
                name="Personal Knowledge Base",
                func=qa_chain.run,
                description="Useful for answering questions based on uploaded documents."
            ))

        if dataframes and len(dataframes) > 0:
            csv_tool = ToolService.get_csv_tool(dataframes[0], llm)
            if csv_tool:
                tools.append(csv_tool)

        # 4. Conversation memory plus a dated system prefix.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        today = datetime.now().strftime("%A, %B %d, %Y")

        agent_kwargs = {
            "prefix": f"You are a helpful AI assistant. Today is {today}.\nReturn valid JSON blobs. Escape quotes."
        }

        # 5. Assemble the agent; max_iterations bounds runaway tool loops.
        return initialize_agent(
            tools, llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            verbose=True, memory=memory, agent_kwargs=agent_kwargs,
            handle_parsing_errors=True, max_iterations=3
        )
app/services/rag_service.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from app.core.config import settings


class RAGService:
    """Loads uploaded files and turns text documents into a Chroma vector store."""

    @staticmethod
    def load_files(file_paths: list):
        """Split paths into LangChain documents (pdf/txt) and dataframes (csv/xlsx).

        Extension matching is now case-insensitive, so files like
        "REPORT.PDF" or "Data.CSV" are no longer silently skipped.
        Unrecognized extensions are ignored.
        Returns a (docs, dataframes) tuple.
        """
        docs = []
        dataframes = []

        for path in file_paths:
            lowered = path.lower()
            if lowered.endswith(".pdf"):
                docs.extend(PyPDFLoader(path).load())
            elif lowered.endswith(".txt"):
                try:
                    docs.extend(TextLoader(path, encoding='utf-8').load())
                except UnicodeDecodeError:
                    # Fall back for legacy single-byte encodings.
                    docs.extend(TextLoader(path, encoding='latin-1').load())
            elif lowered.endswith(".csv"):
                try:
                    df = pd.read_csv(path, encoding='utf-8')
                except UnicodeDecodeError:
                    df = pd.read_csv(path, encoding='latin-1')
                dataframes.append(df)
            elif lowered.endswith(".xlsx"):
                dataframes.append(pd.read_excel(path))

        return docs, dataframes

    @staticmethod
    def create_vector_store(docs):
        """Chunk docs and embed them into a persisted Chroma store.

        Returns None when docs is empty — callers must handle that case.
        """
        if not docs:
            return None
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = splitter.split_documents(docs)
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
            persist_directory=settings.CHROMA_PERSIST_DIR
        )
app/services/state_service.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class StateService:
    """Process-wide singleton holding the data the agent can currently use."""

    _instance = None

    def __new__(cls):
        # Lazily create the one shared instance and start it out empty.
        if cls._instance is None:
            instance = super().__new__(cls)
            instance.reset()
            cls._instance = instance
        return cls._instance

    def reset(self):
        """Clears all in-memory references to data."""
        self.vectordb = None    # active vector store, or None until docs are indexed
        self.dataframes = []    # dataframes collected from csv/xlsx uploads
        self.filenames = []     # uploaded file names shown in the UI


brain_state = StateService()
app/services/tool_service.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain_core.tools import Tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import YoutubeLoader
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent


class ToolService:
    """Factory helpers for the tools handed to the agent."""

    @staticmethod
    def get_web_search_tool():
        """Wrap DuckDuckGo search as a LangChain Tool."""
        searcher = DuckDuckGoSearchRun()
        return Tool(
            name="Web Search",
            func=searcher.run,
            description="Useful for finding current information, news, or general knowledge."
        )

    @staticmethod
    def get_youtube_transcript(video_url: str):
        """Fetch an English/Hindi transcript, truncated to 4000 characters."""
        try:
            loaded = YoutubeLoader.from_youtube_url(
                video_url, add_video_info=False, language=["en", "hi"]
            ).load()
            if not loaded:
                return "No transcript found."
            return loaded[0].page_content[:4000]
        except Exception as e:
            return f"Error fetching YouTube transcript: {str(e)}"

    @staticmethod
    def get_csv_tool(df, llm):
        """Build a pandas-agent Tool over *df*; returns None when df is None."""
        if df is None:
            return None

        prefix = """
    You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
    IMPORTANT RULES FOR PLOTTING:
    1. If asked to visualize, use 'matplotlib.pyplot'.
    2. ALWAYS save the plot to a file named 'visual.png'.
    3. DO NOT use plt.show().
    4. WHEN FINISHED, YOU MUST RESPOND WITH: "Final Answer: I have saved the plot to visual.png"
    """

        pandas_agent = create_pandas_dataframe_agent(
            llm, df, verbose=True, allow_dangerous_code=True,
            prefix=prefix, handle_parsing_errors=True
        )

        def analyze_data(query):
            # Drop any stale plot so the chat response never points at an
            # image produced by an earlier question.
            if os.path.exists("visual.png"):
                os.remove("visual.png")
            return pandas_agent.run(query)

        return Tool(
            name="Data Analyst",
            func=analyze_data,
            description="Useful for analyzing structured data (CSV/Excel). Input the math or plotting question directly."
        )
requirements.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+
5
+ pydantic
6
+ pydantic-settings
7
+ python-dotenv
8
+
9
+ langchain
10
+ langchain-community
11
+ langchain-core
12
+ langchain-google-genai
13
+ langchain-openai
14
+ langchain-experimental
15
+
16
+ chromadb
17
+ sentence-transformers
18
+ pysqlite3-binary
19
+
20
+ pypdf
21
+ pandas
22
+ openpyxl
23
+ tabulate
24
+
25
+ google-generativeai
26
+ duckduckgo-search
27
+ ddgs
28
+ youtube-transcript-api
29
+ pytube
30
+
31
+ matplotlib
32
+ seaborn