samagra44 committed on
Commit 6e357ca · 1 Parent(s): 6067c02

initial commit

.gitignore ADDED
Binary file (52 Bytes).
 
Dockerfile CHANGED
@@ -1,21 +1,14 @@
- FROM python:3.9-slim

  WORKDIR /app

- RUN apt-get update && apt-get install -y \
-     build-essential \
-     curl \
-     software-properties-common \
-     git \
-     && rm -rf /var/lib/apt/lists/*

- COPY requirements.txt ./
- COPY src/ ./src/

- RUN pip3 install -r requirements.txt

- EXPOSE 8501

- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+ FROM python:3.10
+
  WORKDIR /app
+
+ COPY requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ RUN chmod +x start.sh
+
+ CMD ["./start.sh"]
 
README.md CHANGED
@@ -1,4 +1,3 @@
- ---
  title: My Doc Rag
  emoji: 🚀
  colorFrom: red
@@ -6,14 +5,6 @@ colorTo: red
  sdk: docker
  app_port: 8501
  tags:
- - streamlit
  pinned: false
- short_description: Streamlit template space
- ---
-
- # Welcome to Streamlit!
-
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).

  title: My Doc Rag
  emoji: 🚀
  colorFrom: red
  colorTo: red
  sdk: docker
  app_port: 8501
  tags:
+ - streamlit
  pinned: false
+ short_description: Streamlit template space
 
app.py ADDED
@@ -0,0 +1,4 @@
+ from frontend.chat_gui import main
+
+ if __name__ == "__main__":
+     main()
config/__init__.py ADDED
File without changes
config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (128 Bytes).
 
config/__pycache__/nodes.cpython-310.pyc ADDED
Binary file (489 Bytes).
 
config/nodes.py ADDED
@@ -0,0 +1,20 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+
+ HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME")
+
+ BASE_URL = os.getenv("BASE_URL")
+
+ LLM_TEMPERATURE_SET = os.getenv("LLM_TEMPERATURE_SET")
+
+ EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME")
+
+ EMBEDDING_MODEL_TASK = os.getenv("EMBEDDING_MODEL_TASK")
+
+ PERSIST_DIRECTORY = os.getenv("PERSIST_DIRECTORY")
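For reference, a minimal sketch of the `.env` file this module expects might look like the following. The variable names come from the `os.getenv` calls above; every value shown is a placeholder assumption, not something defined in this commit.

```
OPENROUTER_API_KEY=sk-or-...                                  # placeholder
HUGGINGFACEHUB_API_TOKEN=hf_...                               # placeholder
LLM_MODEL_NAME=openai/gpt-4o-mini                             # any OpenRouter model id
BASE_URL=https://openrouter.ai/api/v1                         # OpenRouter's OpenAI-compatible endpoint
LLM_TEMPERATURE_SET=0.7
EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2   # example embedding model
EMBEDDING_MODEL_TASK=feature-extraction
PERSIST_DIRECTORY=./chroma_db
```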
database/__init__.py ADDED
File without changes
database/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (130 Bytes).
 
database/__pycache__/create_database.cpython-310.pyc ADDED
Binary file (641 Bytes).
 
database/__pycache__/load_database.cpython-310.pyc ADDED
Binary file (689 Bytes).
 
database/create_database.py ADDED
@@ -0,0 +1,13 @@
+ from langchain_chroma import Chroma
+ from utils.load_llm import TEXT_SPLITTER, EMBEDDING_MODEL
+ from config.nodes import PERSIST_DIRECTORY
+
+ def create_or_add_to_collection(collection_name: str, docs):
+     texts = TEXT_SPLITTER.split_documents(docs)
+     database_chroma = Chroma(
+         collection_name=collection_name,
+         persist_directory=PERSIST_DIRECTORY,
+         embedding_function=EMBEDDING_MODEL
+     )
+     database_chroma.add_documents(texts)
+     return database_chroma
database/load_database.py ADDED
@@ -0,0 +1,15 @@
+ from langchain_chroma import Chroma
+ from utils.load_llm import TEXT_SPLITTER, EMBEDDING_MODEL
+ from config.nodes import PERSIST_DIRECTORY
+
+
+ def load_retriever(collection_name: str, score_threshold=0.6):
+     load_database_chroma = Chroma(
+         collection_name=collection_name,
+         persist_directory=PERSIST_DIRECTORY,
+         embedding_function=EMBEDDING_MODEL
+     )
+     return load_database_chroma.as_retriever(
+         search_type="similarity_score_threshold",
+         search_kwargs={"score_threshold": score_threshold}
+     )
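Taken together with create_database.py, these two modules are meant to be driven roughly as in the sketch below. This is a hypothetical usage example, not a file in this commit; it assumes the loaders from utils/load_documents.py and a URL/collection name chosen for illustration.

```python
# Hypothetical usage sketch (not part of this commit).
from utils.load_documents import load_document_from_url
from database.create_database import create_or_add_to_collection
from database.load_database import load_retriever

# 1. Load and index a web page into a named Chroma collection.
docs = load_document_from_url("https://example.com/some-article")
create_or_add_to_collection("my_collection", docs)

# 2. Reopen the same collection as a score-thresholded retriever and query it.
retriever = load_retriever("my_collection", score_threshold=0.6)
relevant_docs = retriever.invoke("What is the article about?")
print(len(relevant_docs), "documents retrieved")
```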
frontend/__pycache__/chat_gui.cpython-310.pyc ADDED
Binary file (1.83 kB).
 
frontend/chat_gui.py ADDED
@@ -0,0 +1,58 @@
+ import requests
+ import streamlit as st
+
+ API_URL = "http://localhost:8000"
+
+ def main():
+     st.set_page_config(page_title="📚 Chat Assistant", layout="wide")
+     st.title("📚 Chat Assistant")
+
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     collection_name = st.text_input("Name Your Collection", value="my_collection")
+
+     with st.expander("📤 Upload Documents or URL"):
+         upload_type = st.radio("Choose upload type", ["File", "Web URL"], horizontal=True)
+
+         if upload_type == "File":
+             uploaded_file = st.file_uploader("Upload .pdf or .txt", type=["pdf", "txt"])
+             if uploaded_file and st.button("Upload File"):
+                 res = requests.post(
+                     f"{API_URL}/upload/file",
+                     files={"file": uploaded_file},
+                     data={"collection": collection_name}
+                 )
+                 st.success(res.json().get("message", "Uploaded"))
+
+         if upload_type == "Web URL":
+             url = st.text_input("Enter Web URL")
+             if st.button("Upload URL"):
+                 res = requests.post(f"{API_URL}/upload/url", json={
+                     "doc_type": "url",
+                     "content": url,
+                     "file_name": collection_name,
+                 })
+                 st.success(res.json().get("message", "URL Indexed"))
+
+
+     for msg in st.session_state.messages:
+         with st.chat_message(msg["role"]):
+             st.markdown(msg["content"])
+
+
+     if query := st.chat_input("Ask me anything..."):
+         st.session_state.messages.append({"role": "user", "content": query})
+         with st.chat_message("user"):
+             st.markdown(query)
+
+         with st.chat_message("assistant"):
+             with st.spinner("Thinking..."):
+                 res = requests.post(f"{API_URL}/chat/", json={
+                     "query": query,
+                     "collection": collection_name
+                 })
+
+             answer = res.json()["answer"]
+             st.markdown(answer)
+             st.session_state.messages.append({"role": "assistant", "content": answer})
helper/__init__.py ADDED
File without changes
helper/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (128 Bytes).
 
helper/__pycache__/model_load.cpython-310.pyc ADDED
Binary file (912 Bytes).
 
helper/model_load.py ADDED
@@ -0,0 +1,15 @@
+ from pydantic import BaseModel
+ from typing import Optional, List
+
+ class UploadDocRequest(BaseModel):
+     doc_type: str
+     content: Optional[str] = None
+     file_name: Optional[str] = None
+
+ class ChatRequest(BaseModel):
+     query: str
+     collection: str
+
+ class ChatResponse(BaseModel):
+     answer: str
+     sources: Optional[List[str]] = []
requirements.txt CHANGED
@@ -1,3 +1,11 @@
- altair
- pandas
- streamlit

+ fastapi
+ streamlit
+ python-dotenv
+ chromadb
+ langchain
+ uvicorn
+ langchain-huggingface
+ langchain-openai
+ langchain-core
+ langchain-community
+ langchain-chroma
routes/__init__.py ADDED
File without changes
routes/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (128 Bytes).
 
routes/__pycache__/chat_route.cpython-310.pyc ADDED
Binary file (764 Bytes).
 
routes/__pycache__/entry_point.cpython-310.pyc ADDED
Binary file (370 Bytes).
 
routes/__pycache__/upload_file_route.cpython-310.pyc ADDED
Binary file (1.03 kB).
 
routes/chat_route.py ADDED
@@ -0,0 +1,12 @@
+ from fastapi import APIRouter, UploadFile, File, Form
+ from helper.model_load import ChatRequest, ChatResponse
+ from database.load_database import load_retriever
+ from utils.load_rag_chain import generate_response_from_context
+
+ chat_router = APIRouter(prefix="/chat", tags=['Chat'])
+
+ @chat_router.post("/", response_model=ChatResponse)
+ async def chat_rag(data: ChatRequest):
+     retriever = load_retriever(data.collection)
+     answer = generate_response_from_context(retriever=retriever, question=data.query)
+     return ChatResponse(answer=answer)
routes/entry_point.py ADDED
@@ -0,0 +1,19 @@
+ from .chat_route import chat_router
+ from .upload_file_route import upload_file_router
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+
+ app = FastAPI(title="RAG API")
+ app.include_router(router=chat_router)
+ app.include_router(router=upload_file_router)
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # if __name__ == "__main__":
+ #     import uvicorn
+ #     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
routes/upload_file_route.py ADDED
@@ -0,0 +1,18 @@
+ from fastapi import APIRouter, UploadFile, File, Form
+ from helper.model_load import UploadDocRequest
+ from utils.load_documents import load_document_from_file, load_document_from_url
+ from database.create_database import create_or_add_to_collection
+
+ upload_file_router = APIRouter(prefix="/upload", tags=['Upload'])
+
+ @upload_file_router.post("/file")
+ async def upload(file: UploadFile = File(...), collection: str = Form(...)):
+     docs = load_document_from_file(file)
+     create_or_add_to_collection(collection, docs)
+     return {"message": f"Uploaded and Indexed in {collection}"}
+
+ @upload_file_router.post("/url")
+ async def upload_url(data: UploadDocRequest):
+     docs = load_document_from_url(data.content)
+     create_or_add_to_collection(data.file_name, docs)
+     return {"message": f"Indexed URL to {data.file_name}"}
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
start.sh ADDED
@@ -0,0 +1,5 @@
+ #!/bin/bash
+
+ uvicorn routes.entry_point:app --host 0.0.0.0 --port 8000 &
+
+ streamlit run app.py --server.port 7860 --server.address 0.0.0.0
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (127 Bytes).
 
utils/__pycache__/load_documents.cpython-310.pyc ADDED
Binary file (954 Bytes).
 
utils/__pycache__/load_llm.cpython-310.pyc ADDED
Binary file (847 Bytes).
 
utils/__pycache__/load_rag_chain.cpython-310.pyc ADDED
Binary file (668 Bytes).
 
utils/load_documents.py ADDED
@@ -0,0 +1,42 @@
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, WebBaseLoader
+ from langchain_core.documents import Document
+ import tempfile
+
+ def load_document_from_file(file) -> list[Document]:
+     suffix = "." + file.filename.split(".")[-1]
+     temp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+     temp.write(file.file.read())
+     temp.close()
+
+     if suffix == ".pdf":
+         loader = PyPDFLoader(temp.name)
+     elif suffix == ".txt":
+         loader = TextLoader(temp.name)
+     else:
+         raise ValueError("Unsupported File Type")
+     return loader.load()
+
+ def load_document_from_url(url: str) -> list[Document]:
+     loader = WebBaseLoader(url)
+     return loader.load()
+
+ # def load_document_file_or_url(file_or_url) -> list[Document]:
+ #     # If input is a URL string
+ #     if isinstance(file_or_url, str) and re.match(r'^https?://', file_or_url):
+ #         loader = WebBaseLoader(file_or_url)
+ #         return loader.load()
+
+ #     # Otherwise treat it as a file-like object (e.g. from file upload)
+ #     suffix = "." + file_or_url.filename.split(".")[-1]
+ #     temp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+ #     temp.write(file_or_url.file.read())
+ #     temp.close()
+
+ #     if suffix == ".pdf":
+ #         loader = PyPDFLoader(temp.name)
+ #     elif suffix == ".txt":
+ #         loader = TextLoader(temp.name)
+ #     else:
+ #         raise ValueError("Unsupported File Type")
+
+ #     return loader.load()
utils/load_llm.py ADDED
@@ -0,0 +1,24 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from config.nodes import OPENROUTER_API_KEY, HUGGINGFACEHUB_API_TOKEN, BASE_URL, LLM_MODEL_NAME, LLM_TEMPERATURE_SET, EMBEDDING_MODEL_NAME, EMBEDDING_MODEL_TASK
+ from dotenv import load_dotenv
+ import os
+
+ LLM_MODEL = ChatOpenAI(
+     temperature=LLM_TEMPERATURE_SET,
+     model=LLM_MODEL_NAME,
+     base_url=BASE_URL,
+     api_key=OPENROUTER_API_KEY
+ )
+
+ EMBEDDING_MODEL = HuggingFaceEndpointEmbeddings(
+     model=EMBEDDING_MODEL_NAME,
+     task=EMBEDDING_MODEL_TASK,
+     huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+ )
+
+ TEXT_SPLITTER = RecursiveCharacterTextSplitter(
+     chunk_size=1000,
+     chunk_overlap=200
+ )
utils/load_rag_chain.py ADDED
@@ -0,0 +1,10 @@
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables import RunnablePassthrough
+ from .load_llm import LLM_MODEL
+
+ def generate_response_from_context(retriever, question: str):
+     prompt = ChatPromptTemplate.from_messages([
+         ("human", "Answer this using context:\n{context}\n\nQuestion:\n{question}")
+     ])
+     rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | LLM_MODEL
+     return rag_chain.invoke(question).content
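One caveat worth noting: in this chain the retriever's output, a list of Document objects, is passed straight into the {context} slot, so the prompt receives the list's repr rather than clean text. A common refinement is to join the retrieved page_content before formatting; the sketch below shows that variant as an assumption, not as a file in this commit.

```python
# Hypothetical variant of utils/load_rag_chain.py that flattens retrieved
# Documents into plain text before they reach the prompt (not part of this commit).
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from utils.load_llm import LLM_MODEL


def format_docs(docs):
    # Join the page_content of each retrieved Document with blank lines.
    return "\n\n".join(doc.page_content for doc in docs)


def generate_response_from_context(retriever, question: str):
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Answer this using context:\n{context}\n\nQuestion:\n{question}")
    ])
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | LLM_MODEL
    )
    return rag_chain.invoke(question).content
```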