viraj committed on
Commit
f09e297
·
1 Parent(s): 05bc227

RAG app upload

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # SECURITY: real API keys were committed here in plaintext and must be
+ # treated as leaked — revoke/rotate both immediately, keep .env in
+ # .gitignore, and supply secrets via the deployment environment instead.
+ OPENAI_API_KEY="REDACTED-REVOKE-AND-ROTATE"
+ GROQ_API_KEY="REDACTED-REVOKE-AND-ROTATE"
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+ COPY ./app /app
5
+
6
+ RUN pip install --upgrade pip
7
+ RUN pip install -r requirements.txt
8
+
9
+ EXPOSE 7860
10
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
files/1503253b-bb6d-40fa-a6df-77c6cd0c765a_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/1874da6d-0662-4e5e-9a6f-16801dd493ff_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/2701f754-a207-4e02-9cf1-9480aaabfafa_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/32d0ce30-093d-436a-8e0d-b12b251d1cf8_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/41975b82-5573-4f1d-9990-eea2364936f8_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/926c842a-bce7-47e8-a84a-5378677cad7e_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/a97aff56-f666-4e12-b6b2-3ea600b041ba_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/b0b0cd28-a5b7-4fa0-885b-4ed99a61ada9_Vehicle_Maintenance_SRS.pdf ADDED
Binary file (4.9 kB). View file
 
files/c0b8e4bb-914a-4475-aa6b-e4a67e4d8d87_Vehicle_Maintenance_SRS.pdf ADDED
Binary file (4.9 kB). View file
 
files/cf883c42-f8f5-4d36-a757-2946a8fb5d91_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/cfd14df9-21d4-49cf-9213-a644d3ebda41_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/ec91f7a5-e958-4b12-9dc5-05954ff579c8_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/ed4592f4-e79e-4c25-8355-e19fe09b51d9_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
files/f6ac9b57-252f-4b45-bef0-ffbd8cdc7d7e_Payments_.pdf ADDED
Binary file (57.5 kB). View file
 
main.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rag_pipeline import process_file, answer_query
2
+ from pydantic import BaseModel
3
+
4
+ class QueryRequest(BaseModel):
5
+ file_id: str
6
+ question: str
7
+ page: int
8
+ explainLike5: bool = False
9
+
10
+ from fastapi import FastAPI, UploadFile, File
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from dotenv import load_dotenv
13
+ from fastapi import Body
14
+ import uuid
15
+ import os
16
+ from langchain_chroma import Chroma
17
+ from langchain_huggingface import HuggingFaceEmbeddings
18
+ import re
19
+
20
+ load_dotenv()
21
+
22
+ CHROMA_DIR = "./chroma_db"
23
+ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
24
+
25
+
26
+ app = FastAPI()
27
+ BASE_DIR = "files"
28
+ app.add_middleware(
29
+ CORSMiddleware,
30
+ allow_origins=["*"],
31
+ allow_methods=["*"],
32
+ allow_headers=["*"],
33
+ )
34
+ file_store = {}
35
+ @app.get("/test")
36
+ async def test():
37
+ return {"message": "hello world!"}
38
+
39
+ @app.post("/upload")
40
+ async def upload(file: UploadFile = File(...)):
41
+ content = await file.read()
42
+ file_id = str(uuid.uuid4())
43
+ safe_filename = file.filename.replace(" ", "_")
44
+ full_filename = f"{file_id}_{safe_filename}"
45
+ save_path = os.path.join(BASE_DIR, full_filename)
46
+ os.makedirs(BASE_DIR, exist_ok=True)
47
+ with open(save_path, "wb") as f:
48
+ f.write(content)
49
+ retriever = process_file(content, safe_filename, file_id)
50
+ file_store[file_id] = retriever
51
+ return {"message": "File processed", "file_id": file_id}
52
+
53
+ @app.post("/query")
54
+ async def query_endpoint(request = Body(...)):
55
+ file_id = request.get("file_id")
56
+ question = request.get("question")
57
+ selected_text = request.get("selectedText")
58
+ explain_like_5 = request.get("explainLike5", False)
59
+
60
+ if not file_id or not question:
61
+ return {"error": "Missing file_id or question"}
62
+
63
+ retriever_path = f"{CHROMA_DIR}/{file_id}"
64
+ # Load retriever from disk
65
+ if not os.path.exists(retriever_path):
66
+ return {"error": "Vectorstore for this file_id not found."}
67
+
68
+ vectorstore = Chroma(
69
+ embedding_function=embedding_model,
70
+ persist_directory=retriever_path
71
+ )
72
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})
73
+ retrieved_docs = retriever.invoke(selected_text or question)
74
+ retrieved_context = "\n\n".join(
75
+ re.sub(r"\s+", " ", doc.page_content.strip()) for doc in retrieved_docs
76
+ )
77
+
78
+ combined_context = f"User selected this:\n\"{selected_text}\"\n\nRelated parts from the document:\n{retrieved_context}"
79
+ print("Combined context", combined_context)
80
+ answer = answer_query(question, combined_context, explain_like_5)
81
+ return {"answer": answer}
rag_pipeline.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from langchain_chroma import Chroma
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ import os
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ import re
8
+ from groq import Groq
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+
12
+ client = Groq(api_key=os.environ["GROQ_API_KEY"])
13
+ CHROMA_DIR = "./chroma_db"
14
+ # Embedding model (free HuggingFace)
15
+ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
16
+
17
+ def process_file(file_bytes, filename, file_id):
18
+ ext = filename.split('.')[-1].lower()
19
+ with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp:
20
+ tmp.write(file_bytes)
21
+ tmp_path = tmp.name
22
+
23
+ loader = PyPDFLoader(tmp_path) if ext == 'pdf' else None
24
+ docs = loader.load()
25
+
26
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
27
+ chunks = text_splitter.split_documents(docs)
28
+
29
+ vectorstore = Chroma.from_documents(
30
+ documents=chunks,
31
+ embedding=embedding_model,
32
+ persist_directory=f"{CHROMA_DIR}/{file_id}"
33
+ )
34
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})
35
+ os.unlink(tmp_path)
36
+ return retriever
37
+
38
+
39
+ def answer_query(question, context, explain_like_5=False):
40
+ system_prompt = (
41
+ "You are a helpful assistant answering user queries based on provided document chunks.\n"
42
+ "Only use the given context. If the answer is not found, respond with 'I don't know.'"
43
+ )
44
+ if explain_like_5:
45
+ system_prompt += "\nExplain the answer in a simple way, like you're talking to a 5-year-old."
46
+
47
+ # Step 2: Send to LLM
48
+ print("context\======================================================================", context)
49
+ print("question =====================================================================", question)
50
+ response = client.chat.completions.create(
51
+ model="llama-3.3-70b-versatile",
52
+ messages=[
53
+ {"role": "system", "content": system_prompt},
54
+ {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
55
+ ]
56
+ )
57
+ return response.choices[0].message.content
requirments.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
fastapi
uvicorn
python-multipart
pydantic
langchain
langchain-community
langchain-chroma
langchain-huggingface
pypdf
huggingface-hub
sentence-transformers
groq
python-dotenv