Spaces:

Dewasheesh
/

HelpDev

Sleeping

App Files Files Community

Chaitaniya committed on Jun 28, 2025

Commit

4810f6f

1 Parent(s): 897015c

hh

Browse files

Files changed (9) hide show

.gitignore +25 -0
Dockerfile +21 -0
app/main.py +28 -0
app/pdf_handler.py +14 -0
app/rag_pipeline.py +85 -0
app/test.py +21 -0
app/vector_store.py +17 -0
requirements.txt +10 -0
ui/ui_app.py +35 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,25 @@

+# Python cache and environment
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.env
+.venv/
+# Data folders (PDFs, ChromaDB, etc.)
+data/*
+!data/__init__.py
+# Large file types
+*.pdf
+*.pt
+*.bin
+# Streamlit UI cache
+.ui/
+.streamlit/
+# OS/IDE junk
+.DS_Store
+*.swp
+.vscode/

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# FROM python:3.13.0
+# WORKDIR /app
+# COPY . /app
+# RUN pip install --upgrade pip
+# RUN pip install -r requirements.txt
+# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+FROM python:3.13.0
+WORKDIR /app
+COPY . .
+RUN apt-get update && apt-get install -y git
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt
+EXPOSE 7860
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

app/main.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from pydantic import BaseModel
+from fastapi import FastAPI, UploadFile, File
+from app.pdf_handler import process_pdf
+from app.rag_pipeline import generate_answer
+from app.test import query_test
+# ASGI application object; the Dockerfile's uvicorn command serves it as "app.main:app".
+app = FastAPI()
+class ChatRequest(BaseModel):
+    """JSON body accepted by the POST /chat/ endpoint."""
+    # The user's question.
+    query: str
+    # Requested number of context chunks; forwarded to generate_answer.
+    top_k: int = 3
+@app.post("/chat/")
+def chat(request: ChatRequest):
+    """Answer a question using the indexed PDF content.
+
+    Declared as a plain ``def`` rather than ``async def``: generate_answer
+    is synchronous (it loads an embedding model and performs a blocking HTTP
+    call), so FastAPI should run it in its worker threadpool instead of on
+    the event loop, where it would stall every other request.
+
+    Args:
+        request: Parsed request body carrying ``query`` and ``top_k``.
+
+    Returns:
+        The dict produced by generate_answer.
+    """
+    return generate_answer(request.query, request.top_k)
+@app.post("/upload/")
+def upload_pdf(file: UploadFile = File(...)):
+    """Store an uploaded PDF and add its content to the vector index.
+
+    Declared as a plain ``def`` rather than ``async def``: process_pdf does
+    blocking file I/O and embedding work, so FastAPI should run it in its
+    worker threadpool instead of on the event loop.
+
+    Args:
+        file: The uploaded file from the multipart form field ``file``.
+
+    Returns:
+        The status dict produced by process_pdf.
+    """
+    return process_pdf(file)
+@app.post("/test/")
+def chat_test(query: str, top_k: int = 3):
+    """Send a test request to the hosted model via query_test.
+
+    This handler used to be named ``chat`` as well, which rebound the
+    module-level name of the /chat/ handler; it now has its own name. The
+    route and its query parameters are unchanged. It is a plain ``def``
+    because query_test performs a blocking HTTP call.
+
+    Args:
+        query: Passed through to query_test.
+        top_k: Accepted for API compatibility; not used by this endpoint.
+
+    Returns:
+        Whatever query_test returns.
+    """
+    return query_test(query)

app/pdf_handler.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import os
+from fastapi import UploadFile
+from app.vector_store import store_pdf
+# Directory where uploaded PDFs are written before indexing.
+UPLOAD_DIR = "data/uploaded_pdfs"
+def process_pdf(file: UploadFile):
+    """Save an uploaded PDF under UPLOAD_DIR and index it with store_pdf.
+
+    Args:
+        file: The uploaded file object received by the API.
+
+    Returns:
+        A dict with ``status`` set to "uploaded" and ``filename`` set to the
+        name the file was saved under.
+    """
+    # file.filename is supplied by the client. Keep only its final path
+    # component (treating backslashes as separators too) so a name such as
+    # "../../app/main.py" cannot write outside UPLOAD_DIR.
+    filename = os.path.basename((file.filename or "").replace("\\", "/"))
+    if filename in ("", ".", ".."):
+        filename = "upload.pdf"
+    os.makedirs(UPLOAD_DIR, exist_ok=True)
+    filepath = os.path.join(UPLOAD_DIR, filename)
+    with open(filepath, "wb") as f:
+        # Copy in 1 MiB chunks rather than reading the whole upload into memory.
+        while chunk := file.file.read(1024 * 1024):
+            f.write(chunk)
+    store_pdf(filepath)
+    return {"status": "uploaded", "filename": filename}

app/rag_pipeline.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+from dotenv import load_dotenv
+import requests
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import HuggingFacePipeline
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.embeddings import SentenceTransformerEmbeddings
+# Directory of the persisted Chroma index.
+CHROMA_DIR = "data/chroma_db"
+# Load variables from a .env file before the API key is read below.
+load_dotenv()
+HUGGINGFACE_API_KEY = os.getenv("HF_API_KEY")  # set this in .env or directly
+# Instruction text that generate_answer places at the start of every prompt.
+qna_system_message = """
+You are an assistant whose work is to review the report and provide the appropriate answers from the context.
+User input will have the context required by you to answer user questions.
+This context will begin with the token: ###Context.
+The context contains references to specific portions of a document relevant to the user query.
+User questions will begin with the token: ###Question.
+Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.
+If the answer is not found in the context, respond "I don't know".
+"""
+# Per-question template; generate_answer fills the {context} and {question}
+# placeholders with str.replace.
+qna_user_message_template = """
+###Context
+Here are some documents that are relevant to the question mentioned below.
+{context}
+###Question
+{question}
+"""
+def call_huggingface_mistral(prompt: str, timeout: float = 60):
+    """Send *prompt* to the hosted Mistral-7B-Instruct chat-completions API.
+
+    Args:
+        prompt: Full prompt text, sent as a single user message.
+        timeout: Seconds to wait for the HTTP request. Without a timeout,
+            requests waits indefinitely when the upstream service stalls.
+
+    Returns:
+        On HTTP 200, the ``message`` object of the first choice in the JSON
+        response. On any other status, a string of the form
+        ``"[Error <status>] <body>"``.
+
+    Raises:
+        requests.RequestException: On connection failure or timeout.
+    """
+    api_url = "https://router.huggingface.co/featherless-ai/v1/chat/completions"
+    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}
+    payload = {
+        "messages": [
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+        "model": 'mistralai/Mistral-7B-Instruct-v0.2'
+    }
+    response = requests.post(
+        api_url, headers=headers, json=payload, timeout=timeout)
+    if response.status_code != 200:
+        return f"[Error {response.status_code}] {response.text}"
+    return response.json()["choices"][0]["message"]
+def generate_answer(query, top_k=3):
+    """Answer *query* using chunks retrieved from the persisted Chroma index.
+
+    Args:
+        query: The user's question.
+        top_k: Number of document chunks to retrieve and place in the prompt.
+
+    Returns:
+        A dict ``{"answer": ...}`` holding whatever call_huggingface_mistral
+        returned for the assembled prompt.
+    """
+    # Same embedding model that store_pdf uses when building the index.
+    embeddings = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
+    db = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
+    # k used to be hard-coded to 4 here, so the caller's top_k was not
+    # reliably honoured; size the retriever from top_k instead.
+    retriever = db.as_retriever(
+        search_type='similarity',
+        search_kwargs={'k': top_k}
+    )
+    relevant_document_chunks = retriever.get_relevant_documents(query)
+    context_list = [d.page_content for d in relevant_document_chunks]
+    print(f'context_list: {context_list}')
+    # Combine document chunks into a single context
+    context = ". ".join(context_list)
+    # str.replace (not str.format) so braces inside the retrieved text or the
+    # question cannot break the substitution.
+    user_message = qna_user_message_template.replace(
+        '{context}', context)
+    user_message = user_message.replace('{question}', query)
+    prompt = qna_system_message + '\n' + user_message
+    print(f'Prompt: {prompt}')
+    answer = call_huggingface_mistral(prompt)
+    return {"answer": answer}

app/test.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+import requests
+# Chat-completions endpoint that query_test posts to.
+API_URL = "https://router.huggingface.co/featherless-ai/v1/chat/completions"
+# NOTE: os.environ[...] raises KeyError when HF_API_KEY is unset, and this
+# runs at import time, so importing this module fails without the variable.
+headers = {
+    "Authorization": f"Bearer {os.environ['HF_API_KEY']}",
+}
+def query_test(payload):
+    """Send a single user message to the hosted model and return its reply.
+
+    The argument used to be overwritten immediately, so the caller's text was
+    ignored. It is now used as the message content; the original fixed
+    question remains the fallback when no usable text is given.
+
+    Args:
+        payload: The question text to send. Anything that is not a non-blank
+            string falls back to the default question.
+
+    Returns:
+        The ``message`` object of the first choice in the JSON response.
+    """
+    if isinstance(payload, str) and payload.strip():
+        question = payload
+    else:
+        question = "What is the capital of France?"
+    body = {
+        "messages": [
+            {
+                "role": "user",
+                "content": question
+            }
+        ],
+        "model": "mistralai/Mistral-7B-Instruct-v0.2"
+    }
+    # A timeout keeps a stalled upstream from hanging the request forever.
+    response = requests.post(API_URL, headers=headers, json=body, timeout=60)
+    return response.json()["choices"][0]["message"]

app/vector_store.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import os
+from langchain.vectorstores import Chroma
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+# Directory where the Chroma index is persisted.
+CHROMA_DIR = "data/chroma_db"
+def store_pdf(pdf_path):
+    """Load the PDF at *pdf_path*, split it into chunks and index them in Chroma.
+
+    Args:
+        pdf_path: Filesystem path of the PDF to index.
+    """
+    pages = PyMuPDFLoader(pdf_path).load()
+    # 500-character chunks with a 100-character overlap between neighbours.
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100,
+    )
+    pieces = text_splitter.split_documents(pages)
+    embedder = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
+    Chroma.from_documents(
+        documents=pieces,
+        embedding=embedder,
+        persist_directory=CHROMA_DIR,
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+fastapi
+uvicorn
+langchain
+chromadb
+transformers
+sentence-transformers
+PyMuPDF
+langchain-community
+python-multipart
+streamlit

ui/ui_app.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import os
+import streamlit as st
+import requests
+# Base URL of the FastAPI backend. The default is the previously hard-coded
+# local address; set API_BASE_URL when the API listens elsewhere (the
+# Dockerfile in this commit serves it on port 7860).
+API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:8000").rstrip("/")
+API_CHAT_URL = f"{API_BASE_URL}/chat/"
+API_UPLOAD_URL = f"{API_BASE_URL}/upload/"
+st.title("📚 HelpDevelopers RAG Chatbot")
+# PDF Upload Section
+st.header("📤 Upload a PDF")
+uploaded_file = st.file_uploader("Choose a PDF to upload", type="pdf")
+if uploaded_file is not None:
+    if st.button("Upload"):
+        with st.spinner("Uploading..."):
+            files = {"file": (uploaded_file.name,
+                              uploaded_file, "application/pdf")}
+            try:
+                res = requests.post(API_UPLOAD_URL, files=files)
+            except requests.RequestException as exc:
+                # Show an unreachable backend as a message, not a traceback.
+                st.error(f"Upload failed: {exc}")
+            else:
+                if res.status_code == 200:
+                    st.success(f"{uploaded_file.name} uploaded successfully!")
+                else:
+                    st.error("Upload failed.")
+# Chat Section
+st.header("💬 Ask a Question")
+query = st.text_input("Your question:")
+if st.button("Submit"):
+    if query:
+        with st.spinner("Thinking..."):
+            try:
+                res = requests.post(API_CHAT_URL, json={
+                                    "query": query, "top_k": 3})
+            except requests.RequestException as exc:
+                st.error(f"Error: {exc}")
+            else:
+                if res.status_code == 200:
+                    answer = res.json().get("answer", "No response.")
+                    # On success the backend returns the whole chat message
+                    # object (role + content); display only its text.
+                    if isinstance(answer, dict):
+                        answer = answer.get("content", "No response.")
+                    st.success(answer)
+                else:
+                    st.error(f"Error: {res.text}")