Spaces:

TANVEERMAKHDOOM
/

rag_application

No application file

App Files Files Community

TANVEERMAKHDOOM committed on May 6, 2025

Commit

6df6272

verified ·

1 Parent(s): 359706e

Rename Dockerfile to app.py

Browse files

Files changed (2) hide show

Dockerfile +0 -21
app.py +92 -0

Dockerfile DELETED Viewed

@@ -1,21 +0,0 @@
-# Base image: the slim variant of Python 3.9.
-FROM python:3.9-slim
-# Subsequent relative paths (COPY targets, the ENTRYPOINT script path) resolve under /app.
-WORKDIR /app
-# Install build tools, curl and git, then delete the apt package lists in the same RUN step.
-# curl is what the HEALTHCHECK below invokes.
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    software-properties-common \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-# Copy the dependency list and the application sources into the image.
-COPY requirements.txt ./
-COPY src/ ./src/
-# Install the Python dependencies listed in requirements.txt.
-RUN pip3 install -r requirements.txt
-# 8501 is the port handed to Streamlit via --server.port in ENTRYPOINT.
-EXPOSE 8501
-# Health probe: request the /_stcore/health endpoint on that same port; --fail makes curl exit non-zero on HTTP errors.
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-# Start the Streamlit app from src/, bound to all interfaces on port 8501.
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import streamlit as st
+from PyPDF2 import PdfReader
+from sentence_transformers import SentenceTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import faiss
+import numpy as np
+import requests
+import os
+# Load the embedding model from Hugging Face once per server process.
+# Streamlit re-executes this script on every widget interaction, so the loader
+# is wrapped in st.cache_resource instead of constructing the model on each
+# rerun. show_spinner=False keeps the cached call from drawing a spinner ahead
+# of the st.set_page_config() call further down the script.
+@st.cache_resource(show_spinner=False)
+def _load_embedder():
+    """Return the shared SentenceTransformer used for every embedding call."""
+    return SentenceTransformer('all-MiniLM-L6-v2')
+embedder = _load_embedder()
+# Groq API key, read from the GROQ_API_KEY environment variable (in HF Spaces
+# use the Secrets tab to set it). The fallback is only a placeholder string,
+# not a working credential.
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your-groq-api-key")
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+# --- Functions ---
+# 1. Load and extract text from PDF
+def load_pdf(file):
+    """Extract the text of a PDF, page by page.
+
+    Args:
+        file: The PDF source handed straight to ``PdfReader`` (the UI passes
+            the uploaded file object).
+
+    Returns:
+        One string containing the text of every page that yielded any,
+        each page's text followed by a newline.
+    """
+    extracted = (page.extract_text() for page in PdfReader(file).pages)
+    # Pages for which extract_text() gives a falsy result are skipped.
+    return "".join(f"{page_text}\n" for page_text in extracted if page_text)
+# 2. Chunk text using LangChain splitter
+def chunk_text(text, chunk_size=500, chunk_overlap=100):
+    """Split ``text`` into chunks with LangChain's recursive character splitter.
+
+    Args:
+        text: The full document text.
+        chunk_size: Forwarded to the splitter as ``chunk_size``.
+        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
+
+    Returns:
+        Whatever ``split_text`` returns for ``text`` (the chunk strings).
+    """
+    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
+    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
+# 3. Create embeddings for chunks
+def create_embeddings(chunks):
+    """Encode ``chunks`` with the module-level ``embedder`` and return the result.
+
+    The encoder's progress bar is turned off.
+    """
+    vectors = embedder.encode(chunks, show_progress_bar=False)
+    return vectors
+# 4. Store embeddings in FAISS index
+def store_index(embeddings):
+    """Build a flat L2 FAISS index holding ``embeddings``.
+
+    Args:
+        embeddings: Array-like of shape ``(n_vectors, dim)``.
+
+    Returns:
+        A ``faiss.IndexFlatL2`` of dimension ``dim`` with all rows added.
+
+    Raises:
+        ValueError: If ``embeddings`` is empty or not 2-D, e.g. when the PDF
+            produced no text chunks. Previously this surfaced as a bare
+            IndexError on ``shape[1]``.
+    """
+    # FAISS works on C-contiguous float32 matrices; normalise the input once
+    # here rather than relying on every caller to pass the right dtype/layout.
+    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
+    if vectors.ndim != 2 or vectors.size == 0:
+        raise ValueError(
+            "embeddings must be a non-empty 2-D array, got shape "
+            f"{vectors.shape}"
+        )
+    index = faiss.IndexFlatL2(vectors.shape[1])
+    index.add(vectors)
+    return index
+# 5. Query FAISS index to find most relevant chunks
+def query_index(query, index, chunks, top_k=3):
+    """Return the chunks whose embeddings are nearest to ``query``.
+
+    Args:
+        query: The question text; it is embedded with the module-level
+            ``embedder``.
+        index: Index object exposing ``search(vectors, k)`` (the FAISS index
+            built from the chunk embeddings).
+        chunks: The chunk strings, in the same order their embeddings were
+            added to ``index``.
+        top_k: Maximum number of chunks to return.
+
+    Returns:
+        Up to ``top_k`` chunks, in the order the index returns them. An empty
+        list when there are no chunks or ``top_k`` is not positive.
+    """
+    # Never ask for more neighbours than there are chunks: FAISS pads missing
+    # results with label -1, and chunks[-1] would silently return the last
+    # chunk as if it were a hit.
+    k = min(top_k, len(chunks))
+    if k <= 0:
+        return []
+    query_embedding = np.asarray(embedder.encode([query]), dtype=np.float32)
+    _distances, neighbors = index.search(query_embedding, k)
+    # Keep the range check as a second line of defence in case the index and
+    # the chunk list are out of sync.
+    return [chunks[i] for i in neighbors[0] if 0 <= i < len(chunks)]
+# 6. Generate answer using Groq + LLaMA 3
+def generate_answer(context, query, *, model="llama3-8b-8192", timeout=60):
+    """Ask the Groq chat-completions endpoint to answer ``query`` given ``context``.
+
+    Args:
+        context: Retrieved document text placed ahead of the question.
+        query: The user's question.
+        model: Model identifier sent in the request body.
+        timeout: Seconds passed to ``requests.post`` as its timeout; without
+            one, a stalled connection would block the app indefinitely.
+
+    Returns:
+        The content of the first choice's message in the API response.
+
+    Raises:
+        requests.RequestException: On network failure or timeout.
+        RuntimeError: If the API answers with a non-success HTTP status or a
+            body that does not have the expected structure.
+    """
+    headers = {
+        "Authorization": f"Bearer {GROQ_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}"}
+        ]
+    }
+    response = requests.post(
+        GROQ_API_URL, headers=headers, json=payload, timeout=timeout
+    )
+    try:
+        result = response.json()
+    except ValueError:
+        # Body was not JSON (e.g. an HTML error page from a proxy).
+        result = None
+    if not response.ok:
+        # Prefer the API's own error message when the body carries one;
+        # otherwise fall back to the raw response text.
+        detail = response.text
+        if isinstance(result, dict) and isinstance(result.get("error"), dict):
+            detail = result["error"].get("message", detail)
+        raise RuntimeError(
+            f"Groq API request failed (HTTP {response.status_code}): {detail}"
+        )
+    try:
+        return result['choices'][0]['message']['content']
+    except (KeyError, IndexError, TypeError) as err:
+        raise RuntimeError(
+            "Groq API returned a response in an unexpected format"
+        ) from err
+# --- Streamlit UI ---
+st.set_page_config(page_title="RAG PDF Chatbot", layout="centered")
+st.title("📄 RAG Chatbot with Groq LLaMA 3")
+uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
+if uploaded_file:
+    # Streamlit reruns this whole script on every interaction (including each
+    # submitted question), so the parsed chunks and the index are kept in
+    # session state and only rebuilt when a different file is uploaded.
+    # The file is identified by name and size.
+    doc_key = (uploaded_file.name, uploaded_file.size)
+    cached = st.session_state.get("rag_document")
+    if cached is None or cached["key"] != doc_key:
+        with st.spinner("Processing PDF..."):
+            text = load_pdf(uploaded_file)
+            chunks = chunk_text(text)
+            # A PDF without extractable text yields no chunks; skip indexing
+            # so the user gets a message instead of a traceback.
+            index = None
+            if chunks:
+                embeddings = create_embeddings(chunks)
+                index = store_index(np.array(embeddings))
+        if index is None:
+            st.error("No extractable text was found in this PDF.")
+            st.stop()
+        cached = {"key": doc_key, "chunks": chunks, "index": index}
+        st.session_state["rag_document"] = cached
+    chunks = cached["chunks"]
+    index = cached["index"]
+    st.success("✅ PDF processed! Ask your question below:")
+    query = st.text_input("❓ Ask a question about the PDF:")
+    if query:
+        answer = None
+        with st.spinner("Generating answer..."):
+            relevant_chunks = query_index(query, index, chunks)
+            context = "\n\n".join(relevant_chunks)
+            try:
+                answer = generate_answer(context, query)
+            except (
+                requests.RequestException,
+                RuntimeError,
+                KeyError,
+                IndexError,
+                ValueError,
+            ) as err:
+                # Network problems, API errors and malformed responses are
+                # reported in the page rather than as a raw traceback.
+                st.error(f"Could not generate an answer: {err}")
+        if answer is not None:
+            st.subheader("💡 Answer")
+            st.write(answer)