TANVEERMAKHDOOM committed on
Commit
6df6272
·
verified ·
1 Parent(s): 359706e

Rename Dockerfile to app.py

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -21
  2. app.py +92 -0
Dockerfile DELETED
@@ -1,21 +0,0 @@
1
# Container image for the Streamlit application in src/streamlit_app.py.
FROM python:3.9-slim

WORKDIR /app

# System packages; the apt package lists are removed in the same layer so
# they do not end up in the final image.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies BEFORE copying the application source so this
# layer is served from the build cache when only the code under src/ changes.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

COPY src/ ./src/

# Port the Streamlit server listens on (see ENTRYPOINT).
EXPOSE 8501

# Mark the container unhealthy when Streamlit's health endpoint stops answering.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from sentence_transformers import SentenceTransformer
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ import faiss
6
+ import numpy as np
7
+ import requests
8
+ import os
9
+
10
# Embedding model loaded from Hugging Face; shared by indexing and querying.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Groq credentials and endpoint. The key is read from the GROQ_API_KEY
# environment variable (in HF Spaces, set it in the Secrets tab); the literal
# below is only a placeholder used when the variable is absent.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
16
+
17
+ # --- Functions ---
18
+
19
+ # 1. Load and extract text from PDF
20
def load_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: PDF source handed straight to ``PdfReader`` (the UI in this
            file passes the uploaded file object).

    Returns:
        The extracted text of each page, each followed by a newline. Pages
        for which ``extract_text()`` returns nothing are skipped.
    """
    extracted = (page.extract_text() for page in PdfReader(file).pages)
    return "".join(page_text + "\n" for page_text in extracted if page_text)
28
+
29
+ # 2. Chunk text using LangChain splitter
30
def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split ``text`` into chunks with LangChain's recursive splitter.

    Args:
        text: The full document text.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the chunk strings).
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
36
+
37
+ # 3. Create embeddings for chunks
38
def create_embeddings(chunks):
    """Encode ``chunks`` with the module-level ``embedder``.

    Args:
        chunks: The text chunks to embed.

    Returns:
        The result of ``embedder.encode`` for the chunks; the progress bar
        is disabled.
    """
    vectors = embedder.encode(chunks, show_progress_bar=False)
    return vectors
40
+
41
+ # 4. Store embeddings in FAISS index
42
def store_index(embeddings):
    """Build a FAISS L2 index containing ``embeddings``.

    Args:
        embeddings: Array-like of shape ``(n_vectors, dim)``.

    Returns:
        A ``faiss.IndexFlatL2`` of dimension ``dim`` holding all vectors.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional, e.g. because
            no chunks were produced and the embedding array is empty.
    """
    # FAISS works on C-contiguous float32 matrices; coerce once here so the
    # caller's dtype/layout does not matter.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            "embeddings must be a 2-D array of shape (n_vectors, dim); "
            f"got shape {vectors.shape}"
        )
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
47
+
48
+ # 5. Query FAISS index to find most relevant chunks
49
def query_index(query, index, chunks, top_k=3):
    """Return the chunks whose embeddings are nearest to ``query``.

    Args:
        query: The user's question.
        index: FAISS index built from the embeddings of ``chunks``.
        chunks: The text chunks, in the same order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks in the order FAISS returned them. The list is
        shorter when fewer chunks exist and empty when there is nothing to
        search.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which would silently index the LAST chunk.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []
    query_vector = np.asarray(embedder.encode([query]), dtype=np.float32)
    _, neighbours = index.search(query_vector, k)
    # Defensive filter in case the index holds fewer vectors than `chunks`.
    return [chunks[i] for i in neighbours[0] if 0 <= i < len(chunks)]
53
+
54
+ # 6. Generate answer using Groq + LLaMA 3
55
def generate_answer(context, query):
    """Ask the Groq chat-completions API to answer ``query`` given ``context``.

    Args:
        context: Retrieved document text placed before the question.
        query: The user's question.

    Returns:
        The content of the first choice's message in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (for example an invalid or missing API key).
        requests.Timeout: If the API does not respond in time.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}"},
        ],
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(
        GROQ_API_URL, headers=headers, json=payload, timeout=60
    )
    # Surface API failures as an HTTP error instead of a confusing
    # KeyError('choices') when the error body is parsed below.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
70
+
71
+ # --- Streamlit UI ---
72
+
73
# Page setup and PDF upload widget.
st.set_page_config(page_title="RAG PDF Chatbot", layout="centered")
st.title("📄 RAG Chatbot with Groq LLaMA 3")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    with st.spinner("Processing PDF..."):
        text = load_pdf(uploaded_file)
        chunks = chunk_text(text)
        # Only build the index when there is something to embed; a PDF with
        # no extractable text yields no chunks and cannot be indexed.
        index = None
        if chunks:
            embeddings = create_embeddings(chunks)
            index = store_index(np.array(embeddings))

    if index is None:
        st.error("No extractable text was found in this PDF.")
    else:
        st.success("✅ PDF processed! Ask your question below:")

        query = st.text_input("❓ Ask a question about the PDF:")
        if query:
            with st.spinner("Generating answer..."):
                # Retrieve the most relevant chunks and pass them to the LLM
                # as context for the question.
                relevant_chunks = query_index(query, index, chunks)
                context = "\n\n".join(relevant_chunks)
                answer = generate_answer(context, query)
            st.subheader("💡 Answer")
            st.write(answer)