Mehak900 commited on
Commit
fad3f30
·
verified ·
1 Parent(s): 5d66b63

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import os
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Read the Groq API key from the environment instead of hard-coding it.
# SECURITY: the original file embedded a literal API key in source control;
# any key that was committed this way should be revoked and rotated.
# On Hugging Face Spaces, set GROQ_API_KEY as a repository secret.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    # Fail fast with a visible message rather than crashing later on an
    # authentication error from the Groq client.
    st.error("GROQ_API_KEY is not set. Add it as an environment variable or Space secret.")
    st.stop()

# Initialize Groq client (used by generate_answer below)
groq_client = Groq(api_key=GROQ_API_KEY)

# Load the sentence-embedding model once at import time; reused for every index build.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
19
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in the uploaded PDF.

    Pages whose extraction yields ``None`` or an empty string contribute
    nothing (same skip behavior as checking the page text before appending).
    """
    reader = PdfReader(uploaded_file)
    # `or ""` turns a None/empty extraction into a no-op for the join.
    return "".join(page.extract_text() or "" for page in reader.pages)
29
# Function to split text into chunks
def chunk_text(text):
    """Split raw text into ~500-character chunks (50-char overlap) and wrap
    each chunk as a LangChain ``Document`` ready for embedding."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return [Document(page_content=piece) for piece in splitter.split_text(text)]
35
# Create FAISS vector index
def create_faiss_index(documents):
    """Embed the given Documents with the module-level ``embedding_model``
    and return the resulting FAISS vector index."""
    index = FAISS.from_documents(documents, embedding_model)
    return index
39
# Search similar chunks
def search_faiss_index(query, index, k=3):
    """Return the ``k`` chunks in ``index`` most similar to ``query``."""
    hits = index.similarity_search(query, k=k)
    return hits
43
# Generate answer using Groq model
def generate_answer(query, context_chunks):
    """Ask the Groq chat model to answer ``query`` using only the retrieved
    context chunks, and return the model's reply text."""
    context = "\n".join(doc.page_content for doc in context_chunks)
    prompt = f"""Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"""

    reply = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.1-8b-instant",  # ✅ Correct current model name on Groq
    )
    return reply.choices[0].message.content
54
# Streamlit UI
st.title("📄 RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction (e.g. each
    # question submitted). Cache the FAISS index in session_state, keyed by the
    # uploaded file's name, so the PDF is parsed and embedded only once instead
    # of on every rerun.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        with st.spinner("Reading and processing document..."):
            raw_text = extract_text_from_pdf(uploaded_file)
            documents = chunk_text(raw_text)
            st.session_state["vector_index"] = create_faiss_index(documents)
            st.session_state["indexed_file"] = uploaded_file.name
        st.success("Document processed and indexed successfully!")
    vector_index = st.session_state["vector_index"]

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(question, vector_index)
            answer = generate_answer(question, related_chunks)
        st.subheader("📌 Answer:")
        st.write(answer)