import os

import faiss
import numpy as np
import PyPDF2
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
# Set up the Groq client. The API key is read from the GROQ_API_KEY
# environment variable so that no credential lives in the source file.
# SECURITY: a literal API key used to be committed on this line. Treat that key
# as compromised and revoke/rotate it in the Groq console.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Initialize the embedding model and an exact (flat) L2 FAISS index. The index
# dimension is taken from the model rather than hard-coded (it was 384), so the
# two cannot drift apart if the model name is changed.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
# PDF text extraction
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; in this app it is the
            object returned by the Streamlit file uploader.

    Returns:
        str: The page texts joined in page order with no separator. Empty
        string when no page yields any text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``or ""`` is defensive: a page without a text layer (e.g. a scanned
    # image) must contribute nothing instead of breaking the concatenation.
    # A single join also avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Text chunking
def chunk_text(text, chunk_size=500, overlap=0):
    """Split *text* into fixed-width character chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous one.
            The default of 0 gives plain back-to-back slices. Must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks in order. The last chunk may be shorter than
        ``chunk_size``. Empty list when *text* is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is out of
            range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    # Each window starts ``step`` characters after the previous one, so
    # neighbouring windows share exactly ``overlap`` characters.
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
# Embed and store in FAISS
def embed_and_store(chunks):
    """Embed text chunks and append the vectors to the module-level FAISS index.

    Args:
        chunks: List of strings to embed.

    Returns:
        numpy.ndarray: The float32 embeddings that were added, one row per
        chunk. For an empty *chunks* list, a ``(0, index.d)`` array is returned
        and the index is left untouched.
    """
    if not chunks:
        # Nothing to embed. Skipping the encoder also avoids handing FAISS an
        # array that is not 2-D (n, d), which ``index.add`` requires.
        return np.empty((0, index.d), dtype="float32")
    # FAISS expects C-contiguous float32 input; coerce explicitly rather than
    # relying on the encoder's default output type.
    embeddings = np.ascontiguousarray(embedding_model.encode(chunks), dtype="float32")
    index.add(embeddings)
    return embeddings
# Retrieve relevant chunks
def retrieve_chunks(query, top_k=5):
    """Return the FAISS row ids of the stored vectors closest to *query*.

    Args:
        query: The question text to embed and search with.
        top_k: Maximum number of neighbours to return.

    Returns:
        numpy.ndarray: Integer array of shape ``(1, k)`` with
        ``k = min(top_k, index.ntotal)``; ``(1, 0)`` when the index is empty or
        ``top_k`` is not positive. Row 0 holds the ids for the single query.
    """
    # FAISS pads the result with -1 when fewer than k vectors are stored, and a
    # caller indexing a list with -1 would silently pick its last element.
    # Clamping k to the number of stored vectors prevents that padding.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return np.empty((1, 0), dtype="int64")
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype="float32"
    )
    _distances, indices = index.search(query_embedding, k)
    return indices
# Query Groq API
def query_groq(prompt, model="llama3-8b-8192"):
    """Send *prompt* as a single user message to Groq and return the reply text.

    Args:
        prompt: Full prompt text (context plus question).
        model: Groq model identifier. Defaults to the model this app has always
            used; exposed as a parameter so it can be changed without editing
            this function.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return chat_completion.choices[0].message.content
# Streamlit UI
def main():
    """Render the Streamlit page: upload a PDF, index it, and answer questions.

    Flow: read the uploaded PDF, split its text into chunks, embed the chunks
    into the FAISS index, then for a typed question retrieve the nearest chunks
    and send them with the question to Groq.
    """
    st.title("RAG-based PDF QA System")
    st.sidebar.header("Upload and Interact")
    uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])
    if not uploaded_file:
        return

    st.sidebar.success("PDF Uploaded Successfully!")
    text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(text)
    if not chunks:
        # E.g. a scanned PDF with no text layer: there is nothing to index or
        # retrieve, so stop here instead of failing further down.
        st.warning("No extractable text was found in this PDF.")
        return
    embed_and_store(chunks)
    st.write("PDF content has been processed and stored.")

    # The question box is only shown once a document has been processed,
    # because answering needs ``chunks``.
    query = st.text_input("Enter your question:")
    if not query:
        return

    indices = retrieve_chunks(query)
    # FAISS reports -1 for result slots it could not fill; drop anything that
    # is not a valid position so a negative id never wraps around to the last
    # chunk.
    relevant_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    prompt = " ".join(relevant_chunks) + f"\n\nQuestion: {query}"
    answer = query_groq(prompt)
    st.write("### Answer:")
    st.write(answer)
# Script entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    main()