# NOTE: Hugging Face Spaces page header ("Spaces: Sleeping") removed — it was
# scrape residue, not part of the application source.
# app.py — Streamlit RAG (retrieval-augmented generation) demo:
# upload a PDF, embed its chunks with sentence-transformers, index them
# in FAISS, and answer questions with a Groq-hosted LLM.

import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# --- SETUP GROQ ---
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; the
# client will then fail on the first request — consider failing fast
# with st.error at startup.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# --- LOAD EMBEDDING MODEL ---
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# --- STREAMLIT UI ---
st.set_page_config(page_title="RAG App with Groq", layout="wide")
# Emoji restored from mojibake ("ππ§") — presumably 📄🧠; confirm intent.
st.title("📄🧠 RAG-Based Question Answering App")

# --- UPLOAD PDF ---
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    reader = PdfReader(uploaded_file)
    # PyPDF2's extract_text() returns None for pages with no extractable
    # text (e.g. scanned images) — coalesce to "" so the join cannot raise.
    # "".join avoids the quadratic `text += ...` accumulation pattern.
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # --- CHUNKING ---
    def chunk_text(text, chunk_size=500):
        """Split *text* into consecutive chunks of at most *chunk_size* words."""
        words = text.split()
        return [
            " ".join(words[i:i + chunk_size])
            for i in range(0, len(words), chunk_size)
        ]

    chunks = chunk_text(text)

    if not chunks:
        # Previously an empty PDF crashed on `embeddings[0]`; warn instead.
        st.warning("No extractable text found in this PDF.")
    else:
        # --- VECTORIZE CHUNKS ---
        embeddings = embedder.encode(chunks)
        dim = embeddings[0].shape[0]
        index = faiss.IndexFlatL2(dim)  # exact L2 nearest-neighbour index
        index.add(np.array(embeddings))
        st.success("✅ Document uploaded and indexed!")  # emoji restored from mojibake

        # --- USER QUERY ---
        user_query = st.text_input("Ask something about the document")
        if user_query:
            query_embedding = embedder.encode([user_query])
            # Never ask FAISS for more neighbours than indexed vectors:
            # with k > ntotal it pads the result with -1, and chunks[-1]
            # would silently retrieve the wrong chunk.
            k = min(3, len(chunks))
            _, I = index.search(np.array(query_embedding), k=k)
            retrieved_chunks = "\n\n".join(chunks[i] for i in I[0] if i >= 0)

            # --- GROQ COMPLETION ---
            response = client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant who answers based on the provided context."},
                    {"role": "user", "content": f"Context: {retrieved_chunks}\n\nQuestion: {user_query}"},
                ],
            )
            # Emoji restored from mojibake ("π’") — presumably 🟢; confirm intent.
            st.subheader("🟢 Answer")
            st.write(response.choices[0].message.content)