Spaces:
Sleeping
Sleeping
import os

import faiss
import numpy as np
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# Fail fast on startup: os.environ[...] raises KeyError naming the missing
# variable, which is clearer than a later authentication failure.
GROQ_API_KEY = os.environ['GROQ_API_KEY']

# Groq client used for chat completions in the Q&A flow below.
client = Groq(api_key=GROQ_API_KEY)

# Sentence embedder shared by document chunks and user queries.
# Fully-qualified model id; the short alias 'all-MiniLM-L6-v2' resolves to
# the same model on the Hugging Face hub.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# App UI. The original titles contained mojibake (garbled emoji bytes such
# as "π§"); replaced with plain text so they render correctly everywhere.
st.set_page_config(page_title="RAG Chat with PDF", layout="wide")
st.title("Chat with your PDF")
# Function to read PDF
def read_pdf(file):
    """Extract plain text from every page of an uploaded PDF.

    Args:
        file: A file-like object (e.g. Streamlit's UploadedFile) accepted
            by PyPDF2's PdfReader.

    Returns:
        All page texts joined with newlines.

    Notes:
        - ``page.extract_text()`` can return ``None`` for pages without
          extractable text (scanned images); the original ``text += ...``
          would raise ``TypeError`` there, so such pages are treated as "".
        - Pages are joined with a newline so the last word of one page is
          not fused with the first word of the next before chunking.
    """
    reader = PdfReader(file)
    return "\n".join((page.extract_text() or "") for page in reader.pages)
# Function to chunk text
def chunk_text(text, max_length=500, overlap=0):
    """Split text into word-count-bounded chunks.

    Args:
        text: The raw document text.
        max_length: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks
            (default 0 reproduces the original non-overlapping behavior).
            Overlap helps retrieval when an answer straddles a boundary.

    Returns:
        A list of chunk strings; empty list for empty/whitespace-only text.
    """
    words = text.split()
    if not words:
        return []
    # Step is clamped to >= 1 so a pathological overlap cannot loop forever.
    step = max(1, max_length - overlap)
    return [' '.join(words[i:i + max_length]) for i in range(0, len(words), step)]
# Function to embed and create FAISS index
def create_faiss_index(chunks):
    """Embed each chunk and build an exact L2 FAISS index over the vectors.

    Returns a (index, embeddings) pair so callers can reuse the vectors.
    """
    vectors = embedder.encode(chunks)
    dimension = vectors.shape[1]
    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(np.array(vectors))
    return flat_index, vectors
# Function to search from index
def search_index(index, query, chunks, top_k=3):
    """Return the top_k chunks nearest to the query in embedding space."""
    query_vec = np.array(embedder.encode([query]))
    _, neighbor_ids = index.search(query_vec, top_k)
    return [chunks[idx] for idx in neighbor_ids[0]]
# ---- File uploader and question-answering flow ----
# (Spinner/success messages previously contained mojibake emoji bytes such
# as "π" and "β"; replaced with plain text.)
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    with st.spinner("Reading and processing PDF..."):
        text = read_pdf(uploaded_file)
        chunks = chunk_text(text)
        # Guard: a scanned/image-only PDF yields no extractable text, and
        # building a FAISS index over an empty embedding batch would fail.
        if not chunks:
            st.error("No extractable text found in this PDF.")
            st.stop()
        index, embeddings = create_faiss_index(chunks)

    st.success("PDF processed. You can now ask questions!")

    query = st.text_input("Ask a question from the PDF:")
    if query:
        with st.spinner("Retrieving context..."):
            context_chunks = search_index(index, query, chunks, top_k=3)
            # Retrieved context followed by the question, sent as one turn.
            prompt = "\n".join(context_chunks) + f"\n\nQuestion: {query}"

        with st.spinner("Getting answer from Groq..."):
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
            answer = response.choices[0].message.content
        st.markdown(f"**Answer:** {answer}")