Spaces:
Build error
Build error
| app_code = ''' | |
| import os | |
| import streamlit as st | |
| import tempfile | |
| import fitz # PyMuPDF | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| from groq import Groq | |
# Get API Key securely from Hugging Face Secrets and expose it as an
# environment variable for the rest of the process.
os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]

# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


@st.cache_resource
def _load_embed_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the model as a resource avoids constructing it again on each rerun.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


# Load sentence transformer model
embed_model = _load_embed_model()
# Text Chunking
def chunk_text(text, chunk_size=500, overlap=100):
    """Split text into fixed-size character windows that overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters in each chunk.
        overlap: Number of characters shared by consecutive chunks; must be
            smaller than chunk_size.

    Returns:
        A list of substrings in document order. The list is empty when text
        is empty, and the final chunk may be shorter than chunk_size.

    Raises:
        ValueError: If overlap is not smaller than chunk_size, because the
            window could then never advance through the text.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError("chunk_size must be greater than overlap")
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]
# PDF Text Extraction
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object whose read() returns the
            raw PDF bytes (the app passes the value of st.file_uploader).

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.
    """
    pdf_bytes = uploaded_file.read()
    # Parse directly from memory: no temporary file is left behind on disk,
    # and the context manager closes the document once extraction is done.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Build FAISS index
def build_faiss_index(chunks):
    """Embed the chunks and store their vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        A tuple (index, embeddings): the populated faiss.IndexFlatL2 and the
        float32 matrix of chunk embeddings that was added to it, one row per
        chunk in the same order as chunks.

    Raises:
        ValueError: If chunks is empty, since there is nothing to embed and
            the embedding dimension cannot be determined.
    """
    if len(chunks) == 0:
        raise ValueError("cannot build a FAISS index from an empty chunk list")
    # FAISS expects a C-contiguous float32 matrix; normalise the encoder
    # output so the index never receives another dtype or layout.
    embeddings = np.ascontiguousarray(embed_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
# Retrieve top-k chunks
def retrieve_chunks(query, chunks, index, embeddings, top_k=3):
    """Return the chunks whose embeddings are closest to the query.

    Args:
        query: The question text to embed and search for.
        chunks: The chunk list the index was built from, in the same order.
        index: FAISS index to search.
        embeddings: Not used by the search; kept so existing callers that
            pass it keep working.
        top_k: Maximum number of chunks to return.

    Returns:
        At most top_k chunks, in the order FAISS reports their neighbours.
        Empty when there are no chunks or top_k is not positive.
    """
    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []
    query_vec = np.ascontiguousarray(embed_model.encode([query]), dtype="float32")
    _, neighbor_ids = index.search(query_vec, k)
    # FAISS fills missing neighbours with -1; used as a list index that
    # would silently select the last chunk, so drop those entries.
    return [chunks[i] for i in neighbor_ids[0] if i >= 0]
# Ask Groq with context
def ask_groq(query, context, model="llama-3-70b-8192"):
    """Send the question plus retrieved context to Groq and return the reply.

    Args:
        query: The user's question.
        context: Retrieved text placed ahead of the question in the prompt.
        model: Groq model ID used for the chat completion.

    Returns:
        The message content of the first choice in the completion response.
    """
    # NOTE(review): confirm the default model ID against Groq's list of
    # currently supported models; an unknown ID makes the request fail.
    # The newline escapes are doubled because this code lives inside the
    # app_code string literal and is unescaped when written to app.py.
    prompt = f"Context:\\n{context}\\n\\nQuestion: {query}"
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content
# Streamlit App UI
st.title("🧠 PDF Q&A with RAG (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
query = st.text_input("Ask a question about the PDF")

if uploaded_file:
    st.info("Processing PDF...")
    full_text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(full_text)
    if not chunks:
        # No text could be extracted (for example an image-only PDF), so
        # there is nothing to index; report it and end this run instead of
        # failing inside index construction.
        st.error("No extractable text was found in this PDF.")
        st.stop()
    index, embeddings = build_faiss_index(chunks)
    st.success("PDF processed. Ask a question now.")

    # The question is only answered once a PDF has been indexed, because
    # retrieval needs the chunks and index built above.
    if query:
        st.info("Retrieving relevant information...")
        top_chunks = retrieve_chunks(query, chunks, index, embeddings)
        # Doubled backslashes: this source is embedded in the app_code
        # string literal and becomes a plain newline escape in app.py.
        context = "\\n\\n".join(top_chunks)
        response = ask_groq(query, context)
        st.markdown("### 💬 Answer")
        st.write(response)
| ''' | |
# Write the generated Streamlit source to app.py. UTF-8 is requested
# explicitly because the source contains emoji that a platform's default
# encoding may be unable to encode.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)