Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import PyPDF2 | |
| import tiktoken | |
| import faiss | |
| import numpy as np | |
| import os | |
| from sentence_transformers import SentenceTransformer | |
| import requests | |
# Load the sentence-embedding model once at import time; it is shared by
# every request handled by this process.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# GROQ API configuration (key loaded securely from an environment variable;
# GROQ_API_KEY is None when the variable is unset — checked in generate_answer).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
LLAMA3_MODEL = "llama3-8b-8192"
# Extract text from PDF
def load_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: A path or binary file-like object readable by PyPDF2.

    Returns:
        str: The text of all pages joined together. Pages with no
        extractable text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # PyPDF2's extract_text() can return None for image-only/scanned pages;
    # without the `or ""` guard, str.join raises TypeError on such PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Chunk text
def chunk_text(text, chunk_size=500, overlap=0):
    """Split text into whitespace-tokenized chunks of at most chunk_size words.

    Args:
        text: Source text; an empty or whitespace-only string yields [].
        chunk_size: Maximum number of words per chunk (must be > 0).
        overlap: Number of words shared between consecutive chunks,
            0 <= overlap < chunk_size. Defaults to 0, which reproduces the
            original non-overlapping behavior exactly.

    Returns:
        list[str]: The chunks, each a single-space-joined run of words.

    Raises:
        ValueError: If chunk_size or overlap is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for i in range(0, len(words), step):
        chunks.append(' '.join(words[i:i + chunk_size]))
        # Stop once a chunk reaches the end of the text, so an overlap never
        # produces a trailing chunk fully contained in the previous one.
        if i + chunk_size >= len(words):
            break
    return chunks
# Generate embeddings
def get_embeddings(chunks):
    """Encode each text chunk into a dense vector using the shared model."""
    vectors = embedding_model.encode(chunks)
    return vectors
# Create FAISS index
def create_faiss_index(embeddings):
    """Build an exact L2 (Euclidean) FAISS index over the given embeddings.

    Args:
        embeddings: 2-D array-like of shape (n_chunks, dim).

    Returns:
        faiss.IndexFlatL2: Index containing every row of `embeddings`.
    """
    # FAISS only accepts contiguous float32 matrices; asarray/ascontiguousarray
    # coerce when needed and avoid a copy when the input already complies.
    vectors = np.ascontiguousarray(np.asarray(embeddings, dtype=np.float32))
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
# Search index
def search_index(index, query, chunks, top_k=3):
    """Return up to top_k chunks most similar to `query` (smallest L2 distance).

    Args:
        index: FAISS index built over the embeddings of `chunks`.
        query: Natural-language question to embed and search for.
        chunks: The text chunks in the same order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        list[str]: Up to top_k matching chunks, best match first.
    """
    if not chunks:
        return []
    q_embed = embedding_model.encode([query])
    # Asking FAISS for more neighbors than the index holds pads the result
    # with -1, which would silently alias chunks[-1]; clamp k and filter.
    k = min(top_k, len(chunks))
    _, indices = index.search(np.asarray(q_embed, dtype=np.float32), k)
    return [chunks[i] for i in indices[0] if i != -1]
# Generate answer using GROQ
def generate_answer(prompt):
    """Send `prompt` to the GROQ chat-completions API and return the reply text.

    Args:
        prompt: Full user prompt (context plus question).

    Returns:
        str: The model's answer, or a human-readable error message when the
        API key is not configured.

    Raises:
        requests.exceptions.HTTPError: On non-2xx API responses (caught by
            the caller in main()).
        requests.exceptions.Timeout: If the API does not respond in time.
    """
    if not GROQ_API_KEY:
        # Original message contained mojibake; plain text renders reliably.
        return "GROQ API key not found. Please set it in environment variables."
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": LLAMA3_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    }
    # A timeout prevents a stalled API call from hanging the Streamlit UI forever.
    response = requests.post(GROQ_URL, headers=headers, json=data, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
# Streamlit UI
def main():
    """Streamlit app: upload a PDF, index it, then answer questions about it."""
    st.set_page_config("RAG App", layout="centered")
    # Original titles/labels contained mojibake emoji; replaced with plain text.
    st.title("PDF QA App with LLaMA 3 & GROQ")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
    if uploaded_file and st.button("Process PDF"):
        with st.spinner("Processing..."):
            text = load_pdf(uploaded_file)
            chunks = chunk_text(text)
            embeddings = get_embeddings(chunks)
            index = create_faiss_index(embeddings)
            # Persist across Streamlit reruns so questions can be asked later.
            st.session_state.chunks = chunks
            st.session_state.index = index
        st.success("PDF processed and indexed.")

    if "index" in st.session_state:
        query = st.text_input("Ask a question about the PDF:")
        if st.button("Get Answer"):
            # Guard: clicking with an empty box would run the whole
            # retrieval + LLM pipeline on a blank question.
            if not query.strip():
                st.warning("Please enter a question first.")
                return
            with st.spinner("Thinking..."):
                top_chunks = search_index(
                    st.session_state.index, query, st.session_state.chunks
                )
                context = "\n\n".join(top_chunks)
                prompt = (
                    "Use the following context to answer the question:\n\n"
                    f"{context}\n\nQuestion: {query}"
                )
                try:
                    answer = generate_answer(prompt)
                    st.markdown("### Answer:")
                    st.write(answer)
                except requests.exceptions.HTTPError as e:
                    st.error(f"API Error: {e}")
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()