# RAG-based PDF QA app for Hugging Face Spaces (Groq + FAISS).
import os

import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Groq API key is read from the environment (custom secret name "RAG_API",
# set in the Hugging Face Space settings). May be None if unset — Groq calls
# will then fail at request time.
GROQ_API_KEY = os.getenv("RAG_API")

# Client for Groq's chat-completions API.
groq_client = Groq(api_key=GROQ_API_KEY)

# Sentence-transformer embedding model, shared by indexing and querying so
# that query vectors live in the same space as the indexed chunks.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
def extract_text_from_pdf(uploaded_file):
    """Extract all text from an uploaded PDF.

    Args:
        uploaded_file: A file-like object (e.g. Streamlit's UploadedFile)
            accepted by PyPDF2's ``PdfReader``.

    Returns:
        str: The text of every page, joined with newlines. The newline
        separator keeps the last word of one page from fusing with the
        first word of the next; pages with no extractable text (e.g.
        scanned images) are skipped.
    """
    reader = PdfReader(uploaded_file)
    # Collect per-page text and join once — avoids the quadratic ``+=``
    # string build and inserts a page separator.
    pages = [page.extract_text() for page in reader.pages]
    return "\n".join(text for text in pages if text)
def chunk_text(text):
    """Split *text* into overlapping chunks wrapped as LangChain Documents.

    Chunks are 500 characters with a 50-character overlap, so content that
    spans a boundary appears in both neighbouring chunks.
    """
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_text(text)
    return [Document(page_content=piece) for piece in pieces]
def create_faiss_index(documents):
    """Build an in-memory FAISS vector store over *documents*.

    Embeddings come from the module-level HuggingFace model, so queries
    made later against the index use the same embedding space.
    """
    index = FAISS.from_documents(documents, embedding_model)
    return index
def search_faiss_index(query, index, k=3):
    """Return the *k* chunks in *index* most similar to *query*.

    Args:
        query: Natural-language question to embed and search with.
        index: A FAISS vector store built by ``create_faiss_index``.
        k: Number of nearest chunks to return (default 3).
    """
    matches = index.similarity_search(query, k=k)
    return matches
def generate_answer(query, context_chunks):
    """Ask the Groq LLM to answer *query* grounded in the retrieved chunks.

    Args:
        query: The user's question.
        context_chunks: Documents returned by the FAISS similarity search.

    Returns:
        str: The model's answer text.
    """
    # Flatten the retrieved chunks into one context string for the prompt.
    context_parts = [doc.page_content for doc in context_chunks]
    context = "\n".join(context_parts)
    prompt = f"""Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"""
    messages = [{"role": "user", "content": prompt}]
    completion = groq_client.chat.completions.create(
        messages=messages,
        model="llama-3.1-8b-instant",
    )
    return completion.choices[0].message.content
# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    # Streamlit reruns this whole script on every widget interaction, so
    # cache the index in session_state — otherwise the PDF is re-read,
    # re-chunked and re-embedded every time the user submits a question.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        with st.spinner("Reading and processing document..."):
            raw_text = extract_text_from_pdf(uploaded_file)
            documents = chunk_text(raw_text)
            st.session_state["vector_index"] = create_faiss_index(documents)
            st.session_state["indexed_file"] = uploaded_file.name
        st.success("Document processed and indexed successfully!")

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(
                question, st.session_state["vector_index"]
            )
            answer = generate_answer(question, related_chunks)
        st.subheader("Answer:")
        st.write(answer)