File size: 2,677 Bytes
fad3f30
 
 
 
 
 
 
 
 
 
593fe27
 
fad3f30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
import os
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Set your Groq API key directly (recommended for Hugging Face Spaces)

# NOTE(review): os.getenv returns None when RAG_API is unset; Groq(api_key=None)
# will fail only at request time — consider failing fast here. TODO confirm.
GROQ_API_KEY = os.getenv("RAG_API")  # Use your custom environment variable name

# Initialize Groq client (module-level: shared by every Streamlit rerun)
groq_client = Groq(api_key=GROQ_API_KEY)

# Load embedding model once at import time; used by create_faiss_index below
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: A binary file-like object readable by PyPDF2
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages joined together. Pages for which
        ``extract_text()`` yields nothing (None or "") are skipped,
        matching the original truthiness check.
    """
    reader = PdfReader(uploaded_file)
    # Build the result with a single join instead of repeated ``text +=``,
    # which is quadratic in the worst case for large documents.
    return "".join(
        page_text
        for page_text in (page.extract_text() for page in reader.pages)
        if page_text
    )

# Function to split text into chunks
def chunk_text(text):
    """Split *text* into overlapping pieces and wrap each as a Document.

    Uses a RecursiveCharacterTextSplitter with a 500-character window and
    a 50-character overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return [Document(page_content=piece) for piece in splitter.split_text(text)]

# Create FAISS vector index
def create_faiss_index(documents):
    """Build a FAISS vector store over *documents*.

    Embeds each document with the module-level ``embedding_model`` and
    returns the resulting searchable index.
    """
    index = FAISS.from_documents(documents, embedding_model)
    return index

# Search similar chunks
def search_faiss_index(query, index, k=3):
    """Return the *k* chunks from *index* most similar to *query*."""
    matches = index.similarity_search(query, k=k)
    return matches

# Generate answer using Groq model
def generate_answer(query, context_chunks):
    """Ask the Groq chat model to answer *query* using retrieved context.

    Args:
        query: The user's question.
        context_chunks: Documents whose ``page_content`` is concatenated
            (newline-separated) into the prompt as context.

    Returns:
        str: The model's answer text.
    """
    joined_context = "\n".join(doc.page_content for doc in context_chunks)
    prompt = (
        f"Answer the following question based on the context:\n\n"
        f"{joined_context}\n\nQuestion: {query}"
    )

    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",  # ✅ Correct current model name on Groq
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

# Streamlit UI
st.title("๐Ÿ“„ RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    with st.spinner("Reading and processing document..."):
        raw_text = extract_text_from_pdf(uploaded_file)
        documents = chunk_text(raw_text)
        vector_index = create_faiss_index(documents)
        st.success("Document processed and indexed successfully!")

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(question, vector_index)
            answer = generate_answer(question, related_chunks)
            st.subheader("๐Ÿ“Œ Answer:")
            st.write(answer)