File size: 2,758 Bytes
e4f5822
 
11798ad
5a07078
2fd0797
e4f5822
 
 
5a07078
e4f5822
 
 
2fd0797
e4f5822
 
 
 
 
2fd0797
00f823c
 
 
 
 
2fd0797
5a07078
97f8372
e4f5822
d9c5c29
5a07078
 
d9c5c29
5a07078
 
 
e4f5822
5a07078
d9c5c29
5a07078
 
11798ad
5a07078
11798ad
 
 
5a07078
11798ad
 
 
 
 
 
 
 
1fb2c6f
 
11798ad
 
 
 
 
 
 
 
 
 
 
 
5a07078
11798ad
5a07078
11798ad
5a07078
11798ad
 
 
5a07078
11798ad
 
5a07078
11798ad
5a07078
11798ad
 
 
 
5a07078
e4f5822
11798ad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
import os
import fitz  # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq

# βœ… Load Groq API key securely
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; the Groq
# client is still constructed and will fail only at request time — confirm
# this is the intended failure mode.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Load embedding model (downloaded on first run; loaded once at import time)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level RAG state: written by handle_pdf(), read by answer_query().
# stored_chunks holds the word-chunked text of the last processed PDF;
# stored_index is the FAISS index over those chunks' embeddings (None until
# a PDF has been processed).
stored_chunks = []
stored_index = None

def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        A single string with all pages' text in document order.
    """
    # fitz.Document supports the context-manager protocol; the original code
    # never closed the handle, leaking it on every call (and on any exception
    # raised while reading pages). str.join also avoids quadratic `+=`.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

def handle_pdf(file_path):
    """Extract, chunk, embed, and index a PDF for later question answering.

    Side effects: replaces the module-level ``stored_chunks`` and
    ``stored_index`` on success.

    Args:
        file_path: Path to the uploaded PDF.

    Returns:
        A status string for the UI (success or error message).
    """
    global stored_chunks, stored_index

    try:
        # Read text
        text = extract_text_from_pdf(file_path)

        # Simple chunking by 500 words
        words = text.split()
        # Guard: a scanned/empty PDF yields no words; without this check,
        # model.encode([]) and embeddings.shape[1] below would raise.
        if not words:
            return "❌ Error during PDF processing: no extractable text found in this PDF."

        chunks = [' '.join(words[i:i+500]) for i in range(0, len(words), 500)]

        # Embed and build FAISS index. FAISS only accepts contiguous
        # float32 matrices, so convert explicitly rather than relying on
        # the encoder's output dtype.
        embeddings = np.asarray(model.encode(chunks), dtype=np.float32)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Store for later use
        stored_chunks = chunks
        stored_index = index

        return "βœ… PDF successfully processed. Ready for questions."
    except Exception as e:
        return f"❌ Error during PDF processing: {str(e)}"

def answer_query(query):
    """Answer a question via retrieval-augmented generation over the stored PDF.

    Retrieves the top chunks nearest to the query embedding and asks the
    Groq-hosted LLaMA3 model to answer from that context.

    Args:
        query: The user's natural-language question.

    Returns:
        The model's answer, or an error/status message string.
    """
    if not stored_chunks or stored_index is None:
        return "❌ Please upload and process a PDF first."

    try:
        # FAISS requires float32 row vectors.
        query_vec = np.asarray(model.encode(query), dtype=np.float32).reshape(1, -1)
        # Never request more neighbours than there are indexed chunks:
        # FAISS pads missing results with index -1, which the original code
        # turned into stored_chunks[-1] — silently duplicating the last chunk.
        k = min(3, len(stored_chunks))
        D, I = stored_index.search(query_vec, k=k)
        top_chunks = [stored_chunks[i] for i in I[0] if i >= 0]

        context = "\n\n".join(top_chunks)
        prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""

        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"❌ Error during answering: {str(e)}"

# 🧠 Gradio UI: one row for upload/processing, then a Q&A section below.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ“„ PDF Q&A using Groq + LLaMA3")

    # Upload row: file picker, status readout, and the trigger button.
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        status_box = gr.Textbox(label="Processing Status")
        process_btn = gr.Button("πŸ“₯ Process PDF")

    # Wire processing: selected file in, status message out.
    process_btn.click(fn=handle_pdf, inputs=[pdf_file], outputs=[status_box])

    # Q&A section: free-text question against the processed PDF.
    gr.Markdown("## πŸ’¬ Ask a Question from the PDF")
    question_box = gr.Textbox(label="Your Question")
    ask_btn = gr.Button("πŸ€– Ask")
    answer_box = gr.Textbox(label="Answer", lines=5)

    ask_btn.click(fn=answer_query, inputs=[question_box], outputs=[answer_box])

demo.launch()