File size: 2,835 Bytes
698a416
 
 
 
 
 
 
 
 
3758b92
698a416
 
 
 
 
 
 
 
 
 
 
2b6093a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698a416
 
 
 
 
 
 
 
 
 
2b6093a
698a416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b6093a
698a416
 
 
 
 
2b6093a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# πŸ” Groq API Key (embed securely for private use only)
GROQ_API_KEY = "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"

# πŸ“¦ Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# πŸ“‚ Global storage for vector index and chunks
chunks = []
index = None

# πŸ“„ PDF Text Extraction & Processing
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Stores the resulting chunks and index in the module-level ``chunks`` and
    ``index`` globals so that ``ask_question`` can retrieve against them.
    Returns a user-facing status string (success or error) for the Gradio UI.
    """
    global chunks, index
    try:
        reader = PdfReader(file.name)
        # extract_text() can return None for image-only pages; treat as "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages)

        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # 📝 Sliding-window chunking: 300-word windows advancing 250 words,
        # i.e. a 50-word overlap between consecutive chunks.
        window, overlap = 300, 50
        words = text.split()
        step = window - overlap
        chunks = [" ".join(words[start:start + window]) for start in range(0, len(words), step)]

        # 📊 Embed every chunk and index the vectors with an exact L2 index.
        vectors = np.array(model.encode(chunks))
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)

        return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing the PDF: {str(e)}"

# ❓ Ask a Question
def ask_question(query):
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k=3)
    context = "\n".join([chunks[i] for i in indices[0]])

    # Use Groq API for question answering
    client = Groq(api_key=GROQ_API_KEY)
    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"

    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192"
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {str(e)}"

# πŸŽ›οΈ Gradio Interface
file_input = gr.File(label="πŸ“„ Upload PDF")
question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
answer_output = gr.Textbox(label="πŸ“˜ Answer")

pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)

# Create a tabbed interface with "Upload PDF" and "Ask a Question" tabs
app = gr.TabbedInterface(
    [pdf_processor, pdf_qa],
    tab_names=["Upload PDF", "Ask a Question"]
)

# Launch the app
if __name__ == "__main__":
    app.launch()