# Exported from a Hugging Face Space (web-page scraper artifacts preserved below as comments):
#   Spaces: Sleeping
#   File size: 2,835 Bytes
#   blob ids: 698a416 3758b92 698a416 2b6093a 698a416 2b6093a 698a416 2b6093a 698a416 2b6093a
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# 🔑 Groq API key.
# SECURITY: the original file hard-coded a live API key in source. A committed
# secret must be treated as leaked — rotate it, and supply the replacement via
# the GROQ_API_KEY environment variable instead of embedding it here.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# 📦 Sentence-embedding model used for both document chunks and queries.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 📚 Global state shared between the two Gradio tabs:
# chunks — list[str] of overlapping word-window excerpts from the last PDF
# index  — FAISS L2 index over the chunk embeddings (None until a PDF is processed)
chunks = []
index = None
# π PDF Text Extraction & Processing
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Stores results in the module-level ``chunks`` list and ``index`` so that
    ``ask_question`` can retrieve context later.

    Args:
        file: Gradio file object; only its ``.name`` (a path) is used.

    Returns:
        A human-readable status string (success or error).
    """
    global chunks, index
    try:
        reader = PdfReader(file.name)
        # extract_text() can return None for image-only pages; coalesce to "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # 🔍 Overlapping word-window chunking: the overlap keeps sentences that
        # straddle a boundary retrievable from at least one chunk.
        chunk_size = 300
        chunk_overlap = 50
        step = chunk_size - chunk_overlap  # must stay positive
        words = text.split()
        chunks = [
            " ".join(words[i:i + chunk_size])
            for i in range(0, len(words), step)
        ]

        # 🔍 Embed every chunk and index with exact (brute-force) L2 search.
        embeddings = model.encode(chunks)
        dimension = embeddings.shape[1]
        index = faiss.IndexFlatL2(dimension)
        # FAISS requires contiguous float32 input; coerce explicitly rather
        # than relying on the encoder's output dtype.
        index.add(np.asarray(embeddings, dtype="float32"))

        # NOTE: original return string was split across two source lines
        # (unterminated literal) — rejoined here.
        return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing the PDF: {e}"
# β Ask a Question
def ask_question(query):
    """Answer *query* using context retrieved from the processed PDF.

    Retrieves the nearest chunks from the FAISS index and asks a Groq-hosted
    LLM to answer grounded in that context.

    Args:
        query: The user's question.

    Returns:
        The model's answer, or a human-readable warning/error string.
    """
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    query_embedding = model.encode([query])
    # Never request more neighbours than chunks exist: FAISS pads missing
    # results with index -1, and chunks[-1] would silently fetch the wrong
    # (last) chunk via Python's negative indexing.
    k = min(3, len(chunks))
    distances, indices = index.search(
        np.asarray(query_embedding, dtype="float32"), k
    )
    context = "\n".join(chunks[i] for i in indices[0] if i >= 0)

    # Use the Groq API for question answering over the retrieved context.
    client = Groq(api_key=GROQ_API_KEY)
    prompt = (
        f"Answer the question based on the following context:\n\n"
        f"{context}\n\nQuestion: {query}"
    )
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {e}"
# 🎛️ Gradio interface: one tab uploads/indexes a PDF, the other asks questions.
file_input = gr.File(label="π Upload PDF")
question_input = gr.Textbox(label="β Ask a Question about the PDF")
answer_output = gr.Textbox(label="π Answer")

pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)

# Combine both interfaces into a tabbed app.
app = gr.TabbedInterface(
    [pdf_processor, pdf_qa],
    tab_names=["Upload PDF", "Ask a Question"],
)

# Launch only when run as a script; the original had app.launch() unindented
# under the guard, which is a syntax error.
if __name__ == "__main__":
    app.launch()
# (trailing scraper gutter residue removed)