import gradio as gr
import os
import fitz # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
# ✅ Load Groq API key securely
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)
# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
stored_chunks = []
stored_index = None
def extract_text_from_pdf(pdf_path):
doc = fitz.open(pdf_path)
text = ""
for page in doc:
text += page.get_text()
return text
def handle_pdf(file_path):
global stored_chunks, stored_index
try:
# Read text
text = extract_text_from_pdf(file_path)
# Simple chunking by 500 words
words = text.split()
chunks = [' '.join(words[i:i+500]) for i in range(0, len(words), 500)]
# Embed and build FAISS index
embeddings = model.encode(chunks)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))
# Store for later use
stored_chunks = chunks
stored_index = index
return "β
PDF successfully processed. Ready for questions."
except Exception as e:
return f"β Error during PDF processing: {str(e)}"
def answer_query(query):
if not stored_chunks or stored_index is None:
return "β Please upload and process a PDF first."
try:
query_vec = model.encode(query).reshape(1, -1)
D, I = stored_index.search(query_vec, k=3)
top_chunks = [stored_chunks[i] for i in I[0]]
context = "\n\n".join(top_chunks)
prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
response = client.chat.completions.create(
model="llama3-8b-8192",
messages=[{"role": "user", "content": prompt}],
temperature=0.2
)
return response.choices[0].message.content.strip()
except Exception as e:
return f"β Error during answering: {str(e)}"
# π§ Gradio UI
with gr.Blocks() as demo:
gr.Markdown("# π PDF Q&A using Groq + LLaMA3")
with gr.Row():
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
process_output = gr.Textbox(label="Processing Status")
process_button = gr.Button("π₯ Process PDF")
process_button.click(fn=handle_pdf, inputs=[file_input], outputs=[process_output])
gr.Markdown("## π¬ Ask a Question from the PDF")
question_input = gr.Textbox(label="Your Question")
ask_button = gr.Button("π€ Ask")
answer_output = gr.Textbox(label="Answer", lines=5)
ask_button.click(fn=answer_query, inputs=[question_input], outputs=[answer_output])
demo.launch()