# PDF Question Answering app — Gradio UI, FAISS retrieval, Groq LLM.
import os
import gradio as gr
import faiss
import numpy as np
from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
# -----------------------
# Load models
# -----------------------
# Sentence-embedding model used for both document chunks and user queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Groq API client; reads GROQ_API_KEY from the environment (None if unset).
groq_client = Groq(
api_key=os.environ.get("GROQ_API_KEY")
# api_key=userdata.get('RAG')
)
# -----------------------
# Global storage
# -----------------------
# FAISS index over the current PDF's chunk embeddings; None until a PDF is processed.
faiss_index = None
# Text chunks, positionally aligned with the vectors stored in faiss_index.
document_chunks = []
# -----------------------
# Helper functions
# -----------------------
def read_pdf(file):
    """Extract plain text from every page of a PDF.

    Args:
        file: Path or file-like object accepted by ``pypdf.PdfReader``.

    Returns:
        The concatenated text of all pages. Pages with no extractable
        text (e.g. scanned images) contribute an empty string instead
        of crashing the pipeline.
    """
    reader = PdfReader(file)
    # extract_text() may return None for image-only pages; the original
    # `text += page.extract_text()` raised TypeError in that case.
    # str.join also avoids quadratic string concatenation on large PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)
def chunk_text(text, chunk_size=500, overlap=50):
chunks = []
start = 0
while start < len(text):
end = start + chunk_size
chunk = text[start:end]
chunks.append(chunk)
start = end - overlap
return chunks
def create_faiss_index(chunks):
    """Embed *chunks* and (re)build the module-level FAISS L2 index.

    Side effects: replaces the globals ``faiss_index`` and
    ``document_chunks`` so later searches operate on this document.
    """
    global faiss_index, document_chunks
    document_chunks = chunks
    # FAISS requires contiguous float32 vectors.
    vectors = np.asarray(embedding_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    faiss_index = index
def search_chunks(query, top_k=3):
    """Return up to *top_k* stored chunks most similar to *query*.

    Args:
        query: Natural-language question to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        List of chunk strings ordered by increasing L2 distance; may be
        shorter than ``top_k`` when the index holds fewer vectors.
    """
    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")
    distances, indices = faiss_index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when the index contains fewer than
    # top_k vectors; the original code then returned document_chunks[-1]
    # (the last chunk) for each pad slot. Skip the padding instead.
    return [document_chunks[idx] for idx in indices[0] if idx >= 0]
def ask_groq(context, question):
    """Ask the Groq-hosted LLM *question*, grounded in *context*.

    Sends a single user message containing the retrieved text followed
    by the question, and returns the model's reply as a string.
    """
    prompt = f"""
Use the text below to answer the question.
Text:
{context}
Question:
{question}
"""
    completion = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
# -----------------------
# Gradio functions
# -----------------------
def upload_pdf(file):
    """Gradio handler: read an uploaded PDF, chunk it, and index it.

    Returns a status string shown in the UI.
    """
    if file is None:
        return "Please upload a PDF first."
    # Pipeline: raw text -> overlapping chunks -> FAISS index (globals).
    raw_text = read_pdf(file)
    create_faiss_index(chunk_text(raw_text))
    return "PDF processed. You can now ask questions."
def answer_question(question):
    """Gradio handler: answer *question* from the indexed PDF.

    Returns either the LLM's answer or a prompt to upload a PDF first.
    """
    if faiss_index is None:
        return "Please upload a PDF first."
    # Retrieve the most relevant chunks and hand them to the LLM as context.
    context = "\n".join(search_chunks(question))
    return ask_groq(context, question)
# -----------------------
# Gradio UI
# -----------------------
# Build the two-step UI: process a PDF, then ask questions against it.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answer App")
    # Step 1: upload and index a PDF.
    pdf_file = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", lines=1)
    # Step 2: ask questions once indexing succeeded.
    question = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your question here"
    )
    ask_btn = gr.Button("Ask Question")
    answer = gr.Textbox(
        label="Answer",
        lines=10
    )
    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(answer_question, inputs=question, outputs=answer)

demo.launch()