Spaces:

Punit1
/

pdf-analyzer

Sleeping

App Files Files Community

Punit1 committed 9 days ago

Commit

e2d2e34

verified ·

1 Parent(s): 40b81d1

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+from pypdf import PdfReader
+# Load embedding model
+embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Load Phi-3-mini
+model_name = "microsoft/Phi-3-mini-4k-instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+# Global storage
+chunks = []
+index = None
+def process_pdf(pdf_file):
+    global chunks, index
+    reader = PdfReader(pdf_file)
+    text = ""
+    for page in reader.pages:
+        text += page.extract_text()
+    # Chunking
+    chunks = [text[i:i+500] for i in range(0, len(text), 500)]
+    embeddings = embed_model.encode(chunks)
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(embeddings))
+    return "PDF processed successfully!"
+def ask_question(query):
+    global chunks, index
+    query_embedding = embed_model.encode([query])
+    D, I = index.search(np.array(query_embedding), k=3)
+    context = "\n".join([chunks[i] for i in I[0]])
+    prompt = f"""
+    Use the context below to answer the question.
+    Context:
+    {context}
+    Question:
+    {query}
+    Answer:
+    """
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=200)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+with gr.Blocks() as demo:
+    gr.Markdown("# 📚 Minimal RAG with Phi-3-mini")
+    pdf_input = gr.File(label="Upload PDF")
+    upload_btn = gr.Button("Process PDF")
+    status = gr.Textbox()
+    question = gr.Textbox(label="Ask a question")
+    answer = gr.Textbox(label="Answer")
+    upload_btn.click(process_pdf, inputs=pdf_input, outputs=status)
+    question.submit(ask_question, inputs=question, outputs=answer)
+demo.launch()