Punit1 committed on
Commit
e2d2e34
·
verified ·
1 Parent(s): 40b81d1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ import numpy as np
7
+ from pypdf import PdfReader
8
+
9
# Load the sentence-embedding model used for retrieval.
# NOTE(review): both model loads below download weights on first run and
# execute at import time — the app blocks until they finish.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Load Phi-3-mini (generator LLM) and its tokenizer.
model_name = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Global storage shared between process_pdf (writer) and ask_question
# (reader): the text chunks and the FAISS index built over them.
# index stays None until a PDF has been processed.
chunks = []
index = None
21
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Stores the chunks and the index in the module-level globals so that
    ask_question can retrieve against them.

    Args:
        pdf_file: Path or file-like object supplied by the gr.File component.

    Returns:
        A status string to display in the UI.
    """
    global chunks, index

    reader = PdfReader(pdf_file)
    # extract_text() may return None for image-only pages; coalesce to ""
    # so we don't raise TypeError on concatenation. join() avoids the
    # quadratic cost of repeated string +=.
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # Guard: a scanned/empty PDF yields no text; encoding an empty chunk
    # list would crash below (embeddings.shape on an empty result).
    if not text.strip():
        return "No extractable text found in this PDF."

    # Fixed-size character chunking: 500 chars, no overlap.
    chunks = [text[i:i + 500] for i in range(0, len(text), 500)]

    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]

    # Exact (flat) L2 index — fine for the small corpora a single PDF yields.
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))

    return "PDF processed successfully!"
39
+
40
def ask_question(query):
    """Answer a question with retrieval-augmented generation.

    Retrieves the nearest chunks to the query from the FAISS index and
    prompts Phi-3-mini with them as context.

    Args:
        query: The user's question string.

    Returns:
        The model's answer string, or a status message if no PDF has been
        processed yet.
    """
    global chunks, index

    # Guard: searching before process_pdf has run would raise
    # AttributeError on index.search (index is still None).
    if index is None or not chunks:
        return "Please upload and process a PDF first."

    query_embedding = embed_model.encode([query])
    # k must not exceed the number of indexed chunks for tiny documents.
    k = min(3, len(chunks))
    D, I = index.search(np.array(query_embedding), k=k)

    context = "\n".join(chunks[i] for i in I[0])

    prompt = f"""
Use the context below to answer the question.

Context:
{context}

Question:
{query}

Answer:
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=200)

    # Decode only the newly generated tokens; decoding outputs[0] whole
    # would echo the entire prompt back in the answer.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
65
+
66
# Gradio UI: component creation order inside the Blocks context defines
# the page layout (upload controls on top, Q&A below).
with gr.Blocks() as demo:
    gr.Markdown("# 📚 Minimal RAG with Phi-3-mini")

    # PDF upload + indexing controls.
    pdf_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox()  # shows the status string returned by process_pdf

    # Question/answer pair; submitting the question textbox triggers the model.
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer")

    upload_btn.click(process_pdf, inputs=pdf_input, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()