Punit1 committed on
Commit
e6c2792
·
verified ·
1 Parent(s): e2d2e34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -26
app.py CHANGED
@@ -1,33 +1,70 @@
1
  import gradio as gr
2
  import torch
 
 
 
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from sentence_transformers import SentenceTransformer
5
- import faiss
6
- import numpy as np
7
  from pypdf import PdfReader
8
 
9
- # Load embedding model
10
- embed_model = SentenceTransformer("all-MiniLM-L6-v2")
11
-
12
- # Load Phi-3-mini
 
 
 
 
 
 
 
 
 
 
 
13
  model_name = "microsoft/Phi-3-mini-4k-instruct"
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
- model = AutoModelForCausalLM.from_pretrained(model_name)
16
 
17
- # Global storage
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  chunks = []
19
  index = None
20
 
 
 
 
21
  def process_pdf(pdf_file):
22
  global chunks, index
23
-
 
24
  reader = PdfReader(pdf_file)
25
  text = ""
 
26
  for page in reader.pages:
27
- text += page.extract_text()
 
 
 
 
 
 
28
 
29
- # Chunking
30
- chunks = [text[i:i+500] for i in range(0, len(text), 500)]
31
 
32
  embeddings = embed_model.encode(chunks)
33
  dimension = embeddings.shape[1]
@@ -35,40 +72,88 @@ def process_pdf(pdf_file):
35
  index = faiss.IndexFlatL2(dimension)
36
  index.add(np.array(embeddings))
37
 
38
- return "PDF processed successfully!"
39
 
 
 
 
 
 
40
  def ask_question(query):
41
  global chunks, index
42
 
 
 
 
 
 
 
 
43
  query_embedding = embed_model.encode([query])
44
- D, I = index.search(np.array(query_embedding), k=3)
 
 
45
 
46
  context = "\n".join([chunks[i] for i in I[0]])
47
 
48
- prompt = f"""
49
- Use the context below to answer the question.
 
 
 
 
 
 
50
 
51
- Context:
52
- {context}
 
53
 
54
- Question:
55
- {query}
 
56
 
57
- Answer:
58
- """
59
 
60
  inputs = tokenizer(prompt, return_tensors="pt")
61
- outputs = model.generate(**inputs, max_new_tokens=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
64
- return response
65
 
 
 
 
 
 
 
 
 
 
 
 
66
  with gr.Blocks() as demo:
67
- gr.Markdown("# 📚 Minimal RAG with Phi-3-mini")
68
 
69
  pdf_input = gr.File(label="Upload PDF")
70
  upload_btn = gr.Button("Process PDF")
71
- status = gr.Textbox()
72
 
73
  question = gr.Textbox(label="Ask a question")
74
  answer = gr.Textbox(label="Answer")
 
import gradio as gr
import torch
import time
import logging
import numpy as np
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# ==============================
# Logging Setup
# ==============================
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ==============================
# Load Embedding Model
# ==============================
# Small sentence-embedding model used both to index PDF chunks and to
# embed user queries; loaded once at import time.
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# ==============================
# Load Phi-3 Mini (CPU Optimized)
# ==============================
model_name = "microsoft/Phi-3-mini-4k-instruct"

logger.info("Loading Phi-3-mini model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# float32 + low_cpu_mem_usage for CPU-only inference (no GPU assumed here).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)

model.to("cpu")
model.eval()  # inference mode: disables dropout etc.

logger.info("Model loaded successfully.")

# ==============================
# Global Storage
# ==============================
# Populated by process_pdf() and read by ask_question():
#   chunks - list[str] of fixed-size text slices of the uploaded PDF
#   index  - FAISS L2 index over the chunk embeddings (None until a PDF
#            has been processed)
chunks = []
index = None
47
 
48
# ==============================
# PDF Processing
# ==============================
def process_pdf(pdf_file):
    """Extract text from *pdf_file*, chunk it, and build a FAISS index.

    Updates the module-level ``chunks`` and ``index`` globals that
    ``ask_question`` reads.

    Parameters:
        pdf_file: file path / file-like object from the Gradio File input,
            or None when no file was selected.

    Returns:
        A human-readable status string shown in the Gradio status box.
    """
    global chunks, index

    # Gradio passes None when the button is clicked with no file chosen.
    if pdf_file is None:
        return "Please select a PDF file first."

    logger.info("Processing PDF...")
    reader = PdfReader(pdf_file)

    # Collect page texts in a list and join once (avoids quadratic +=).
    parts = []
    for page in reader.pages:
        # extract_text() can return None/"" for image-only pages.
        content = page.extract_text()
        if content:
            parts.append(content)
    text = "".join(parts)

    # Smaller chunks = faster generation (less context fed to the LLM).
    chunk_size = 350
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    if not chunks:
        # Scanned / image-only PDF: encoding an empty list would crash on
        # embeddings.shape[1] below, so bail out with a clear message.
        index = None
        return "No extractable text found in this PDF."

    logger.info(f"Total chunks created: {len(chunks)}")

    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]

    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))

    logger.info("FAISS index built successfully.")

    return "✅ PDF processed successfully!"
78
+
79
# ==============================
# Smart RAG Question Answering
# ==============================
def ask_question(query):
    """Answer *query* using context retrieved from the indexed PDF.

    Embeds the query, retrieves the nearest chunks from the FAISS index,
    builds a Phi-3 instruct prompt, and generates an answer on CPU.

    Parameters:
        query: the user's question (str).

    Returns:
        The model's answer text, or a hint string when no PDF has been
        processed yet.
    """
    global chunks, index

    if index is None:
        return "Please upload and process a PDF first."

    start_total = time.time()
    logger.info("Received question.")

    # Embed query
    query_embedding = embed_model.encode([query])

    # Retrieve top 2 relevant chunks — but never ask for more neighbors
    # than the index holds (FAISS pads missing hits with index -1, which
    # would silently wrap to chunks[-1]).
    k = min(2, len(chunks))
    D, I = index.search(np.array(query_embedding), k=k)

    context = "\n".join([chunks[i] for i in I[0] if i >= 0])

    # Phi-3 Instruct Template (CRITICAL)
    prompt = f"""<|system|>
You are an expert AI assistant.
Answer clearly, accurately, and concisely.
Use structured explanation when helpful.
Avoid repeating the question.
If answer not in context, say so.
<|end|>

<|user|>
Context:
{context}

Question:
{query}
<|end|>

<|assistant|>
"""

    inputs = tokenizer(prompt, return_tensors="pt")

    logger.info(f"Prompt token length: {len(inputs['input_ids'][0])}")

    with torch.no_grad():
        start_gen = time.time()

        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            temperature=0.6,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.15,
            use_cache=True
        )

        logger.info(f"Generation time: {time.time() - start_gen:.2f}s")

    # Decode ONLY the newly generated tokens. The previous approach of
    # splitting the full decoded text on "<|assistant|>" fails because
    # skip_special_tokens=True strips that marker from the output, so
    # the split returned the entire prompt + answer. Slicing off the
    # prompt tokens is robust regardless of special-token handling.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    logger.info(f"Total response time: {time.time() - start_total:.2f}s")

    return answer
146
+
147
+
148
+ # ==============================
149
+ # Gradio UI
150
+ # ==============================
151
  with gr.Blocks() as demo:
152
+ gr.Markdown("# 📚 Optimized RAG with Phi-3-mini")
153
 
154
  pdf_input = gr.File(label="Upload PDF")
155
  upload_btn = gr.Button("Process PDF")
156
+ status = gr.Textbox(label="Status")
157
 
158
  question = gr.Textbox(label="Ask a question")
159
  answer = gr.Textbox(label="Answer")