Punit1 committed on
Commit
fd5c04a
·
verified ·
1 Parent(s): e6c2792

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -66
app.py CHANGED
@@ -1,31 +1,29 @@
1
  import gradio as gr
2
  import torch
3
- import time
4
- import logging
5
  import numpy as np
6
  import faiss
 
 
7
  from transformers import AutoTokenizer, AutoModelForCausalLM
8
  from sentence_transformers import SentenceTransformer
9
  from pypdf import PdfReader
10
 
11
- # ==============================
12
- # Logging Setup
13
- # ==============================
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
- # ==============================
18
  # Load Embedding Model
19
- # ==============================
20
- logger.info("Loading embedding model...")
21
  embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
22
 
23
- # ==============================
24
  # Load Phi-3 Mini (CPU Optimized)
25
- # ==============================
26
  model_name = "microsoft/Phi-3-mini-4k-instruct"
27
 
28
- logger.info("Loading Phi-3-mini model...")
29
  tokenizer = AutoTokenizer.from_pretrained(model_name)
30
 
31
  model = AutoModelForCausalLM.from_pretrained(
@@ -37,22 +35,20 @@ model = AutoModelForCausalLM.from_pretrained(
37
  model.to("cpu")
38
  model.eval()
39
 
40
- logger.info("Model loaded successfully.")
41
-
42
- # ==============================
43
  # Global Storage
44
- # ==============================
45
  chunks = []
46
  index = None
47
 
48
- # ==============================
49
- # PDF Processing
50
- # ==============================
51
- def process_pdf(pdf_file):
 
52
  global chunks, index
53
 
54
- logger.info("Processing PDF...")
55
- reader = PdfReader(pdf_file)
56
  text = ""
57
 
58
  for page in reader.pages:
@@ -60,49 +56,42 @@ def process_pdf(pdf_file):
60
  if content:
61
  text += content
62
 
63
- # Smaller chunks = faster generation
64
  chunk_size = 350
65
  chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
66
 
67
- logger.info(f"Total chunks created: {len(chunks)}")
68
-
69
  embeddings = embed_model.encode(chunks)
70
  dimension = embeddings.shape[1]
71
 
72
  index = faiss.IndexFlatL2(dimension)
73
  index.add(np.array(embeddings))
74
 
75
- logger.info("FAISS index built successfully.")
76
 
77
- return "✅ PDF processed successfully!"
78
 
79
- # ==============================
80
- # Smart RAG Question Answering
81
- # ==============================
82
- def ask_question(query):
83
  global chunks, index
84
 
85
  if index is None:
86
- return "Please upload and process a PDF first."
87
 
88
- start_total = time.time()
89
- logger.info("Received question.")
90
 
91
  # Embed query
92
- query_embedding = embed_model.encode([query])
93
-
94
- # Retrieve top 2 relevant chunks
95
  D, I = index.search(np.array(query_embedding), k=2)
96
 
97
  context = "\n".join([chunks[i] for i in I[0]])
98
 
99
- # Phi-3 Instruct Template (CRITICAL)
100
  prompt = f"""<|system|>
101
- You are an expert AI assistant.
102
- Answer clearly, accurately, and concisely.
103
- Use structured explanation when helpful.
104
  Avoid repeating the question.
105
- If answer not in context, say so.
106
  <|end|>
107
 
108
  <|user|>
@@ -110,7 +99,7 @@ Context:
110
  {context}
111
 
112
  Question:
113
- {query}
114
  <|end|>
115
 
116
  <|assistant|>
@@ -118,11 +107,7 @@ Question:
118
 
119
  inputs = tokenizer(prompt, return_tensors="pt")
120
 
121
- logger.info(f"Prompt token length: {len(inputs['input_ids'][0])}")
122
-
123
  with torch.no_grad():
124
- start_gen = time.time()
125
-
126
  outputs = model.generate(
127
  **inputs,
128
  max_new_tokens=120,
@@ -133,32 +118,45 @@ Question:
133
  use_cache=True
134
  )
135
 
136
- logger.info(f"Generation time: {time.time() - start_gen:.2f}s")
137
-
138
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
139
-
140
- # Remove prompt from response
141
  answer = response.split("<|assistant|>")[-1].strip()
142
 
143
- logger.info(f"Total response time: {time.time() - start_total:.2f}s")
144
 
145
  return answer
146
 
147
 
148
- # ==============================
149
- # Gradio UI
150
- # ==============================
151
- with gr.Blocks() as demo:
152
- gr.Markdown("# 📚 Optimized RAG with Phi-3-mini")
153
-
154
- pdf_input = gr.File(label="Upload PDF")
155
- upload_btn = gr.Button("Process PDF")
156
- status = gr.Textbox(label="Status")
157
-
158
- question = gr.Textbox(label="Ask a question")
159
- answer = gr.Textbox(label="Answer")
160
-
161
- upload_btn.click(process_pdf, inputs=pdf_input, outputs=status)
162
- question.submit(ask_question, inputs=question, outputs=answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  demo.launch()
 
1
  import gradio as gr
2
  import torch
 
 
3
  import numpy as np
4
  import faiss
5
+ import time
6
+ import logging
7
  from transformers import AutoTokenizer, AutoModelForCausalLM
8
  from sentence_transformers import SentenceTransformer
9
  from pypdf import PdfReader
10
 
11
+ # ==========================
12
+ # Logging
13
+ # ==========================
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
+ # ==========================
18
  # Load Embedding Model
19
+ # ==========================
 
20
  embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
21
 
22
+ # ==========================
23
  # Load Phi-3 Mini (CPU Optimized)
24
+ # ==========================
25
  model_name = "microsoft/Phi-3-mini-4k-instruct"
26
 
 
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
28
 
29
  model = AutoModelForCausalLM.from_pretrained(
 
35
  model.to("cpu")
36
  model.eval()
37
 
38
+ # ==========================
 
 
39
  # Global Storage
40
+ # ==========================
41
  chunks = []
42
  index = None
43
 
44
+
45
# ==========================
# Process PDF
# ==========================
def process_pdf(file):
    """Read a PDF, split its text into fixed-size chunks, and build a FAISS index.

    Populates the module-level ``chunks`` list and ``index`` (IndexFlatL2)
    so the chat function can retrieve relevant passages later.

    Args:
        file: Path or file-like object for the uploaded PDF, as supplied
            by the Gradio ``gr.File`` component.

    Returns:
        A status string for display in the UI.
    """
    global chunks, index

    reader = PdfReader(file)

    # Concatenate every page's text in one pass. extract_text() can return
    # None or "" for image-only pages, so coerce falsy results to "".
    # "".join avoids the quadratic cost of repeated `text +=` concatenation.
    # NOTE(review): the per-page extraction line was hidden in the rendered
    # diff; reconstructed from the `content` variable used just below.
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # Fixed-size character chunks; 350 chars keeps prompts small for CPU.
    chunk_size = 350
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # Guard: a scanned/image-only PDF yields no text, and encoding an empty
    # list would crash on `embeddings.shape[1]` below.
    if not chunks:
        return "No extractable text found in this PDF. Please try another file."

    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]

    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))

    return " PDF processed successfully. You can now start chatting."
69
 
 
70
 
71
+ # ==========================
72
+ # Chat Function (RAG + Phi-3)
73
+ # ==========================
74
+ def chat_fn(message, history):
75
  global chunks, index
76
 
77
  if index is None:
78
+ return "Please upload and process a PDF first."
79
 
80
+ start_time = time.time()
 
81
 
82
  # Embed query
83
+ query_embedding = embed_model.encode([message])
 
 
84
  D, I = index.search(np.array(query_embedding), k=2)
85
 
86
  context = "\n".join([chunks[i] for i in I[0]])
87
 
88
+ # Proper Phi-3 Instruct Template
89
  prompt = f"""<|system|>
90
+ You are a professional AI assistant.
91
+ Answer clearly, concisely and intelligently.
92
+ Use structured explanation if helpful.
93
  Avoid repeating the question.
94
+ If answer not found in context, say so.
95
  <|end|>
96
 
97
  <|user|>
 
99
  {context}
100
 
101
  Question:
102
+ {message}
103
  <|end|>
104
 
105
  <|assistant|>
 
107
 
108
  inputs = tokenizer(prompt, return_tensors="pt")
109
 
 
 
110
  with torch.no_grad():
 
 
111
  outputs = model.generate(
112
  **inputs,
113
  max_new_tokens=120,
 
118
  use_cache=True
119
  )
120
 
 
 
121
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
122
  answer = response.split("<|assistant|>")[-1].strip()
123
 
124
+ logger.info(f"Response time: {time.time() - start_time:.2f}s")
125
 
126
  return answer
127
 
128
 
129
# ==========================
# Beautiful Chat UI
# ==========================
with gr.Blocks(theme=gr.themes.Soft(), css="""
#chatbot {height: 600px}
""") as demo:

    gr.Markdown(
        """
        # 🤖 Smart RAG Assistant
        Powered by Phi-3 Mini + FAISS
        Upload a PDF and start chatting like ChatGPT.
        """
    )

    with gr.Row():
        # Left column: PDF upload and processing controls.
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()

        # Right column: chat interface backed by chat_fn (RAG + Phi-3).
        with gr.Column(scale=3):
            # NOTE(review): retry_btn/clear_btn were removed from
            # ChatInterface in Gradio 5 — confirm the pinned gradio
            # version still supports these keyword arguments.
            chatbot = gr.ChatInterface(
                fn=chat_fn,
                chatbot=gr.Chatbot(elem_id="chatbot"),
                textbox=gr.Textbox(placeholder="Ask something about the document...", container=False),
                title="📘 Document Chat",
                retry_btn="🔄 Retry",
                clear_btn="🗑 Clear Chat"
            )

    # Wire the upload button to PDF processing; result shown in `status`.
    upload_btn.click(process_pdf, inputs=pdf_file, outputs=status)

# Launch only when run as a script, so importing this module (e.g. from
# tests or a larger app) does not start the server as a side effect.
if __name__ == "__main__":
    demo.launch()