heerjtdev committed on
Commit
229e510
·
verified ·
1 Parent(s): d5b0c87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -179,7 +179,6 @@
179
 
180
 
181
 
182
-
183
  import gradio as gr
184
  import fitz # PyMuPDF
185
  import torch
@@ -191,16 +190,19 @@ from langchain_community.vectorstores import FAISS
191
  from langchain_core.embeddings import Embeddings
192
 
193
  # --- ONNX & MODEL IMPORTS ---
194
- from transformers import AutoTokenizer, Pipeline
195
  from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
 
196
 
197
  # ---------------------------------------------------------
198
  # 1. Custom ONNX Embedding Class (BGE-Large)
199
  # ---------------------------------------------------------
200
  class OnnxBgeEmbeddings(Embeddings):
201
- def __init__(self, model_name="BAAI/bge-large-en-v1.5", file_name="model.onnx"):
202
  print(f"🔄 Loading Embeddings: {model_name}...")
203
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
204
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
205
 
206
  def _process_batch(self, texts):
@@ -218,26 +220,31 @@ class OnnxBgeEmbeddings(Embeddings):
218
  def embed_query(self, text):
219
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
220
 
221
- # ---------------------------------------------------------
222
- # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
223
- # ---------------------------------------------------------
224
  # ---------------------------------------------------------
225
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
226
  # ---------------------------------------------------------
227
  class LLMEvaluator:
228
  def __init__(self):
229
- # Using the ONNX Community version of Llama 3.2 1B
230
- self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
231
- print(f"🔄 Loading LLM: {self.model_id}...")
232
 
233
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
 
 
 
 
 
 
 
 
 
 
234
 
235
- # FIX: Removed 'decoder_file_name' argument
236
- # The library now automatically finds 'model.onnx' in the repo
237
  self.model = ORTModelForCausalLM.from_pretrained(
238
- self.model_id,
239
  use_cache=True,
240
- use_io_binding=False # Safe for CPU
241
  )
242
 
243
  def evaluate(self, context, question, student_answer):
@@ -286,6 +293,7 @@ class LLMEvaluator:
286
  skip_special_tokens=True
287
  )
288
  return response
 
289
  # ---------------------------------------------------------
290
  # 3. Main Application Logic
291
  # ---------------------------------------------------------
 
179
 
180
 
181
 
 
182
  import gradio as gr
183
  import fitz # PyMuPDF
184
  import torch
 
190
  from langchain_core.embeddings import Embeddings
191
 
192
  # --- ONNX & MODEL IMPORTS ---
193
+ from transformers import AutoTokenizer
194
  from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
195
+ from huggingface_hub import snapshot_download
196
 
197
  # ---------------------------------------------------------
198
  # 1. Custom ONNX Embedding Class (BGE-Large)
199
  # ---------------------------------------------------------
200
  class OnnxBgeEmbeddings(Embeddings):
201
+ def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
202
  print(f"🔄 Loading Embeddings: {model_name}...")
203
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
204
+ # Note: export=True will re-convert on every restart.
205
+ # For production, you'd want to save this permanently, but this works for now.
206
  self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
207
 
208
  def _process_batch(self, texts):
 
220
  def embed_query(self, text):
221
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
222
 
 
 
 
223
  # ---------------------------------------------------------
224
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
225
  # ---------------------------------------------------------
226
  class LLMEvaluator:
227
  def __init__(self):
228
+ self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
229
+ print(f"🔄 Preparing LLM: {self.repo_id}...")
 
230
 
231
+ # [CRITICAL FIX]
232
+ # Download model to a specific LOCAL directory to avoid cache symlink errors
233
+ print("📥 Downloading model to local directory (this fixes the filesystem error)...")
234
+ local_model_path = snapshot_download(
235
+ repo_id=self.repo_id,
236
+ local_dir="onnx_llama_local", # Downloads to ./onnx_llama_local/
237
+ local_dir_use_symlinks=False # Forces real files, not symlinks
238
+ )
239
+ print("✅ Download complete.")
240
+
241
+ self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
242
 
243
+ # Load from the local folder
 
244
  self.model = ORTModelForCausalLM.from_pretrained(
245
+ local_model_path,
246
  use_cache=True,
247
+ use_io_binding=False
248
  )
249
 
250
  def evaluate(self, context, question, student_answer):
 
293
  skip_special_tokens=True
294
  )
295
  return response
296
+
297
  # ---------------------------------------------------------
298
  # 3. Main Application Logic
299
  # ---------------------------------------------------------