heerjtdev committed on
Commit
d5b0c87
·
verified ·
1 Parent(s): 49671b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -13
app.py CHANGED
@@ -218,29 +218,32 @@ class OnnxBgeEmbeddings(Embeddings):
218
  def embed_query(self, text):
219
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
220
 
 
 
 
221
  # ---------------------------------------------------------
222
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
223
  # ---------------------------------------------------------
224
  class LLMEvaluator:
225
  def __init__(self):
226
- # Using the ONNX Community version of Llama 3.2 1B (Optimized for CPU)
227
  self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
228
  print(f"🔄 Loading LLM: {self.model_id}...")
229
 
230
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
231
 
232
- # Load the ONNX model for text generation
 
233
  self.model = ORTModelForCausalLM.from_pretrained(
234
  self.model_id,
235
- decoder_file_name="model.onnx", # Standard ONNX filename
236
  use_cache=True,
237
- use_io_binding=False # Safer for CPU spaces
238
  )
239
 
240
  def evaluate(self, context, question, student_answer):
241
  # Prompt Engineering for Llama 3
242
  messages = [
243
- {"role": "system", "content": "You are a strict but helpful academic grader. You will be given a context, a question, and a student's answer. Your job is to grade the answer based ONLY on the provided context."},
244
  {"role": "user", "content": f"""
245
  ### CONTEXT:
246
  {context}
@@ -252,14 +255,19 @@ class LLMEvaluator:
252
  {student_answer}
253
 
254
  ### INSTRUCTIONS:
255
- 1. Determine if the student answer is correct based on the context.
256
- 2. Give a score out of 10.
257
- 3. Provide a brief explanation.
258
  """}
259
  ]
260
 
261
  # Format input using the chat template
262
- input_text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
263
  inputs = self.tokenizer(input_text, return_tensors="pt")
264
 
265
  # Generate response
@@ -267,15 +275,17 @@ class LLMEvaluator:
267
  outputs = self.model.generate(
268
  **inputs,
269
  max_new_tokens=256,
270
- temperature=0.3, # Low temp for factual grading
271
  do_sample=True,
272
  top_p=0.9
273
  )
274
 
275
- # Decode and strip the prompt
276
- response = self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
 
 
277
  return response
278
-
279
  # ---------------------------------------------------------
280
  # 3. Main Application Logic
281
  # ---------------------------------------------------------
 
218
  def embed_query(self, text):
219
  return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
220
 
221
+ # ---------------------------------------------------------
222
+ # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
223
+ # ---------------------------------------------------------
224
  # ---------------------------------------------------------
225
  # 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
226
  # ---------------------------------------------------------
227
  class LLMEvaluator:
228
  def __init__(self):
229
+ # Using the ONNX Community version of Llama 3.2 1B
230
  self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
231
  print(f"🔄 Loading LLM: {self.model_id}...")
232
 
233
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
234
 
235
+ # FIX: Removed 'decoder_file_name' argument
236
+ # The library now automatically finds 'model.onnx' in the repo
237
  self.model = ORTModelForCausalLM.from_pretrained(
238
  self.model_id,
 
239
  use_cache=True,
240
+ use_io_binding=False # Safe for CPU
241
  )
242
 
243
  def evaluate(self, context, question, student_answer):
244
  # Prompt Engineering for Llama 3
245
  messages = [
246
+ {"role": "system", "content": "You are a helpful academic grader. Grade the student answer based ONLY on the provided context."},
247
  {"role": "user", "content": f"""
248
  ### CONTEXT:
249
  {context}
 
255
  {student_answer}
256
 
257
  ### INSTRUCTIONS:
258
+ 1. Is the answer correct?
259
+ 2. Score out of 10.
260
+ 3. Explanation.
261
  """}
262
  ]
263
 
264
  # Format input using the chat template
265
+ input_text = self.tokenizer.apply_chat_template(
266
+ messages,
267
+ tokenize=False,
268
+ add_generation_prompt=True
269
+ )
270
+
271
  inputs = self.tokenizer(input_text, return_tensors="pt")
272
 
273
  # Generate response
 
275
  outputs = self.model.generate(
276
  **inputs,
277
  max_new_tokens=256,
278
+ temperature=0.3,
279
  do_sample=True,
280
  top_p=0.9
281
  )
282
 
283
+ # Decode response
284
+ response = self.tokenizer.decode(
285
+ outputs[0][inputs.input_ids.shape[1]:],
286
+ skip_special_tokens=True
287
+ )
288
  return response
 
289
  # ---------------------------------------------------------
290
  # 3. Main Application Logic
291
  # ---------------------------------------------------------