Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -218,29 +218,32 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 218 |
def embed_query(self, text):
|
| 219 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 220 |
|
|
|
|
|
|
|
|
|
|
| 221 |
# ---------------------------------------------------------
|
| 222 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 223 |
# ---------------------------------------------------------
|
| 224 |
class LLMEvaluator:
|
| 225 |
def __init__(self):
|
| 226 |
-
# Using the ONNX Community version of Llama 3.2 1B
|
| 227 |
self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
|
| 228 |
print(f"🔄 Loading LLM: {self.model_id}...")
|
| 229 |
|
| 230 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
| 231 |
|
| 232 |
-
#
|
|
|
|
| 233 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 234 |
self.model_id,
|
| 235 |
-
decoder_file_name="model.onnx", # Standard ONNX filename
|
| 236 |
use_cache=True,
|
| 237 |
-
use_io_binding=False #
|
| 238 |
)
|
| 239 |
|
| 240 |
def evaluate(self, context, question, student_answer):
|
| 241 |
# Prompt Engineering for Llama 3
|
| 242 |
messages = [
|
| 243 |
-
{"role": "system", "content": "You are a
|
| 244 |
{"role": "user", "content": f"""
|
| 245 |
### CONTEXT:
|
| 246 |
{context}
|
|
@@ -252,14 +255,19 @@ class LLMEvaluator:
|
|
| 252 |
{student_answer}
|
| 253 |
|
| 254 |
### INSTRUCTIONS:
|
| 255 |
-
1.
|
| 256 |
-
2.
|
| 257 |
-
3.
|
| 258 |
"""}
|
| 259 |
]
|
| 260 |
|
| 261 |
# Format input using the chat template
|
| 262 |
-
input_text = self.tokenizer.apply_chat_template(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 264 |
|
| 265 |
# Generate response
|
|
@@ -267,15 +275,17 @@ class LLMEvaluator:
|
|
| 267 |
outputs = self.model.generate(
|
| 268 |
**inputs,
|
| 269 |
max_new_tokens=256,
|
| 270 |
-
temperature=0.3,
|
| 271 |
do_sample=True,
|
| 272 |
top_p=0.9
|
| 273 |
)
|
| 274 |
|
| 275 |
-
# Decode
|
| 276 |
-
response = self.tokenizer.decode(
|
|
|
|
|
|
|
|
|
|
| 277 |
return response
|
| 278 |
-
|
| 279 |
# ---------------------------------------------------------
|
| 280 |
# 3. Main Application Logic
|
| 281 |
# ---------------------------------------------------------
|
|
|
|
| 218 |
def embed_query(self, text):
|
| 219 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 220 |
|
| 221 |
+
# ---------------------------------------------------------
|
| 222 |
+
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 223 |
+
# ---------------------------------------------------------
|
| 224 |
# ---------------------------------------------------------
|
| 225 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 226 |
# ---------------------------------------------------------
|
| 227 |
class LLMEvaluator:
|
| 228 |
def __init__(self):
|
| 229 |
+
# Using the ONNX Community version of Llama 3.2 1B
|
| 230 |
self.model_id = "onnx-community/Llama-3.2-1B-Instruct"
|
| 231 |
print(f"🔄 Loading LLM: {self.model_id}...")
|
| 232 |
|
| 233 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
| 234 |
|
| 235 |
+
# FIX: Removed 'decoder_file_name' argument
|
| 236 |
+
# The library now automatically finds 'model.onnx' in the repo
|
| 237 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 238 |
self.model_id,
|
|
|
|
| 239 |
use_cache=True,
|
| 240 |
+
use_io_binding=False # Safe for CPU
|
| 241 |
)
|
| 242 |
|
| 243 |
def evaluate(self, context, question, student_answer):
|
| 244 |
# Prompt Engineering for Llama 3
|
| 245 |
messages = [
|
| 246 |
+
{"role": "system", "content": "You are a helpful academic grader. Grade the student answer based ONLY on the provided context."},
|
| 247 |
{"role": "user", "content": f"""
|
| 248 |
### CONTEXT:
|
| 249 |
{context}
|
|
|
|
| 255 |
{student_answer}
|
| 256 |
|
| 257 |
### INSTRUCTIONS:
|
| 258 |
+
1. Is the answer correct?
|
| 259 |
+
2. Score out of 10.
|
| 260 |
+
3. Explanation.
|
| 261 |
"""}
|
| 262 |
]
|
| 263 |
|
| 264 |
# Format input using the chat template
|
| 265 |
+
input_text = self.tokenizer.apply_chat_template(
|
| 266 |
+
messages,
|
| 267 |
+
tokenize=False,
|
| 268 |
+
add_generation_prompt=True
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
inputs = self.tokenizer(input_text, return_tensors="pt")
|
| 272 |
|
| 273 |
# Generate response
|
|
|
|
| 275 |
outputs = self.model.generate(
|
| 276 |
**inputs,
|
| 277 |
max_new_tokens=256,
|
| 278 |
+
temperature=0.3,
|
| 279 |
do_sample=True,
|
| 280 |
top_p=0.9
|
| 281 |
)
|
| 282 |
|
| 283 |
+
# Decode response
|
| 284 |
+
response = self.tokenizer.decode(
|
| 285 |
+
outputs[0][inputs.input_ids.shape[1]:],
|
| 286 |
+
skip_special_tokens=True
|
| 287 |
+
)
|
| 288 |
return response
|
|
|
|
| 289 |
# ---------------------------------------------------------
|
| 290 |
# 3. Main Application Logic
|
| 291 |
# ---------------------------------------------------------
|