Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,7 +37,6 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 37 |
|
| 38 |
def _process_batch(self, texts):
|
| 39 |
inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
|
| 40 |
-
# On CPU, we don't need to manually move to device, but it's good practice
|
| 41 |
with torch.no_grad():
|
| 42 |
outputs = self.model(**inputs)
|
| 43 |
embeddings = outputs.last_hidden_state[:, 0]
|
|
@@ -51,7 +50,7 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 51 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 52 |
|
| 53 |
# ---------------------------------------------------------
|
| 54 |
-
# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) -
|
| 55 |
# ---------------------------------------------------------
|
| 56 |
class LLMEvaluator:
|
| 57 |
def __init__(self):
|
|
@@ -71,7 +70,6 @@ class LLMEvaluator:
|
|
| 71 |
|
| 72 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 73 |
|
| 74 |
-
# CRITICAL: Disable Graph Optimizations to prevent crash
|
| 75 |
sess_options = SessionOptions()
|
| 76 |
sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
|
| 77 |
|
|
@@ -80,24 +78,42 @@ class LLMEvaluator:
|
|
| 80 |
subfolder="onnx",
|
| 81 |
file_name="model_fp16.onnx",
|
| 82 |
use_cache=True,
|
| 83 |
-
use_io_binding=False,
|
| 84 |
provider=PROVIDERS[0],
|
| 85 |
session_options=sess_options
|
| 86 |
)
|
| 87 |
|
| 88 |
def evaluate(self, context, question, student_answer, max_marks):
|
|
|
|
| 89 |
messages = [
|
| 90 |
-
{"role": "system", "content": "You are a
|
| 91 |
{"role": "user", "content": f"""
|
| 92 |
-
|
| 93 |
-
QUESTION: {question}
|
| 94 |
-
ANSWER: {student_answer}
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
"""}
|
| 102 |
]
|
| 103 |
|
|
@@ -107,12 +123,12 @@ class LLMEvaluator:
|
|
| 107 |
with torch.no_grad():
|
| 108 |
outputs = self.model.generate(
|
| 109 |
**inputs,
|
| 110 |
-
max_new_tokens=
|
| 111 |
-
temperature=0.
|
| 112 |
-
do_sample=False
|
|
|
|
| 113 |
)
|
| 114 |
|
| 115 |
-
# FIX: Access input_ids correctly
|
| 116 |
input_length = inputs['input_ids'].shape[1]
|
| 117 |
response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
| 118 |
return response
|
|
|
|
| 37 |
|
| 38 |
def _process_batch(self, texts):
|
| 39 |
inputs = self.tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
|
|
|
|
| 40 |
with torch.no_grad():
|
| 41 |
outputs = self.model(**inputs)
|
| 42 |
embeddings = outputs.last_hidden_state[:, 0]
|
|
|
|
| 50 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 51 |
|
| 52 |
# ---------------------------------------------------------
|
| 53 |
+
# 2. OPTIMIZED LLM (Qwen 2.5 - 0.5B) - STRICT GRADING
|
| 54 |
# ---------------------------------------------------------
|
| 55 |
class LLMEvaluator:
|
| 56 |
def __init__(self):
|
|
|
|
| 70 |
|
| 71 |
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir)
|
| 72 |
|
|
|
|
| 73 |
sess_options = SessionOptions()
|
| 74 |
sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
|
| 75 |
|
|
|
|
| 78 |
subfolder="onnx",
|
| 79 |
file_name="model_fp16.onnx",
|
| 80 |
use_cache=True,
|
| 81 |
+
use_io_binding=False,
|
| 82 |
provider=PROVIDERS[0],
|
| 83 |
session_options=sess_options
|
| 84 |
)
|
| 85 |
|
| 86 |
def evaluate(self, context, question, student_answer, max_marks):
|
| 87 |
+
# OPTIMIZED PROMPT FOR SMALL MODELS (0.5B)
|
| 88 |
messages = [
|
| 89 |
+
{"role": "system", "content": "You are a strictest, literal academic grader. You ONLY grade based on the provided text. You DO NOT use outside knowledge."},
|
| 90 |
{"role": "user", "content": f"""
|
| 91 |
+
Task: Grade the student answer based ONLY on the Reference Text.
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
REFERENCE TEXT:
|
| 94 |
+
{context}
|
| 95 |
+
|
| 96 |
+
QUESTION:
|
| 97 |
+
{question}
|
| 98 |
+
|
| 99 |
+
STUDENT ANSWER:
|
| 100 |
+
{student_answer}
|
| 101 |
+
|
| 102 |
+
-----------------------------
|
| 103 |
+
GRADING LOGIC:
|
| 104 |
+
1. READ the Reference Text. What does it actually say about the Question?
|
| 105 |
+
2. COMPARE it to the Student Answer.
|
| 106 |
+
3 START with 0 marks and IF the answers line up to the reference text in a meaningful way, then add marks porportionally. ONLY GIVE MARKS FOR CORRECT STATEMENT STRICTLY BASED ON THE REFERENCE TEXT AND NOTHING ELSE IN THIS WORLD.
|
| 107 |
+
4. IF the Student Answer claims things not found in the text , he is incorrect and HALLUCINATING. Do not give marks for that statment/phrase
|
| 108 |
+
5. IF the Student Answer contradicts the text (e.g., Text says "hide personality" but Student says "show personality"), Do not give marks for that statment/phrase
|
| 109 |
+
|
| 110 |
+
VERDICT:
|
| 111 |
+
- If wrong: 0/{max_marks}
|
| 112 |
+
- If correct: {max_marks}/{max_marks}
|
| 113 |
+
|
| 114 |
+
OUTPUT FORMAT:
|
| 115 |
+
Score: [X]/{max_marks}
|
| 116 |
+
Feedback: [Brief explanation citing the text]
|
| 117 |
"""}
|
| 118 |
]
|
| 119 |
|
|
|
|
| 123 |
with torch.no_grad():
|
| 124 |
outputs = self.model.generate(
|
| 125 |
**inputs,
|
| 126 |
+
max_new_tokens=100,
|
| 127 |
+
temperature=0.05,  # NOTE: has no effect while do_sample=False (greedy decoding); only used if sampling is enabled
|
| 128 |
+
do_sample=False,
|
| 129 |
+
repetition_penalty=1.2
|
| 130 |
)
|
| 131 |
|
|
|
|
| 132 |
input_length = inputs['input_ids'].shape[1]
|
| 133 |
response = self.tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
| 134 |
return response
|