heerjtdev committed on
Commit
76cfdea
·
verified ·
1 Parent(s): feb61bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -21
app.py CHANGED
@@ -478,34 +478,27 @@ class LLMEvaluator:
478
  )
479
 
480
  def evaluate(self, context, question, student_answer):
481
- # STRICT PROMPT (As requested)
482
- system_prompt = """You are a strict academic grader.
 
483
 
484
  RULES:
485
- 1. BASE YOUR SCORE ONLY ON THE CONTEXT PROVIDED.
486
- 2. If the student answer contradicts the context, give a score of 0-2.
487
- 3. If the context says 'A is B', and the student says 'A is C', the student is WRONG.
488
- 4. Be harsh. Do not give credit for vague or hallucinatory answers."""
489
 
490
  user_prompt = f"""
491
- ### SOURCE MATERIAL (CONTEXT):
492
  {context}
493
 
494
- ### EXAM QUESTION:
495
  {question}
496
 
497
  ### STUDENT ANSWER:
498
  {student_answer}
499
 
500
- ### INSTRUCTIONS:
501
- Compare the Student Answer to the Source Material.
502
- - Does the student explicitly mention the key points found in the text?
503
- - If the student describes something NOT in the text (e.g., "looking in" vs "looking out"), mark it wrong.
504
-
505
- OUTPUT FORMAT:
506
- Score: [0-10]
507
- Verdict: [Correct/Incorrect/Partially Correct]
508
- Explanation: [1-2 sentences explaining why, citing the text]
509
  """
510
 
511
  messages = [
@@ -521,13 +514,18 @@ class LLMEvaluator:
521
 
522
  inputs = self.tokenizer(input_text, return_tensors="pt")
523
 
 
524
  with torch.no_grad():
525
  outputs = self.model.generate(
526
  **inputs,
527
  max_new_tokens=200,
528
- temperature=0.1,
529
- do_sample=True,
530
- top_p=0.9
 
 
 
 
531
  )
532
 
533
  response = self.tokenizer.decode(
@@ -535,7 +533,6 @@ class LLMEvaluator:
535
  skip_special_tokens=True
536
  )
537
  return response
538
-
539
  # ---------------------------------------------------------
540
  # 3. Main Application Logic
541
  # ---------------------------------------------------------
 
478
  )
479
 
480
  def evaluate(self, context, question, student_answer):
481
+ # 3. STRICT PROMPT
482
+ system_prompt = """You are a strict academic grader.
483
+ Your goal is to check if the student's answer is supported by the context.
484
 
485
  RULES:
486
+ 1. If the answer contradicts the context, score it 0-3.
487
+ 2. If the answer describes things NOT in the text, mark it wrong.
488
+ 3. Be direct. Do not repeat yourself."""
 
489
 
490
  user_prompt = f"""
491
+ ### CONTEXT:
492
  {context}
493
 
494
+ ### QUESTION:
495
  {question}
496
 
497
  ### STUDENT ANSWER:
498
  {student_answer}
499
 
500
+ ### TASK:
501
+ Grade the answer (0-10) and verify if it matches the text provided.
 
 
 
 
 
 
 
502
  """
503
 
504
  messages = [
 
514
 
515
  inputs = self.tokenizer(input_text, return_tensors="pt")
516
 
517
+ # 4. FIXED GENERATION PARAMETERS
518
  with torch.no_grad():
519
  outputs = self.model.generate(
520
  **inputs,
521
  max_new_tokens=200,
522
+
523
+ # [CRITICAL FIXES]
524
+ do_sample=False, # Greedy Search (Faster, more deterministic)
525
+ repetition_penalty=1.2, # Kills the "####. ####." loops
526
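+ min_length=10, # Intended to force a non-empty reply. NOTE(review): min_length counts prompt tokens too, so for a decoder-only model this is likely a no-op; min_new_tokens=10 may be what was meant (confirm)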
527
+
528
+ # Removed 'temperature' and 'top_p' because do_sample=False ignores them
529
  )
530
 
531
  response = self.tokenizer.decode(
 
533
  skip_special_tokens=True
534
  )
535
  return response
 
536
  # ---------------------------------------------------------
537
  # 3. Main Application Logic
538
  # ---------------------------------------------------------