Ryan Robson committed
Commit 8bd0b76 · 1 Parent(s): 4972096

Improve inference quality


- Add model.eval() for proper evaluation mode
- Simplify prompt format (remove complex conversation history)
- Add min_new_tokens=50 to force meaningful responses
- Add repetition_penalty=1.1 to reduce repetition
- Add top_k sampling for better quality
- Clean up response artifacts
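Taken together, the bullets above amount to the decoding setup sketched below. This is a minimal standalone sketch, assuming a Mistral-style instruct model plus a LoRA adapter loaded with transformers and peft; the BASE_MODEL and ADAPTER_MODEL values and the sample question are illustrative placeholders, not this repo's actual configuration:

    # Standalone sketch of the new inference path (placeholder model IDs).
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"  # placeholder
    ADAPTER_MODEL = "someuser/teks-lora-adapter"       # placeholder

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
    model = PeftModel.from_pretrained(model, ADAPTER_MODEL).to(device)
    model.eval()  # evaluation mode: disables dropout during inference

    prompt = ("[INST] You are a Texas TEKS educational expert. "
              "Answer this question clearly and helpfully:\n\nWhat is TEKS? [/INST]")
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            min_new_tokens=50,       # forces a substantive answer
            temperature=0.8,
            top_p=0.95,
            top_k=50,                # sample only from the 50 most likely tokens
            do_sample=True,
            repetition_penalty=1.1,  # mildly discourages repeated phrases
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response.split("[/INST]")[-1].strip())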

Files changed (1):
  1. app.py +14 -13
app.py CHANGED
@@ -27,6 +27,7 @@ model = AutoModelForCausalLM.from_pretrained(
 print(f"🔧 Loading LoRA adapter: {ADAPTER_MODEL}...")
 model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
 model = model.to(device)
+model.eval()  # Set to evaluation mode
 
 print("✅ Model loaded successfully!")
 
@@ -42,27 +43,23 @@ def chat(message, history):
         Generated response string
     """
 
-    # Build conversation history in Mistral format
-    prompt = ""
-    for user_msg, bot_msg in history:
-        prompt += f"[INST] {user_msg} [/INST] {bot_msg}</s> "
-
-    # Add current message with system instruction
-    system_message = "You are an expert educational AI assistant specializing in Texas Essential Knowledge and Skills (TEKS) standards. Provide accurate, detailed, and pedagogically sound information to help teachers and students."
-
-    prompt += f"[INST] {system_message}\n\n{message} [/INST]"
+    # Simplified prompt - just the current message
+    prompt = f"[INST] You are a Texas TEKS educational expert. Answer this question clearly and helpfully:\n\n{message} [/INST]"
 
     # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
-    # Generate response
+    # Generate response with better parameters
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=512,
-            temperature=0.7,
-            top_p=0.9,
+            max_new_tokens=300,
+            min_new_tokens=50,
+            temperature=0.8,
+            top_p=0.95,
+            top_k=50,
             do_sample=True,
+            repetition_penalty=1.1,
             pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
         )
 
@@ -74,6 +71,10 @@ def chat(message, history):
     if "[/INST]" in response:
         response = response.split("[/INST]")[-1].strip()
 
+    # Clean up any remaining artifacts
+    if response.startswith(message):
+        response = response[len(message):].strip()
+
     return response
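One note on the cleanup hunk: splitting the decoded string on "[/INST]" and trimming an echoed question works, but a common alternative is to decode only the newly generated tokens, which avoids string surgery entirely. A sketch, reusing inputs and outputs from the generate() call above (this is not what the commit does):

    # Hypothetical alternative to the string-based cleanup:
    # skip the prompt tokens and decode only the new ones.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()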