---
library_name: transformers
tags: []
---

# Model Card for MathTutor-7B-H v0.1

MathTutor-7B-H is a Socratic math tutor: instead of giving direct answers or calculations, it guides the student toward a solution by asking one heuristic question at a time.

## Model: MathTutor RL version (Lambda = 1.0) (no Think) (HARD)

## Usage
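The script below loads the model weights together with the `Qwen/Qwen2.5-7B-Instruct` tokenizer and runs an interactive Socratic session in the terminal: the model plays the teacher and you type the student's replies. It assumes `torch`, `transformers`, and `accelerate` (needed for `device_map="auto"`) are installed, plus enough GPU memory to hold a 7B model in bfloat16 (roughly 15 GB).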
```python
import torch
import json
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# --- Configuration ---
model_weights_id = "Sandesh-Zenteiq/MathTutor-7B-H_v0.1"
tokenizer_id = "Qwen/Qwen2.5-7B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading weights from: {model_weights_id}")
print(f"Loading tokenizer from: {tokenizer_id}")
print(f"Using device: {device}")

# --- Loading Logic ---
print("\nLoading model config...")
config = AutoConfig.from_pretrained(model_weights_id, trust_remote_code=True)

print("\nLoading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, trust_remote_code=True)

print("Loading model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_weights_id,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)
print("Model loaded successfully!")

# --- Interactive Socratic Chat Loop ---
conversation_history = [
    {"role": "system", "content": "You are a Socratic teacher. Guide the student to solve the problem by asking heuristic questions. Do not give direct answers or calculations. Ask one question at a time."},
    {"role": "user", "content": "YOUR QUESTION HERE"}
]

print("\n--- Starting Interactive Socratic Session ---")
print("You are the student. The model is the teacher.")
print("Type 'quit' or 'exit' to end the conversation.\n")

# Generate the very first response from the teacher
prompt_parts = []
for message in conversation_history:
    prompt_parts.append(f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>")
# Signal to the model that it's its turn to generate
prompt_parts.append("<|im_start|>assistant")
manual_prompt = "\n".join(prompt_parts)

inputs = tokenizer(manual_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)
initial_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
# Extract only the assistant's part of the response
teacher_response_text = initial_response.split('<|im_start|>assistant')[1].replace('<|im_end|>', '').strip()

print(f"Teacher: {teacher_response_text}")
conversation_history.append({"role": "assistant", "content": teacher_response_text})

# Now start the interactive loop for back-and-forth
while True:
    student_input = input("Student: ")
    if student_input.lower() in ["quit", "exit"]:
        print("--- Session Ended ---")
        break

    # Add the user's new message to the history
    conversation_history.append({"role": "user", "content": student_input})

    # --- Manually build the prompt with the FULL history ---
    prompt_parts = []
    for message in conversation_history:
        prompt_parts.append(f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>")
    prompt_parts.append("<|im_start|>assistant")
    manual_prompt = "\n".join(prompt_parts)

    # Generate the next response based on the full history
    inputs = tokenizer(manual_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)
    full_generation = tokenizer.decode(outputs[0], skip_special_tokens=False)

    # Cleanly extract only the *newest* assistant response
    try:
        new_response_part = full_generation.split(manual_prompt)[1]
        teacher_response_text = new_response_part.replace('<|im_end|>', '').strip()
    except IndexError:
        # Fallback if splitting fails
        teacher_response_text = "I'm sorry, I seem to have lost my train of thought. Could you please repeat your question?"

    print(f"\nTeacher: {teacher_response_text}")

    # Add the model's new response to the history for the next turn
    conversation_history.append({"role": "assistant", "content": teacher_response_text})
```
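The manual `<|im_start|>`/`<|im_end|>` prompt built above follows the ChatML format used by Qwen2.5-Instruct models, so the tokenizer's built-in chat template should produce an equivalent prompt. As an alternative, the sketch below shows a single teacher turn using `tokenizer.apply_chat_template` and decoding only the newly generated tokens; it is a minimal sketch, assuming `tokenizer`, `model`, and `conversation_history` are already defined as in the script above.

```python
# Sketch of one generation turn via the tokenizer's chat template
# (assumes tokenizer, model, and conversation_history from the script above).
prompt = tokenizer.apply_chat_template(
    conversation_history,
    tokenize=False,
    add_generation_prompt=True,  # appends the final "<|im_start|>assistant" turn
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)

# Decode only the tokens produced after the prompt; no string splitting needed.
new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
teacher_response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

print(f"Teacher: {teacher_response_text}")
conversation_history.append({"role": "assistant", "content": teacher_response_text})
```

Slicing the output at the prompt length is generally more robust than splitting the decoded string, since decoding does not always reproduce the prompt text exactly.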