# NOTE: "Spaces: Sleeping" was a Hugging Face Spaces status banner captured
# during extraction — it is page chrome, not part of this source file.
| from flask import Flask, request, jsonify | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
app = Flask(__name__)

# Fine-tuned SmolLM2-360M math-tutor checkpoint served by this API.
MODEL_ID = "newtechdevng/math-tutor-smollm2-360M"
SYSTEM_PROMPT = "You are a helpful math assistant."

# Load tokenizer and model once at import time so every request reuses them.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # Full precision on CPU/GPU. NOTE(review): older transformers releases
    # spell this kwarg `torch_dtype` — confirm the pinned library version.
    dtype=torch.float32,
    device_map="auto",
)
model.eval()  # inference mode: disables dropout / training-only behavior
print("✅ Model ready!")
@app.route("/")
def home():
    """Health-check endpoint: confirms the API process is up and serving.

    Returns:
        JSON payload with a static status/message; HTTP 200.
    """
    # Without this @app.route decorator Flask never registers the view and
    # GET / returns 404 — the decorator was missing in the original.
    return jsonify({"status": "ok", "message": "Math model API is running!"})
@app.route("/generate", methods=["POST"])
def generate():
    """Answer a math question posed in the POSTed JSON body.

    Expects JSON: {"question": "<text>", "max_new_tokens": <int, optional>}.

    Returns:
        200 with {"question": ..., "answer": ...} on success,
        400 with {"error": ...} on a missing/invalid request body.
    """
    # silent=True: malformed JSON / wrong content type yields None instead of
    # an unhandled 400, so we can return a consistent error payload ourselves.
    data = request.get_json(silent=True)
    if not data or "question" not in data:
        return jsonify({"error": "Send JSON with 'question' key"}), 400

    # str() guards against non-string JSON values (e.g. a number), which would
    # otherwise crash on .strip().
    question = str(data["question"]).strip()
    if not question:
        return jsonify({"error": "Send JSON with 'question' key"}), 400

    # Validate and clamp so an untrusted client cannot request an unbounded
    # (or non-integer) generation length.
    try:
        max_new_tokens = int(data.get("max_new_tokens", 256))
    except (TypeError, ValueError):
        max_new_tokens = 256
    max_new_tokens = max(1, min(max_new_tokens, 1024))

    # ChatML-style prompt matching the fine-tune's chat template.
    prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding: reproducible answers
            pad_token_id=tokenizer.eos_token_id,
        )

    # Slice off the prompt tokens; decode only the generated continuation.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return jsonify({"question": question, "answer": answer})
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860)