Spaces:
Sleeping
Sleeping
| import flask | |
| from flask import request, jsonify | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import time | |
# Flask application object that the request handlers and the run guard use.
app = flask.Flask(__name__)

# Identifier handed to from_pretrained() for both the tokenizer and the model.
model_id = "dexcommunity/indexQ2"

# Persona instructions placed at the start of every generation prompt.
SYSTEM_PROMPT = (
    "You are INDEX, a normal Indian person. "
    "Reply shortly in Hinglish. "
    "Act confused, polite, and cooperative. "
    "Never explain anything."
)

# The tokenizer and model are loaded once, at import time, and shared by all
# requests. The status messages are plain text: the glyphs that used to
# prefix them had been mis-decoded into stray Greek letters.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Inference runs on the CPU; eval() switches the model to inference mode
# (e.g. disables dropout).
device = torch.device("cpu")
model.to(device)
model.eval()
print("Model ready")
# ---------------- ROOT ----------------
# NOTE(review): no route registration for this handler was visible in the
# file, so without the decorator the app would answer 404 on every path.
# "/" is taken from the section header -- confirm it matches the clients.
@app.route("/")
def home():
    """Return a small JSON status document showing the service is up."""
    return jsonify({"status": "ok", "speed": "fast mode"})
# ---------------- CHAT ----------------
# NOTE(review): no route registration for this handler was visible in the
# file, so it is registered here. The "/chat" path is inferred from the
# section header and the function name -- confirm it against the clients.
@app.route("/chat", methods=["POST"])
def chat():
    """Generate a short persona reply to the message in the request body.

    The body is parsed as JSON regardless of its Content-Type and is expected
    to look like ``{"message": "<text>"}``. The response is always a JSON
    object of the form ``{"reply": "<text>"}``:

    * a missing, blank or non-string message (or a JSON body that is not an
      object) gets the short "Haan ji?" prompt;
    * an empty generation, or one that exceeded the time budget, gets a
      "please repeat" fallback;
    * any error during tokenization or generation is logged and gets a
      "please wait" fallback.

    A body that is not valid JSON is still rejected by ``get_json`` itself.
    """
    data = request.get_json(force=True)

    # Valid JSON is not necessarily an object, and "message" is not
    # necessarily a string; treat anything else as "no message" rather than
    # crashing with an AttributeError (HTTP 500).
    raw_msg = data.get("message") if isinstance(data, dict) else None
    user_msg = raw_msg.strip() if isinstance(raw_msg, str) else ""
    if not user_msg:
        return jsonify({"reply": "Haan ji?"})

    # Time budget in seconds for producing a reply. A monotonic clock is
    # used so that wall-clock adjustments cannot distort the measurement.
    reply_budget_s = 3.0
    start = time.monotonic()

    prompt = f"{SYSTEM_PROMPT}\nUser: {user_msg}\nReply:"

    try:
        # NOTE(review): if the tokenizer truncates on the right (the usual
        # default), a very long message loses the trailing "Reply:" cue --
        # confirm whether the message should be capped separately.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=256,
        ).to(device)
        prompt_len = inputs["input_ids"].shape[1]

        with torch.inference_mode():
            output = model.generate(
                **inputs,
                max_new_tokens=25,  # keep replies very short
                do_sample=False,  # greedy decoding, no sampling
                # Stop decoding once the budget is spent instead of only
                # discarding the result afterwards.
                max_time=reply_budget_s,
                pad_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; keep the continuation.
        new_tokens = output[0][prompt_len:]
        reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Safety fallback: nothing generated, or the budget was exceeded.
        if not reply or time.monotonic() - start > reply_budget_s:
            reply = "Acha, ek baar dobara bolna. Network thoda slow lag raha hai."
    except Exception:
        # Request boundary: record the failure, but still answer in persona.
        app.logger.exception("Reply generation failed")
        reply = "Haan ji, samajh nahi aa raha. Thoda wait karna."

    return jsonify({"reply": reply})
# ---------------- RUN ----------------
if __name__ == "__main__":
    # Only start Flask's built-in server when this file is executed as a
    # script. 0.0.0.0 accepts connections on every network interface.
    server_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**server_options)