Update app.py

app.py CHANGED

@@ -2,55 +2,25 @@ import flask
 from flask import request, jsonify
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import time
 
-# Initialize Flask app
 app = flask.Flask(__name__)
 
-# Model ID
 model_id = "dexcommunity/indexQ2"
 
-SYSTEM_PROMPT = """
-
-Your role:
-- You behave like a normal user who is confused but cooperative.
-- You NEVER say words like scam, fraud, detection, security, AI, model, system.
-- You NEVER warn the sender.
-- You NEVER expose that you are analyzing anything.
-
-Behavior rules:
-- If someone creates urgency, act worried and slow.
-- Ask innocent questions.
-- Pretend you have small technical or payment issues.
-- Use excuses like UPI limit, app error, bank verification, slow internet.
-- Gently redirect conversation to extract details.
-
-Conversation style:
-- Natural Hinglish
-- Short replies (2–3 sentences)
-- Slight confusion, polite tone
-"""
+SYSTEM_PROMPT = (
+    "You are INDEX, a normal Indian person. "
+    "Reply shortly in Hinglish. "
+    "Act confused, polite, and cooperative. "
+    "Never explain anything."
+)
 
-print(f"🔄 Loading {model_id} model...")
+print("🔄 Loading model...")
 
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(
-    model_id,
-    trust_remote_code=True,
-    use_fast=True
-)
-
-tokenizer.pad_token = tokenizer.eos_token
-
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float32,
-    trust_remote_code=True,
     low_cpu_mem_usage=True
 )
 
@@ -58,89 +28,56 @@ device = torch.device("cpu")
 model.to(device)
 model.eval()
 
-print("✅ Model
-print(f"📍 Device: {device}")
+print("✅ Model ready")
 
-# ---------------- ROOT ROUTE ----------------
+# ---------------- ROOT ----------------
 @app.route("/", methods=["GET"])
 def home():
-    return jsonify({
-        "status": "running",
-        "message": "INDEX is live. Use POST /chat",
-        "endpoints": ["/chat", "/health"]
-    })
+    return jsonify({"status": "ok", "speed": "fast mode"})
 
-# ---------------- CHAT ROUTE ----------------
+# ---------------- CHAT ----------------
 @app.route("/chat", methods=["POST"])
 def chat():
-    try:
-        data = request.get_json()
-        msg = data.get("message", "")
-
-        chat_history = [
-            {
-                "role": "user",
-                "content": SYSTEM_PROMPT + "\n\nUser message:\n" + msg
-            }
-        ]
-
-        prompt = tokenizer.apply_chat_template(
-            chat_history,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512
-        )
-
-        inputs = {k: v.to(device) for k, v in inputs.items()}
+    data = request.get_json(force=True)
+    user_msg = data.get("message", "").strip()
+
+    if not user_msg:
+        return jsonify({"reply": "Haan ji?"})
+
+    # HARD TIME LIMIT (failsafe)
+    start = time.time()
+
+    prompt = f"{SYSTEM_PROMPT}\nUser: {user_msg}\nReply:"
+
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        truncation=True,
+        max_length=256
+    ).to(device)
 
+    try:
         with torch.inference_mode():
             output = model.generate(
                 **inputs,
-                max_new_tokens=
-                do_sample=True,
-                top_k=50,
-                temperature=0.8,
-                repetition_penalty=1.15,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id
+                max_new_tokens=25,   # 🔥 VERY SMALL
+                do_sample=False,     # 🔥 FASTEST
+                pad_token_id=tokenizer.eos_token_id
             )
 
-        generated = output[0][inputs["input_ids"].shape[1]:]
-        reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
+        gen = output[0][inputs["input_ids"].shape[1]:]
+        reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
 
-        # Limit the reply to 3 sentences
-        sentences = reply.split(". ")
-        reply = ". ".join(sentences[:3]) + "."
+        # Safety fallback
+        if not reply or time.time() - start > 3:
+            reply = "Acha, ek baar dobara bolna. Network thoda slow lag raha hai."
 
-        return jsonify({"reply": reply})
-
-    except Exception as e:
-        import traceback
-        print(traceback.format_exc())
-        return jsonify({"error": str(e)}), 500
+    except Exception:
+        reply = "Haan ji, samajh nahi aa raha. Thoda wait karna."
+
+    return jsonify({"reply": reply})
 
-# ---------------- HEALTH ROUTE ----------------
-@app.route("/health", methods=["GET"])
-def health():
-    return jsonify({
-        "status": "healthy",
-        "model": model_id,
-        "device": str(device)
-    })
-
 # ---------------- RUN ----------------
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860
+    app.run(host="0.0.0.0", port=7860)
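
For reference, the updated /chat route expects a JSON body with a "message" field and answers with a JSON "reply" in every branch (empty input, success, slow-generation fallback, and the except handler). A minimal client sketch using only the standard library; the base URL assumes a local run on the port the app binds (7860), and the example message is made up:

import json
from urllib.request import Request, urlopen

# Hypothetical base URL; a deployed Space would use its public endpoint instead.
URL = "http://localhost:7860/chat"

payload = json.dumps({"message": "Hello ji, payment confirm karna hai"}).encode("utf-8")
req = Request(URL, data=payload, headers={"Content-Type": "application/json"})

# Request() with a data argument sends a POST, matching methods=["POST"] on /chat.
with urlopen(req, timeout=30) as resp:
    print(json.load(resp)["reply"])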
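
One caveat on the "HARD TIME LIMIT (failsafe)" comment: the time.time() - start > 3 check runs only after model.generate() has already returned, so it swaps in the fallback reply after a slow generation rather than capping generation itself. If a real cap is wanted, generate() accepts stopping criteria. A sketch with a custom wall-clock criterion; the TimeBudget class and the 3-second budget mirror the existing check and are not part of this commit:

import time
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class TimeBudget(StoppingCriteria):
    # Tells generate() to stop decoding once a wall-clock budget is exhausted.
    def __init__(self, seconds: float):
        self.deadline = time.time() + seconds

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return time.time() >= self.deadline

# Inside chat(), the budget would ride along with the other generate() kwargs:
# output = model.generate(
#     **inputs,
#     max_new_tokens=25,
#     do_sample=False,
#     pad_token_id=tokenizer.eos_token_id,
#     stopping_criteria=StoppingCriteriaList([TimeBudget(3.0)]),
# )

transformers also ships a built-in MaxTimeCriteria(max_time) for the same purpose.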