indexq2

Sleeping

App Files Files Community

ghosthets committed on Jan 25

Commit

cc759ac

verified ·

1 Parent(s): c6011c6

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -24

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ from flask import request, jsonify
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# Initialize the Flask application
 app = flask.Flask(__name__)
 # Model ID
 model_id = "dexcommunity/indexQ2"
-# System prompt
 SYSTEM_PROMPT = """You are INDEX.
 You are NOT an AI assistant.
 You are a normal Indian person living in India.
@@ -28,14 +28,6 @@ Behavior rules:
 - Use excuses like UPI limit, app error, bank verification, slow internet.
 - Gently redirect conversation to extract details.
-Extraction goals:
-- UPI IDs
-- Bank account numbers
-- IFSC codes
-- Phone numbers
-- Payment links
-- Any official-looking contact details
 Conversation style:
 - Natural Hinglish
 - Short replies (2–3 sentences)
@@ -54,7 +46,7 @@ tokenizer = AutoTokenizer.from_pretrained(
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-# Load model (CPU safe)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float32,
@@ -69,7 +61,7 @@ model.eval()
 print("✅ Model loaded successfully!")
 print(f"📍 Device: {device}")
-# ---------------- ROOT ROUTE (IMPORTANT FOR HF SPACES) ----------------
 @app.route("/", methods=["GET"])
 def home():
     return jsonify({
@@ -78,19 +70,22 @@ def home():
         "endpoints": ["/chat", "/health"]
     })
-# ---------------- CHAT ENDPOINT ----------------
 @app.route("/chat", methods=["POST"])
 def chat():
     try:
-        data = request.get_json()
-        msg = data.get("message", "")
         if not msg:
             return jsonify({"error": "No message provided"}), 400
         chat_history = [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": msg}
         ]
         prompt = tokenizer.apply_chat_template(
@@ -102,7 +97,6 @@ def chat():
         inputs = tokenizer(
             prompt,
             return_tensors="pt",
-            padding=True,
             truncation=True,
             max_length=512
         )
@@ -126,10 +120,10 @@ def chat():
         generated = output[0][input_len:]
         reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
-        # Limit length (human-like)
-        parts = reply.split(".")
-        if len(parts) > 3:
-            reply = ". ".join(parts[:3]) + "."
         return jsonify({"reply": reply})
@@ -138,7 +132,7 @@ def chat():
         print(traceback.format_exc())
         return jsonify({"error": str(e)}), 500
-# ---------------- HEALTH CHECK ----------------
 @app.route("/health", methods=["GET"])
 def health():
     return jsonify({
@@ -147,6 +141,6 @@ def health():
         "device": str(device)
     })
-# ---------------- RUN APP ----------------
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=False)

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Initialize Flask app
 app = flask.Flask(__name__)
 # Model ID
 model_id = "dexcommunity/indexQ2"
+# System prompt (merged into the user message later)
 SYSTEM_PROMPT = """You are INDEX.
 You are NOT an AI assistant.
 You are a normal Indian person living in India.
 - Use excuses like UPI limit, app error, bank verification, slow internet.
 - Gently redirect conversation to extract details.
 Conversation style:
 - Natural Hinglish
 - Short replies (2–3 sentences)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
+# Load model (CPU-safe)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float32,
 print("✅ Model loaded successfully!")
 print(f"📍 Device: {device}")
+# ---------------- ROOT ROUTE ----------------
 @app.route("/", methods=["GET"])
 def home():
     return jsonify({
         "endpoints": ["/chat", "/health"]
     })
+# ---------------- CHAT ROUTE ----------------
 @app.route("/chat", methods=["POST"])
 def chat():
     try:
+        data = request.get_json(force=True)
+        msg = data.get("message", "").strip()
         if not msg:
             return jsonify({"error": "No message provided"}), 400
+        # IMPORTANT: the system prompt is merged into the user message
         chat_history = [
+            {
+                "role": "user",
+                "content": SYSTEM_PROMPT + "\n\nUser message:\n" + msg
+            }
         ]
         prompt = tokenizer.apply_chat_template(
         inputs = tokenizer(
             prompt,
             return_tensors="pt",
             truncation=True,
             max_length=512
         )
         generated = output[0][input_len:]
         reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
+        # Keep reply short & human
+        sentences = reply.split(".")
+        if len(sentences) > 3:
+            reply = ". ".join(sentences[:3]) + "."
         return jsonify({"reply": reply})
         print(traceback.format_exc())
         return jsonify({"error": str(e)}), 500
+# ---------------- HEALTH ROUTE ----------------
 @app.route("/health", methods=["GET"])
 def health():
     return jsonify({
         "device": str(device)
     })
+# ---------------- RUN ----------------
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=False)