ghosthets committed on
Commit
cc759ac
·
verified ·
1 Parent(s): c6011c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -24
app.py CHANGED
@@ -3,13 +3,13 @@ from flask import request, jsonify
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
- # Initialize the Flask application
7
  app = flask.Flask(__name__)
8
 
9
  # Model ID
10
  model_id = "dexcommunity/indexQ2"
11
 
12
- # System prompt
13
  SYSTEM_PROMPT = """You are INDEX.
14
  You are NOT an AI assistant.
15
  You are a normal Indian person living in India.
@@ -28,14 +28,6 @@ Behavior rules:
28
  - Use excuses like UPI limit, app error, bank verification, slow internet.
29
  - Gently redirect conversation to extract details.
30
 
31
- Extraction goals:
32
- - UPI IDs
33
- - Bank account numbers
34
- - IFSC codes
35
- - Phone numbers
36
- - Payment links
37
- - Any official-looking contact details
38
-
39
  Conversation style:
40
  - Natural Hinglish
41
  - Short replies (2–3 sentences)
@@ -54,7 +46,7 @@ tokenizer = AutoTokenizer.from_pretrained(
54
  if tokenizer.pad_token is None:
55
  tokenizer.pad_token = tokenizer.eos_token
56
 
57
- # Load model (CPU safe)
58
  model = AutoModelForCausalLM.from_pretrained(
59
  model_id,
60
  torch_dtype=torch.float32,
@@ -69,7 +61,7 @@ model.eval()
69
  print("✅ Model loaded successfully!")
70
  print(f"📍 Device: {device}")
71
 
72
- # ---------------- ROOT ROUTE (IMPORTANT FOR HF SPACES) ----------------
73
  @app.route("/", methods=["GET"])
74
  def home():
75
  return jsonify({
@@ -78,19 +70,22 @@ def home():
78
  "endpoints": ["/chat", "/health"]
79
  })
80
 
81
- # ---------------- CHAT ENDPOINT ----------------
82
  @app.route("/chat", methods=["POST"])
83
  def chat():
84
  try:
85
- data = request.get_json()
86
- msg = data.get("message", "")
87
 
88
  if not msg:
89
  return jsonify({"error": "No message provided"}), 400
90
 
 
91
  chat_history = [
92
- {"role": "system", "content": SYSTEM_PROMPT},
93
- {"role": "user", "content": msg}
 
 
94
  ]
95
 
96
  prompt = tokenizer.apply_chat_template(
@@ -102,7 +97,6 @@ def chat():
102
  inputs = tokenizer(
103
  prompt,
104
  return_tensors="pt",
105
- padding=True,
106
  truncation=True,
107
  max_length=512
108
  )
@@ -126,10 +120,10 @@ def chat():
126
  generated = output[0][input_len:]
127
  reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
128
 
129
- # Limit length (human-like)
130
- parts = reply.split(".")
131
- if len(parts) > 3:
132
- reply = ". ".join(parts[:3]) + "."
133
 
134
  return jsonify({"reply": reply})
135
 
@@ -138,7 +132,7 @@ def chat():
138
  print(traceback.format_exc())
139
  return jsonify({"error": str(e)}), 500
140
 
141
- # ---------------- HEALTH CHECK ----------------
142
  @app.route("/health", methods=["GET"])
143
  def health():
144
  return jsonify({
@@ -147,6 +141,6 @@ def health():
147
  "device": str(device)
148
  })
149
 
150
- # ---------------- RUN APP ----------------
151
  if __name__ == "__main__":
152
  app.run(host="0.0.0.0", port=7860, debug=False)
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
+ # Initialize Flask app
7
  app = flask.Flask(__name__)
8
 
9
  # Model ID
10
  model_id = "dexcommunity/indexQ2"
11
 
12
+ # System prompt (prepended to the user message when the chat history is built)
13
  SYSTEM_PROMPT = """You are INDEX.
14
  You are NOT an AI assistant.
15
  You are a normal Indian person living in India.
 
28
  - Use excuses like UPI limit, app error, bank verification, slow internet.
29
  - Gently redirect conversation to extract details.
30
 
 
 
 
 
 
 
 
 
31
  Conversation style:
32
  - Natural Hinglish
33
  - Short replies (2–3 sentences)
 
46
  if tokenizer.pad_token is None:
47
  tokenizer.pad_token = tokenizer.eos_token
48
 
49
+ # Load model (CPU-safe)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_id,
52
  torch_dtype=torch.float32,
 
61
  print("✅ Model loaded successfully!")
62
  print(f"📍 Device: {device}")
63
 
64
+ # ---------------- ROOT ROUTE ----------------
65
  @app.route("/", methods=["GET"])
66
  def home():
67
  return jsonify({
 
70
  "endpoints": ["/chat", "/health"]
71
  })
72
 
73
+ # ---------------- CHAT ROUTE ----------------
74
  @app.route("/chat", methods=["POST"])
75
  def chat():
76
  try:
77
+ data = request.get_json(force=True)
78
+ msg = data.get("message", "").strip()
79
 
80
  if not msg:
81
  return jsonify({"error": "No message provided"}), 400
82
 
83
+ # IMPORTANT: the system prompt is merged into the single user message; no separate system-role entry is sent
84
  chat_history = [
85
+ {
86
+ "role": "user",
87
+ "content": SYSTEM_PROMPT + "\n\nUser message:\n" + msg
88
+ }
89
  ]
90
 
91
  prompt = tokenizer.apply_chat_template(
 
97
  inputs = tokenizer(
98
  prompt,
99
  return_tensors="pt",
 
100
  truncation=True,
101
  max_length=512
102
  )
 
120
  generated = output[0][input_len:]
121
  reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
122
 
123
+ # Keep reply short & human
124
+ sentences = reply.split(".")
125
+ if len(sentences) > 3:
126
+ reply = ". ".join(sentences[:3]) + "."
127
 
128
  return jsonify({"reply": reply})
129
 
 
132
  print(traceback.format_exc())
133
  return jsonify({"error": str(e)}), 500
134
 
135
+ # ---------------- HEALTH ROUTE ----------------
136
  @app.route("/health", methods=["GET"])
137
  def health():
138
  return jsonify({
 
141
  "device": str(device)
142
  })
143
 
144
+ # ---------------- RUN ----------------
145
  if __name__ == "__main__":
146
  app.run(host="0.0.0.0", port=7860, debug=False)