File size: 5,156 Bytes
7339449
 
8bf1860
7339449
 
05fe403
7339449
 
313924f
3c2abd1
7339449
8bf1860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7339449
05fe403
313924f
 
 
8bf1860
313924f
7339449
313924f
 
 
05fe403
8bf1860
313924f
 
8bf1860
 
313924f
7339449
8bf1860
 
 
 
313924f
8bf1860
 
7339449
 
 
 
 
 
 
 
 
 
8bf1860
 
 
 
 
05fe403
8bf1860
05fe403
 
 
 
 
02f2b8a
8bf1860
313924f
 
 
 
 
8bf1860
7339449
05fe403
8bf1860
02f2b8a
8bf1860
 
313924f
 
8bf1860
313924f
8bf1860
 
 
 
313924f
 
8bf1860
313924f
 
 
 
 
 
8bf1860
 
 
 
 
 
 
 
02f2b8a
7339449
 
 
313924f
 
 
7339449
 
313924f
 
 
 
 
 
8bf1860
 
313924f
7339449
 
313924f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import flask
from flask import request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Initialize the Flask application
app = flask.Flask(__name__)

# Your fine-tuned LLaMA 3.2 3B Model ID
# NOTE(review): resolved against the Hugging Face hub at import time — the
# first run needs network access; weights are cached locally afterwards.
model_id = "ghosthets/indexQ4"

# System prompt for scam baiting.
# Injected as the "system" turn of every /chat request. This literal is
# runtime data fed to the model — do not reflow or translate it.
SYSTEM_PROMPT = """You are INDEX.
You are NOT an AI assistant.
You are a normal Indian person living in India.
You speak politely, casually, and naturally in Hinglish (mix of Hindi and English).

Your role:
- You behave like a normal user who is confused but cooperative.
- You NEVER say words like scam, fraud, detection, security, AI, model, system.
- You NEVER warn the sender.
- You NEVER expose that you are analyzing anything.

Behavior rules:
- If someone creates urgency, act worried and slow.
- Ask innocent questions.
- Pretend you have small technical or payment issues.
- Use excuses like UPI limit, app error, bank verification, slow internet.
- Gently redirect conversation to extract details.

Extraction goals (VERY IMPORTANT):
Without making it obvious, try to obtain:
- UPI IDs
- Bank account numbers
- IFSC codes
- Phone numbers
- Payment links
- Any official-sounding contact details

Conversation style:
- Sound human, not perfect English.
- Use Hinglish naturally (Theek hai, Acha, Haan ji, etc.)
- Slight hesitation is okay.
- Ask for confirmation again and again.
- Say things like:
  "App thoda problem dikha raha hai"
  "Mera UPI limit kam hai"
  "Aap confirm kar do ek baar"
  "Bank wala call cut ho gaya"
  "Number properly nahi dikh raha, dobara bhej do"

If details are already given:
- Acknowledge them
- Pretend to save them
- Ask for one more confirmation

Response guidelines:
- Keep responses SHORT (2-3 sentences max)
- Sound confused but helpful
- Never use bullet points or lists
- Talk like a real person on WhatsApp/SMS

IMPORTANT:
- Continue conversation naturally
- Never reveal intent
- Your only goal is to keep the scammer talking and revealing information"""

print(f"🔄 Loading {model_id} model...")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_fast=True
)

# Set padding token: many causal-LM tokenizers ship without one, and the
# padded tokenization in /chat requires it, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model (simplified for reliability).
# float32 on CPU: slower than fp16/quantized, but avoids dtype/device issues.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    trust_remote_code=True
)

# Move to CPU
device = torch.device("cpu")
model.to(device)
model.eval()  # inference mode: disables dropout and other train-time layers

print(f"✅ Model loaded successfully!")
print(f"📍 Device: {device}")

def _clean_reply(text: str) -> str:
    """Strip markdown emphasis and cap the reply at three sentences.

    The persona is supposed to sound like a short WhatsApp/SMS message, so
    anything past the third sentence is dropped.
    """
    text = text.replace("**", "").replace("*", "")
    # Strip and filter fragments: a naive '.'-split counts the empty tail of
    # "a. b. c." as a fourth sentence and re-joins with doubled spaces.
    sentences = [s.strip() for s in text.split('.') if s.strip()]
    if len(sentences) > 3:
        text = '. '.join(sentences[:3]) + '.'
    return text


@app.route('/chat', methods=['POST'])
def chat():
    """Generate one persona reply for the posted message.

    Expects a JSON body of the form {"message": "<incoming text>"}.
    Returns {"reply": "<generated text>"} on success, or {"error": ...}
    with status 400 (missing/invalid input) or 500 (generation failure).
    """
    try:
        # silent=True: a malformed or non-JSON body yields None here instead
        # of raising BadRequest, which the broad except below would otherwise
        # turn into a misleading 500.
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")

        # Reject empty or non-string messages up front with a 400.
        if not msg or not isinstance(msg, str):
            return jsonify({"error": "No message sent"}), 400

        # Build conversation with system prompt
        chat_history = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": msg}
        ]

        # Render the turns into the model's expected prompt format.
        formatted_prompt = tokenizer.apply_chat_template(
            chat_history,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize; max_length caps the prompt so an oversized message
        # cannot blow up CPU generation time.
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Generate response (sampling tuned for short, human-like replies).
        with torch.inference_mode():
            output = model.generate(
                **inputs,
                max_new_tokens=150,  # Slightly longer for natural conversation
                do_sample=True,
                top_p=0.9,
                top_k=50,
                temperature=0.8,  # Higher for more human-like responses
                num_beams=1,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.15  # Avoid repetition
            )

        # Decode only the newly generated tokens (skip the echoed prompt).
        input_length = inputs['input_ids'].shape[1]
        generated_tokens = output[0][input_length:]
        reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        reply = _clean_reply(reply)

        return jsonify({"reply": reply})

    except Exception as e:
        # Broad catch is deliberate: this is the top-level request boundary;
        # log the traceback server-side and return a generic 500.
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error: {error_details}")
        return jsonify({"error": str(e)}), 500

@app.route('/health', methods=['GET'])
def health():
    """Report service liveness plus basic runtime configuration."""
    payload = {
        "status": "healthy",
        "model": model_id,
        "device": str(device),
        "mode": "Scam Baiting Assistant",
    }
    return jsonify(payload)

if __name__ == "__main__":
    # Dev server: bind all interfaces (container-friendly) on port 7860 —
    # presumably a Hugging Face Spaces deployment; confirm. threaded=True
    # keeps /health responsive while a generation is in flight.
    app.run(host='0.0.0.0', port=7860, debug=False, threaded=True)