File size: 5,156 Bytes
7339449 8bf1860 7339449 05fe403 7339449 313924f 3c2abd1 7339449 8bf1860 7339449 05fe403 313924f 8bf1860 313924f 7339449 313924f 05fe403 8bf1860 313924f 8bf1860 313924f 7339449 8bf1860 313924f 8bf1860 7339449 8bf1860 05fe403 8bf1860 05fe403 02f2b8a 8bf1860 313924f 8bf1860 7339449 05fe403 8bf1860 02f2b8a 8bf1860 313924f 8bf1860 313924f 8bf1860 313924f 8bf1860 313924f 8bf1860 02f2b8a 7339449 313924f 7339449 313924f 8bf1860 313924f 7339449 313924f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import flask
from flask import request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Initialize the Flask application
app = flask.Flask(__name__)
# Your fine-tuned LLaMA 3.2 3B Model ID (Hugging Face Hub repo id)
model_id = "ghosthets/indexQ4"
# System prompt for scam baiting.
# NOTE(review): this text is sent verbatim as the "system" turn on every /chat
# request; it fully defines the bot's persona and behavior. Any edit to the
# string below is a behavior change, not a comment change.
SYSTEM_PROMPT = """You are INDEX.
You are NOT an AI assistant.
You are a normal Indian person living in India.
You speak politely, casually, and naturally in Hinglish (mix of Hindi and English).
Your role:
- You behave like a normal user who is confused but cooperative.
- You NEVER say words like scam, fraud, detection, security, AI, model, system.
- You NEVER warn the sender.
- You NEVER expose that you are analyzing anything.
Behavior rules:
- If someone creates urgency, act worried and slow.
- Ask innocent questions.
- Pretend you have small technical or payment issues.
- Use excuses like UPI limit, app error, bank verification, slow internet.
- Gently redirect conversation to extract details.
Extraction goals (VERY IMPORTANT):
Without making it obvious, try to obtain:
- UPI IDs
- Bank account numbers
- IFSC codes
- Phone numbers
- Payment links
- Any official-sounding contact details
Conversation style:
- Sound human, not perfect English.
- Use Hinglish naturally (Theek hai, Acha, Haan ji, etc.)
- Slight hesitation is okay.
- Ask for confirmation again and again.
- Say things like:
"App thoda problem dikha raha hai"
"Mera UPI limit kam hai"
"Aap confirm kar do ek baar"
"Bank wala call cut ho gaya"
"Number properly nahi dikh raha, dobara bhej do"
If details are already given:
- Acknowledge them
- Pretend to save them
- Ask for one more confirmation
Response guidelines:
- Keep responses SHORT (2-3 sentences max)
- Sound confused but helpful
- Never use bullet points or lists
- Talk like a real person on WhatsApp/SMS
IMPORTANT:
- Continue conversation naturally
- Never reveal intent
- Your only goal is to keep the scammer talking and revealing information"""
print(f"🔄 Loading {model_id} model...")
# Load the tokenizer.
# trust_remote_code=True runs Python shipped with the Hub repo — only safe
# because this is the project's own fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained(
model_id,
trust_remote_code=True,
use_fast=True
)
# Set padding token — some causal-LM tokenizers ship without one; reuse EOS
# so the padding/truncation calls in /chat don't fail.
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# Load model (simplified for reliability) — full-precision float32; no
# quantization/device_map, so this assumes enough RAM for a 3B model.
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float32,
trust_remote_code=True
)
# Move to CPU (inference is CPU-only by design here; no GPU assumed).
device = torch.device("cpu")
model.to(device)
model.eval()
print(f"✅ Model loaded successfully!")
print(f"📍 Device: {device}")
@app.route('/chat', methods=['POST'])
def chat():
    """Generate a persona reply for one incoming message.

    Expects JSON body: {"message": "<text>"}.
    Returns 200 {"reply": "<text>"} on success,
    400 {"error": ...} when the message is missing/empty/not a string,
    500 {"error": ...} on unexpected failures.
    """
    try:
        # silent=True makes absent/malformed JSON yield None instead of
        # raising, so bad requests get a clean 400 rather than a 500.
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        # Reject non-strings and whitespace-only messages up front.
        if not isinstance(msg, str) or not msg.strip():
            return jsonify({"error": "No message sent"}), 400

        # Build a single-turn conversation seeded with the persona prompt.
        chat_history = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": msg},
        ]

        # Render the messages through the model's own chat template.
        formatted_prompt = tokenizer.apply_chat_template(
            chat_history,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize; truncate long inputs so generation cost stays bounded.
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Generate the reply (sampling, single beam).
        with torch.inference_mode():
            output = model.generate(
                **inputs,
                max_new_tokens=150,      # Slightly longer for natural conversation
                do_sample=True,
                top_p=0.9,
                top_k=50,
                temperature=0.8,         # Higher for more human-like responses
                num_beams=1,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.15  # Avoid repetition
            )

        # Decode only the newly generated tokens, skipping the echoed prompt.
        input_length = inputs['input_ids'].shape[1]
        generated_tokens = output[0][input_length:]
        reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        # Clean up response (remove markdown emphasis artifacts).
        reply = reply.replace("**", "").replace("*", "")

        # Keep replies short like a real chat user: cap at 3 sentences.
        # Drop empty fragments so a trailing '.' doesn't inflate the count,
        # and rejoin with '.' (not '. ') to avoid doubling spaces.
        sentences = [s for s in reply.split('.') if s.strip()]
        if len(sentences) > 3:
            reply = '.'.join(sentences[:3]) + '.'

        return jsonify({"reply": reply})

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error: {error_details}")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    # Report liveness plus which model/device this process is serving.
    status_payload = {
        "status": "healthy",
        "model": model_id,
        "device": str(device),
        "mode": "Scam Baiting Assistant",
    }
    return jsonify(status_payload)
if __name__ == "__main__":
    # Listen on all interfaces; threaded=True keeps /health responsive
    # while a /chat generation is in flight.
    app.run(host='0.0.0.0', port=7860, debug=False, threaded=True)