Ilke Ileri committed on
Commit a73c020 · 1 Parent(s): e48c956

Add system prompt guard to enforce sales-only responses

Files changed (1)
  1. app.py +37 -6
app.py CHANGED
@@ -30,16 +30,21 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True, to
 print("Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
-    dtype=torch.float16,
+    torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
-    token=HF_TOKEN
+    token=HF_TOKEN,
+    device_map="auto"
 )
 
 print("Loading LoRA adapters...")
 model = PeftModel.from_pretrained(base_model, MODEL_NAME, token=HF_TOKEN)
 model.eval()
 
+# Determine the device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
 print("Model loaded successfully!")
 
 @app.route("/", methods=["GET"])
@@ -77,13 +82,33 @@ def chat_completions():
         if not prompt:
             return jsonify({"error": "No prompt provided"}), 400
 
-        # Prompt in Gemma format
-        formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
+        # Sales context guard - add the system prompt
+        system_prompt = """You are a professional sales assistant for Wisemate. You ONLY answer questions related to:
+- Sales techniques and strategies
+- Handling objections (price, timing, competition)
+- Closing deals
+- Lead qualification
+- Customer relationship management
+- Sales processes and frameworks
+- Wisemate's services and capabilities
+
+If asked about unrelated topics (science, math, general knowledge, etc.), politely redirect:
+"I'm here to help with sales and business-related questions about Wisemate. How can I assist you with your sales inquiries?"
+
+Now respond to this sales-related question:"""
+
+        # Prompt in Gemma format - include the system prompt
+        formatted_prompt = f"<start_of_turn>user\n{system_prompt}\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
 
         # Generate the model response
         inputs = tokenizer(formatted_prompt, return_tensors="pt")
+
+        # Move the inputs to the same device as the model
+        if hasattr(model, 'device'):
+            inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
         outputs = model.generate(
-            inputs["input_ids"],
+            **inputs,
             max_new_tokens=256,
             temperature=0.7,
             do_sample=True,
@@ -113,8 +138,14 @@ def chat_completions():
         return jsonify(vapi_response), 200
 
     except Exception as e:
+        import traceback
+        error_details = traceback.format_exc()
         print(f"Error: {str(e)}")
-        return jsonify({"error": str(e)}), 500
+        print(f"Traceback: {error_details}")
+        return jsonify({
+            "error": str(e),
+            "type": type(e).__name__
+        }), 500
 
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
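Note (not part of this commit): the hunks above stop at the generate() call; the response text and vapi_response are built further down in app.py and are unchanged here. A minimal sketch of how the dict-style inputs and Gemma's turn format are typically handled at decode time, using only names that appear in the diff, is:

# Sketch only - illustrative, not the code in this repo.
# Decode just the newly generated tokens so the system prompt and user turn
# are not echoed back in the response.
prompt_length = inputs["input_ids"].shape[-1]
generated_ids = outputs[0][prompt_length:]
response_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
response_text = response_text.replace("<end_of_turn>", "").strip()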
 
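One hedged note on the device handling: the new hasattr(model, 'device') check covers the common case, but a model loaded with device_map="auto" can be sharded and then carries an hf_device_map rather than a single device. A fallback sketch that also reuses the device variable introduced in this commit (an assumption-labelled illustration, not what the commit does):

# Sketch only - fallback device resolution if model.device is missing.
if hasattr(model, "device"):
    target_device = model.device
elif getattr(model, "hf_device_map", None):
    # hf_device_map maps module names to devices; use the first one
    target_device = next(iter(model.hf_device_map.values()))
else:
    target_device = device  # "cuda" or "cpu", determined at startup
inputs = {k: v.to(target_device) for k, v in inputs.items()}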
 
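To sanity-check the guard after deploying, send a deliberately off-topic prompt and confirm the redirect message comes back instead of a factual answer. The route path, port, and payload field below are assumptions inferred from this diff (the handler is chat_completions(), the app listens on 7860, and requests without a prompt are rejected); adjust them to whatever request schema the Space actually expects.

# Sketch only - route and payload shape are assumptions, not taken from app.py.
import requests

resp = requests.post(
    "http://localhost:7860/chat/completions",           # assumed route for chat_completions()
    json={"prompt": "What is the capital of France?"},  # deliberately off-topic
    timeout=120,
)
print(resp.status_code)
print(resp.json())  # expect the sales redirect message, not a factual answer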