Ilke Ileri committed
Commit 39bb917 · 1 Parent(s): 2311595

Improve conversation quality: use full history, increase tokens to 150, better sampling

Files changed (1)
  1. app.py +23 -52
app.py CHANGED
@@ -98,56 +98,25 @@ def chat_completions():
 
     messages = data.get("messages", [])
 
-    # Extract user messages (ignore system messages from Vapi)
-    user_messages = [msg for msg in messages if msg.get("role") == "user"]
-
-    if not user_messages:
-        prompt = data.get("prompt", "")
-    else:
-        # Get the last user message
-        prompt = user_messages[-1].get("content", "")
-
-    if not prompt:
-        return jsonify({"error": "No prompt provided"}), 400
-
-    print(f"User prompt: {prompt}")
-
-    # Sales-only keyword filter
-    sales_keywords = ['sale', 'price', 'objection', 'close', 'deal', 'customer', 'prospect',
-                      'lead', 'pitch', 'negotiate', 'client', 'business', 'product', 'service',
-                      'wisemate', 'revenue', 'quota', 'conversion', 'crm', 'follow up', 'meeting',
-                      'appointment', 'schedule', 'calendar', 'demo', 'call', 'consultation']
-
-    # Check if question is sales-related
-    prompt_lower = prompt.lower()
-    is_sales_related = any(keyword in prompt_lower for keyword in sales_keywords)
-
-    # If not sales-related, return redirect message
-    if not is_sales_related and len(prompt.split()) > 3:  # Only filter if it's a real question
-        redirect_message = "I'm Daniel from Wisemate, and I specialize in sales and business topics. I can help you with sales techniques, handling objections, closing deals, and Wisemate's services. How can I assist you with your sales needs?"
-
-        return jsonify({
-            "id": "chatcmpl-redirect",
-            "object": "chat.completion",
-            "created": int(__import__('time').time()),
-            "model": MODEL_NAME,
-            "choices": [{
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": redirect_message
-                },
-                "finish_reason": "stop"
-            }],
-            "usage": {
-                "prompt_tokens": 0,
-                "completion_tokens": 0,
-                "total_tokens": 0
-            }
-        }), 200
-
-    # Prompt in Gemma format
-    formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
+    # Build conversation history in Gemma format
+    # Only use user and assistant messages, skip system (Vapi's system prompt is just context)
+    conversation_parts = []
+    for msg in messages:
+        role = msg.get("role")
+        content = msg.get("content", "")
+
+        if role == "user":
+            conversation_parts.append(f"<start_of_turn>user\n{content}<end_of_turn>")
+        elif role == "assistant":
+            conversation_parts.append(f"<start_of_turn>model\n{content}<end_of_turn>")
+        # Skip system messages - Vapi's system prompt is too long for context
+
+    # Add the model turn start
+    conversation_parts.append("<start_of_turn>model\n")
+    formatted_prompt = "\n".join(conversation_parts)
+
+    print(f"Formatted prompt length: {len(formatted_prompt)} chars")
+    print(f"Last user message: {[m['content'] for m in messages if m.get('role')=='user'][-1] if messages else 'none'}")
 
     # Generate model response
     inputs = tokenizer(formatted_prompt, return_tensors="pt")
@@ -159,9 +128,11 @@ def chat_completions():
     print(f"Generating response...")
     outputs = model.generate(
         **inputs,
-        max_new_tokens=50,   # Drastically reduced for real-time voice (was 100)
-        temperature=0.3,     # Lower for faster, more focused responses
-        do_sample=False,     # Greedy decoding for speed
+        max_new_tokens=150,      # Increased for complete appointment booking responses
+        temperature=0.7,         # Higher for more natural conversation
+        do_sample=True,          # Enable sampling for variety
+        top_p=0.9,
+        repetition_penalty=1.1,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id
     )
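
Note on the new prompt builder: Gemma's chat template has no separate system role, so the loop maps "user" turns to <start_of_turn>user and "assistant" turns to <start_of_turn>model, then leaves an open model turn for generation. Below is a minimal standalone sketch of the same logic; the sample messages list is made up for illustration and is not part of the commit.

# Hypothetical sample conversation; the system entry is skipped, as in app.py.
messages = [
    {"role": "system", "content": "You are Daniel from Wisemate."},
    {"role": "user", "content": "Can I book a demo?"},
    {"role": "assistant", "content": "Of course! What day works for you?"},
    {"role": "user", "content": "Tomorrow at 10."},
]

conversation_parts = []
for msg in messages:
    role = msg.get("role")
    content = msg.get("content", "")
    if role == "user":
        conversation_parts.append(f"<start_of_turn>user\n{content}<end_of_turn>")
    elif role == "assistant":
        conversation_parts.append(f"<start_of_turn>model\n{content}<end_of_turn>")
conversation_parts.append("<start_of_turn>model\n")

print("\n".join(conversation_parts))
# <start_of_turn>user
# Can I book a demo?<end_of_turn>
# <start_of_turn>model
# Of course! What day works for you?<end_of_turn>
# <start_of_turn>user
# Tomorrow at 10.<end_of_turn>
# <start_of_turn>model

If the tokenizer here is a stock Gemma tokenizer, tokenizer.apply_chat_template(conversation, add_generation_prompt=True) should emit the same <start_of_turn> layout and would be an alternative to hand-rolling the template (an assumption about the tokenizer, since its setup is outside this diff).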
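
Note on the generation changes: with the previous do_sample=False, the temperature setting had no effect, because greedy decoding always picks the highest-probability token; switching to do_sample=True is what actually activates temperature=0.7 and top_p=0.9 (nucleus sampling), and repetition_penalty=1.1 mildly penalizes tokens the model has already produced. The decode step is not part of this diff; a sketch of how the reply would typically be extracted, assuming the inputs/outputs names from the context lines above:

# Assumed follow-up code (not shown in this commit): decode only the newly
# generated tokens so the prompt is not echoed back in the response.
input_length = inputs["input_ids"].shape[1]
new_tokens = outputs[0][input_length:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True)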