afanyu237 committed on
Commit
4202cbd
·
verified ·
1 Parent(s): a35c427

Create ai_topic_titles.py

Browse files
Files changed (1) hide show
  1. ai_topic_titles.py +428 -0
ai_topic_titles.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai_topic_titles.py
2
+ import requests
3
+ import os
4
+ import json
5
+ from typing import List, Optional
6
+ import hashlib
7
+ from datetime import datetime
8
+
9
def create_topic_title_prompt(topic_words: List[str], topic_id: int, language_hint: str = None) -> str:
    """
    Build the instruction prompt used to ask an LLM for a topic title.

    Args:
        topic_words: Top words of the topic; only the first 10 are embedded.
        topic_id: Zero-based topic index (rendered 1-based in the prompt).
        language_hint: Optional language of the chat (e.g. 'french'); when
            given, the prompt asks for an English title anyway.

    Returns:
        The fully formatted prompt string.
    """
    # Optional preamble noting the chat's language; empty when no hint given.
    note = (
        f"The chat is primarily in {language_hint.capitalize()}, but generate the title in English for clarity.\n\n"
        if language_hint
        else ""
    )

    language_context = note
    return f"""You are analyzing WhatsApp chat conversations. Based on these top words from a conversation topic, create a concise, descriptive title (max 6 words) that summarizes what people are discussing.

{language_context}Top words from topic {topic_id + 1}: {', '.join(topic_words[:10])}

Rules for creating the title:
1. Create a title that a normal person would understand immediately
2. Make it specific to chat conversations (not generic)
3. Don't include the words "Topic" or "Discussion" in the title
4. Use natural, conversational language
5. If the topic is about scheduling or time, mention that
6. If it's about work/projects, reflect that context
7. If it's social/personal, make it friendly and casual
8. If words are in another language, translate the concept, not word-for-word
9. Focus on the overall theme, not just individual words

Examples:
- Input: ['meeting', 'tomorrow', 'agenda', 'presentation', 'office']
- Output: "Work Meetings and Planning"

- Input: ['movie', 'weekend', 'dinner', 'party', 'fun']
- Output: "Weekend Social Plans"

- Input: ['loin', 'non', 'garçon', 'sortir', 'ami']
- Output: "Social Gatherings and Friends"

- Input: ['problem', 'help', 'fix', 'issue', 'solution']
- Output: "Problem Solving and Support"

Now create a title for these words:"""
57
+
58
def detect_language_from_words(words: List[str]) -> str:
    """
    Guess the dominant language of a word list with a tiny stop-word heuristic.

    Compares the first 20 words against small French and English marker sets;
    a language wins only if it has strictly more hits than the other AND more
    than two hits overall, otherwise 'mixed' is returned.
    """
    french_markers = {'le', 'la', 'les', 'un', 'une', 'des', 'je', 'tu', 'il', 'elle',
                      'nous', 'vous', 'ils', 'elles', 'oui', 'non', 'merci', 'bonjour',
                      'aujourd', 'demain', 'hier', 'loin', 'près', 'garçon', 'fille',
                      'sortie', 'boulot', 'travail', 'maison', 'école', 'université'}

    english_markers = {'the', 'and', 'you', 'that', 'was', 'for', 'are', 'with', 'this',
                       'have', 'from', 'they', 'what', 'when', 'where', 'why', 'how'}

    # Only sample the leading words; duplicates collapse via the set.
    sample = set(words[:20])

    fr_hits = len(sample & french_markers)
    en_hits = len(sample & english_markers)

    if fr_hits > en_hits and fr_hits > 2:
        return 'french'
    if en_hits > fr_hits and en_hits > 2:
        return 'english'
    return 'mixed'
81
+
82
def call_huggingface_api(prompt: str, model_name: str = None, api_key: str = None) -> str:
    """
    Call the Hugging Face Inference API for text generation.

    Args:
        prompt: Fully formatted instruction prompt.
        model_name: Hub model id; defaults to a free instruct model.
        api_key: HF token; falls back to the HUGGINGFACE_TOKEN env var.

    Returns:
        The generated title (first line, quotes and any "Title:" prefix
        stripped), or an empty string on any failure.

    Raises:
        ValueError: If no API key is available.
    """
    if not api_key:
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("Hugging Face API key not found. Set HUGGINGFACE_TOKEN environment variable.")

    # Use a good free model if none specified
    if not model_name:
        model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # Free and good
        # Alternatives: "google/flan-t5-xxl", "microsoft/phi-2"

    # BUG FIX: this previously posted the HF-style {"inputs": ..., "parameters": ...}
    # payload to OpenRouter's /chat/completions endpoint (with OpenRouter-only
    # headers), which that API cannot parse. Point at the HF Inference API so
    # the endpoint matches the payload schema.
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + api_key,
    }

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 50,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "return_full_text": False,
            "repetition_penalty": 1.1
        }
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            print("Hugging Face API response:", response.json())
            result = response.json()
            # Text-generation responses arrive as [{"generated_text": "..."}]
            if isinstance(result, list) and len(result) > 0:
                title = result[0].get('generated_text', '').strip()
                # Keep only the first line and strip wrapping quotes
                title = title.split('\n')[0].strip('"\'').strip()
                # Remove any "Title:" prefix the model may add
                if title.lower().startswith('title:'):
                    title = title[6:].strip()
                return title
        else:
            print(f"Hugging Face API error: {response.status_code}")
            print(f"Response: {response.text[:200]}")

    except requests.exceptions.Timeout:
        print("Hugging Face API timeout")
    except Exception as e:
        print(f"Hugging Face API exception: {e}")

    return ""
146
+
147
def call_gemini_api(prompt: str, model_name: str = None, api_key: str = None) -> str:
    """
    Generate a topic title via Google's Gemini API.

    Requires the `google-generativeai` package and an API key (explicit
    argument or the GEMINI_API_KEY environment variable). Returns the first
    line of the model's reply with surrounding quotes stripped, or "" on any
    generation failure.

    Raises:
        ImportError: If google-generativeai is not installed.
        ValueError: If no API key is available.
    """
    try:
        import google.generativeai as genai
    except ImportError:
        raise ImportError("Install google-generativeai: pip install google-generativeai")

    key = api_key or os.getenv("GEMINI_API_KEY")
    if not key:
        raise ValueError("Gemini API key not found. Set GEMINI_API_KEY environment variable.")

    genai.configure(api_key=key)

    chosen_model = model_name or "gemini-pro"  # Free tier model

    try:
        client = genai.GenerativeModel(chosen_model)
        reply = client.generate_content(
            prompt,
            generation_config={
                "temperature": 0.7,
                "top_p": 0.95,
                "max_output_tokens": 50,
            },
        )
        if reply.text:
            # Keep the first line only, dropping any wrapping quotes.
            return reply.text.strip().split('\n')[0].strip('"\'')
    except Exception as e:
        print(f"Gemini API error: {e}")

    return ""
186
+
187
def call_openai_compatible_api(prompt: str, model_name: str = None, api_key: str = None,
                               api_base: str = "https://api.together.xyz/v1") -> str:
    """
    Call an OpenAI-compatible completions API (e.g. Together AI, OpenRouter).

    Args:
        prompt: Instruction prompt to complete.
        model_name: Model id; defaults to a Mistral instruct model.
        api_key: Bearer token; falls back to TOGETHER_API_KEY,
            OPENROUTER_API_KEY, or OPENAI_API_KEY environment variables.
        api_base: Base URL; "/completions" is appended to it.

    Returns:
        Cleaned single-line title, or "" on any failure.

    Raises:
        ValueError: If no API key can be found.
    """
    if not api_key:
        # Try different environment variables
        api_key = os.getenv("TOGETHER_API_KEY") or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Set TOGETHER_API_KEY, OPENROUTER_API_KEY, or OPENAI_API_KEY.")

    if not model_name:
        model_name = "mistralai/Mistral-7B-Instruct-v0.1"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # Together's completion endpoint expects Mistral [INST] tags; other
    # providers take the raw prompt.
    if "together.xyz" in api_base:
        formatted_prompt = f"[INST] {prompt} [/INST]"
    else:
        formatted_prompt = prompt

    payload = {
        "model": model_name,
        "prompt": formatted_prompt,
        "max_tokens": 50,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    try:
        response = requests.post(f"{api_base}/completions", headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            result = response.json()
            # BUG FIX: indexing result.get('choices', [{}])[0] raised
            # IndexError when the API returned an empty "choices" list;
            # guard explicitly instead of relying on the broad except below.
            choices = result.get('choices') or []
            if choices:
                title = choices[0].get('text', '').strip()
                # Keep the first line only, strip wrapping quotes
                return title.split('\n')[0].strip('"\'').strip()
            print("API returned no choices")
        else:
            print(f"API error {response.status_code}: {response.text[:200]}")

    except Exception as e:
        print(f"API call error: {e}")

    return ""
236
+
237
def create_heuristic_title(topic_words: List[str], idx: int) -> str:
    """
    Build a simple fallback title when no AI backend is available.

    Known French words among the first three are mapped to English
    equivalents; everything else is kept verbatim.

    Args:
        topic_words: Words belonging to the topic.
        idx: Zero-based topic index (displayed as idx + 1).

    Returns:
        "Topic N: General Discussion" for short/empty inputs, otherwise
        "Topic N: word, word, word".
    """
    if not topic_words or len(topic_words) < 3:
        return f"Topic {idx + 1}: General Discussion"

    # Simple translation for common French words (you can expand this)
    french_to_english = {
        'loin': 'distance', 'non': 'no', 'garçon': 'boy/guy',
        'sortie': 'outing', 'boulot': 'work', 'finir': 'finish/complete',
        'rapidement': 'quickly', 'studio': 'studio', 'place': 'place/spot',
        'homme': 'man', 'taire': 'quiet/silence', 'métro': 'subway/metro',
        'prévoir': 'plan', 'venir': 'come', 'travail': 'work',
        'projet': 'project', 'temps': 'time', 'matin': 'morning',
        'personne': 'person', 'groupe': 'group', 'plan': 'plan',
        'transport': 'transportation'
    }

    # Map the three leading words through the dictionary, keeping unknowns as-is.
    translated = [french_to_english.get(word, word) for word in topic_words[:3]]

    return f"Topic {idx + 1}: {', '.join(translated)}"
272
+
273
def get_cache_key(topics: List[List[str]], api_type: str, model_name: str = "") -> str:
    """
    Derive a deterministic cache key from the topics and API settings.

    Topics are serialized to canonical JSON (sorted keys) so identical inputs
    always hash to the same MD5 digest. MD5 is used purely as a cache key,
    not for security.
    """
    serialized = json.dumps(topics, sort_keys=True)
    return hashlib.md5(f"{serialized}_{api_type}_{model_name}".encode()).hexdigest()
280
+
281
def load_cache(cache_file: str = "topic_cache.json") -> dict:
    """
    Load the topic-title cache from disk.

    Returns an empty dict when the file is missing, unreadable, or contains
    invalid JSON — caching is best-effort, so failures never propagate.
    """
    if not os.path.exists(cache_file):
        return {}
    try:
        with open(cache_file, 'r') as f:
            return json.load(f)
    # BUG FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit. Catch only read/parse failures (JSONDecodeError is a
    # ValueError subclass).
    except (OSError, ValueError):
        return {}
292
+
293
def save_cache(cache: dict, cache_file: str = "topic_cache.json"):
    """
    Persist the title cache to disk as pretty-printed JSON.

    Write failures are non-fatal: caching is best-effort, so errors are
    reported as a warning rather than raised.
    """
    try:
        with open(cache_file, 'w') as fh:
            json.dump(cache, fh, indent=2)
    except Exception as e:
        print(f"Warning: Could not save cache: {e}")
302
+
303
def generate_topic_titles_with_ai(
    topics: List[List[str]],
    api_type: str = "huggingface",
    model_name: str = None,
    api_key: str = None,
    use_cache: bool = True,
    cache_file: str = "topic_cache.json",
    language_hint: str = None
) -> List[str]:
    """
    Generate meaningful topic titles using AI APIs with caching support.

    Args:
        topics: List of topics, where each topic is a list of words
        api_type: "huggingface", "gemini", "openai", or "local"
        model_name: Specific model to use (optional)
        api_key: API key (can also be set as environment variable)
        use_cache: Whether to cache results to avoid repeated API calls
        cache_file: File to store cache
        language_hint: Hint about the language (auto-detected per topic if None)

    Returns:
        List of generated topic titles, one per input topic.
    """
    if not topics:
        return []

    # For local mode, use heuristic titles (no network calls at all)
    if api_type == "local":
        return [create_heuristic_title(topic, idx) for idx, topic in enumerate(topics)]

    # Load the cache once; reused again below when saving.
    cache = load_cache(cache_file) if use_cache else {}
    cache_key = get_cache_key(topics, api_type, model_name or "")
    if use_cache and cache_key in cache:
        print(f"Using cached topic titles for {cache_key}")
        return cache[cache_key]

    titles = []

    for idx, topic in enumerate(topics):
        if not isinstance(topic, list) or len(topic) < 3:
            # Fallback to simple title
            titles.append(f"Topic {idx + 1}: General Discussion")
            continue

        # BUG FIX: auto-detection previously assigned back into
        # `language_hint`, so the language detected for the FIRST topic was
        # reused for every later topic. Detect per topic without mutating
        # the parameter.
        topic_language = language_hint or detect_language_from_words(topic)

        # Create prompt for this topic
        prompt = create_topic_title_prompt(topic, idx, topic_language)

        try:
            if api_type == "huggingface":
                title = call_huggingface_api(prompt, model_name, api_key)
            elif api_type == "gemini":
                title = call_gemini_api(prompt, model_name, api_key)
            elif api_type == "openai":
                title = call_openai_compatible_api(prompt, model_name, api_key)
            else:
                raise ValueError(f"Unknown API type: {api_type}")

            # If AI didn't generate a good title, use fallback
            if not title or len(title) < 5:
                title = create_heuristic_title(topic, idx)

        except Exception as e:
            print(f"Error generating title for topic {idx + 1}: {e}")
            title = create_heuristic_title(topic, idx)

        # Timestamped progress line for debugging
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{timestamp}] Topic {idx + 1}: {title}")

        titles.append(title)

    # Save to cache (reuses the cache loaded above instead of re-reading the file)
    if use_cache and titles:
        cache[cache_key] = titles
        save_cache(cache, cache_file)

    return titles
394
+
395
# Main function for backward compatibility
def generate_topic_titles(
    topics,
    api_type="huggingface",
    hf_token=None,
    model_name=None,
    use_cache=True,
    **kwargs
):
    """
    Main wrapper function with backward compatibility.

    Args:
        topics: List of topics
        api_type: "local", "huggingface", "gemini", or "openai"
        hf_token: For backward compatibility (Hugging Face token)
        model_name: Specific model to use
        use_cache: Whether to use caching
        **kwargs: Additional parameters (`api_key`, `language_hint`)

    Returns:
        List of topic titles
    """
    # SECURITY FIX: a live OpenRouter API key was previously hard-coded here
    # as the *name* passed to kwargs.get(), which both leaked the secret in
    # source control and always returned None. Secrets must come from the
    # caller or environment variables — never live in code. (The leaked key
    # should be revoked.)
    api_key = hf_token or kwargs.get('api_key')
    language_hint = kwargs.get('language_hint')

    # BUG FIX: api_type and model_name were accepted but ignored — the call
    # below was hard-coded to "huggingface" and a fixed Mixtral model.
    # Forward the caller's choices (defaults are unchanged).
    return generate_topic_titles_with_ai(
        topics=topics,
        api_type=api_type,
        model_name=model_name,
        api_key=api_key,
        use_cache=use_cache,
        language_hint=language_hint
    )