afanyu237 committed on
Commit
4202cbd
·
verified ·
1 Parent(s): a35c427

Create ai_topic_titles.py

Browse files
Files changed (1) hide show
  1. ai_topic_titles.py +428 -0
ai_topic_titles.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai_topic_titles.py
2
+ import requests
3
+ import os
4
+ import json
5
+ from typing import List, Optional
6
+ import hashlib
7
+ from datetime import datetime
8
+
9
def create_topic_title_prompt(topic_words: List[str], topic_id: int, language_hint: str = None) -> str:
    """
    Build the instruction prompt used to ask an LLM for a topic title.

    Args:
        topic_words: Top words of the topic; only the first 10 are embedded.
        topic_id: Zero-based topic index (rendered 1-based in the prompt).
        language_hint: Optional language of the chat (e.g. 'french'); when
            given, the prompt asks for an English title anyway.

    Returns:
        The fully formatted prompt string.
    """
    # Optional preamble noting the chat's language; empty when no hint given.
    note = (
        f"The chat is primarily in {language_hint.capitalize()}, but generate the title in English for clarity.\n\n"
        if language_hint
        else ""
    )

    language_context = note
    return f"""You are analyzing WhatsApp chat conversations. Based on these top words from a conversation topic, create a concise, descriptive title (max 6 words) that summarizes what people are discussing.

{language_context}Top words from topic {topic_id + 1}: {', '.join(topic_words[:10])}

Rules for creating the title:
1. Create a title that a normal person would understand immediately
2. Make it specific to chat conversations (not generic)
3. Don't include the words "Topic" or "Discussion" in the title
4. Use natural, conversational language
5. If the topic is about scheduling or time, mention that
6. If it's about work/projects, reflect that context
7. If it's social/personal, make it friendly and casual
8. If words are in another language, translate the concept, not word-for-word
9. Focus on the overall theme, not just individual words

Examples:
- Input: ['meeting', 'tomorrow', 'agenda', 'presentation', 'office']
- Output: "Work Meetings and Planning"

- Input: ['movie', 'weekend', 'dinner', 'party', 'fun']
- Output: "Weekend Social Plans"

- Input: ['loin', 'non', 'garçon', 'sortir', 'ami']
- Output: "Social Gatherings and Friends"

- Input: ['problem', 'help', 'fix', 'issue', 'solution']
- Output: "Problem Solving and Support"

Now create a title for these words:"""
57
+
58
def detect_language_from_words(words: List[str]) -> str:
    """
    Guess the dominant language of a word list with a tiny stop-word heuristic.

    Compares the first 20 words against small French and English marker sets;
    a language wins only if it has strictly more hits than the other AND more
    than two hits overall, otherwise 'mixed' is returned.
    """
    french_markers = {'le', 'la', 'les', 'un', 'une', 'des', 'je', 'tu', 'il', 'elle',
                      'nous', 'vous', 'ils', 'elles', 'oui', 'non', 'merci', 'bonjour',
                      'aujourd', 'demain', 'hier', 'loin', 'près', 'garçon', 'fille',
                      'sortie', 'boulot', 'travail', 'maison', 'école', 'université'}

    english_markers = {'the', 'and', 'you', 'that', 'was', 'for', 'are', 'with', 'this',
                       'have', 'from', 'they', 'what', 'when', 'where', 'why', 'how'}

    # Only sample the leading words; duplicates collapse via the set.
    sample = set(words[:20])

    fr_hits = len(sample & french_markers)
    en_hits = len(sample & english_markers)

    if fr_hits > en_hits and fr_hits > 2:
        return 'french'
    if en_hits > fr_hits and en_hits > 2:
        return 'english'
    return 'mixed'
81
+
82
def call_huggingface_api(prompt: str, model_name: str = None, api_key: str = None) -> str:
    """
    Call the Hugging Face Inference API for text generation.

    Args:
        prompt: Fully formatted instruction prompt.
        model_name: Hub model id; defaults to a free instruct model.
        api_key: HF token; falls back to the HUGGINGFACE_TOKEN env var.

    Returns:
        The generated title (first line, quotes and any "Title:" prefix
        stripped), or an empty string on any failure.

    Raises:
        ValueError: If no API key is available.
    """
    if not api_key:
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("Hugging Face API key not found. Set HUGGINGFACE_TOKEN environment variable.")

    # Use a good free model if none specified
    if not model_name:
        model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # Free and good
        # Alternatives: "google/flan-t5-xxl", "microsoft/phi-2"

    # BUG FIX: this previously posted the HF-style {"inputs": ..., "parameters": ...}
    # payload to OpenRouter's /chat/completions endpoint (with OpenRouter-only
    # headers), which that API cannot parse. Point at the HF Inference API so
    # the endpoint matches the payload schema.
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + api_key,
    }

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 50,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "return_full_text": False,
            "repetition_penalty": 1.1
        }
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            print("Hugging Face API response:", response.json())
            result = response.json()
            # Text-generation responses arrive as [{"generated_text": "..."}]
            if isinstance(result, list) and len(result) > 0:
                title = result[0].get('generated_text', '').strip()
                # Keep only the first line and strip wrapping quotes
                title = title.split('\n')[0].strip('"\'').strip()
                # Remove any "Title:" prefix the model may add
                if title.lower().startswith('title:'):
                    title = title[6:].strip()
                return title
        else:
            print(f"Hugging Face API error: {response.status_code}")
            print(f"Response: {response.text[:200]}")

    except requests.exceptions.Timeout:
        print("Hugging Face API timeout")
    except Exception as e:
        print(f"Hugging Face API exception: {e}")

    return ""
146
+
147
def call_gemini_api(prompt: str, model_name: str = None, api_key: str = None) -> str:
    """
    Generate a topic title via Google's Gemini API.

    Requires the `google-generativeai` package and an API key (explicit
    argument or the GEMINI_API_KEY environment variable). Returns the first
    line of the model's reply with surrounding quotes stripped, or "" on any
    generation failure.

    Raises:
        ImportError: If google-generativeai is not installed.
        ValueError: If no API key is available.
    """
    try:
        import google.generativeai as genai
    except ImportError:
        raise ImportError("Install google-generativeai: pip install google-generativeai")

    key = api_key or os.getenv("GEMINI_API_KEY")
    if not key:
        raise ValueError("Gemini API key not found. Set GEMINI_API_KEY environment variable.")

    genai.configure(api_key=key)

    chosen_model = model_name or "gemini-pro"  # Free tier model

    try:
        client = genai.GenerativeModel(chosen_model)
        reply = client.generate_content(
            prompt,
            generation_config={
                "temperature": 0.7,
                "top_p": 0.95,
                "max_output_tokens": 50,
            },
        )
        if reply.text:
            # Keep the first line only, dropping any wrapping quotes.
            return reply.text.strip().split('\n')[0].strip('"\'')
    except Exception as e:
        print(f"Gemini API error: {e}")

    return ""
186
+
187
def call_openai_compatible_api(prompt: str, model_name: str = None, api_key: str = None,
                               api_base: str = "https://api.together.xyz/v1") -> str:
    """
    Call an OpenAI-compatible completions API (e.g. Together AI, OpenRouter).

    Args:
        prompt: Instruction prompt to complete.
        model_name: Model id; defaults to a Mistral instruct model.
        api_key: Bearer token; falls back to TOGETHER_API_KEY,
            OPENROUTER_API_KEY, or OPENAI_API_KEY environment variables.
        api_base: Base URL; "/completions" is appended to it.

    Returns:
        Cleaned single-line title, or "" on any failure.

    Raises:
        ValueError: If no API key can be found.
    """
    if not api_key:
        # Try different environment variables
        api_key = os.getenv("TOGETHER_API_KEY") or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Set TOGETHER_API_KEY, OPENROUTER_API_KEY, or OPENAI_API_KEY.")

    if not model_name:
        model_name = "mistralai/Mistral-7B-Instruct-v0.1"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # Together's completion endpoint expects Mistral [INST] tags; other
    # providers take the raw prompt.
    if "together.xyz" in api_base:
        formatted_prompt = f"[INST] {prompt} [/INST]"
    else:
        formatted_prompt = prompt

    payload = {
        "model": model_name,
        "prompt": formatted_prompt,
        "max_tokens": 50,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    try:
        response = requests.post(f"{api_base}/completions", headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            result = response.json()
            # BUG FIX: indexing result.get('choices', [{}])[0] raised
            # IndexError when the API returned an empty "choices" list;
            # guard explicitly instead of relying on the broad except below.
            choices = result.get('choices') or []
            if choices:
                title = choices[0].get('text', '').strip()
                # Keep the first line only, strip wrapping quotes
                return title.split('\n')[0].strip('"\'').strip()
            print("API returned no choices")
        else:
            print(f"API error {response.status_code}: {response.text[:200]}")

    except Exception as e:
        print(f"API call error: {e}")

    return ""
236
+
237
def create_heuristic_title(topic_words: List[str], idx: int) -> str:
    """
    Build a simple fallback title when no AI backend is available.

    Known French words among the first three are mapped to English
    equivalents; everything else is kept verbatim.

    Args:
        topic_words: Words belonging to the topic.
        idx: Zero-based topic index (displayed as idx + 1).

    Returns:
        "Topic N: General Discussion" for short/empty inputs, otherwise
        "Topic N: word, word, word".
    """
    if not topic_words or len(topic_words) < 3:
        return f"Topic {idx + 1}: General Discussion"

    # Simple translation for common French words (you can expand this)
    french_to_english = {
        'loin': 'distance', 'non': 'no', 'garçon': 'boy/guy',
        'sortie': 'outing', 'boulot': 'work', 'finir': 'finish/complete',
        'rapidement': 'quickly', 'studio': 'studio', 'place': 'place/spot',
        'homme': 'man', 'taire': 'quiet/silence', 'métro': 'subway/metro',
        'prévoir': 'plan', 'venir': 'come', 'travail': 'work',
        'projet': 'project', 'temps': 'time', 'matin': 'morning',
        'personne': 'person', 'groupe': 'group', 'plan': 'plan',
        'transport': 'transportation'
    }

    # Map the three leading words through the dictionary, keeping unknowns as-is.
    translated = [french_to_english.get(word, word) for word in topic_words[:3]]

    return f"Topic {idx + 1}: {', '.join(translated)}"
272
+
273
def get_cache_key(topics: List[List[str]], api_type: str, model_name: str = "") -> str:
    """
    Derive a deterministic cache key from the topics and API settings.

    Topics are serialized to canonical JSON (sorted keys) so identical inputs
    always hash to the same MD5 digest. MD5 is used purely as a cache key,
    not for security.
    """
    serialized = json.dumps(topics, sort_keys=True)
    return hashlib.md5(f"{serialized}_{api_type}_{model_name}".encode()).hexdigest()
280
+
281
def load_cache(cache_file: str = "topic_cache.json") -> dict:
    """
    Load the topic-title cache from disk.

    Returns an empty dict when the file is missing, unreadable, or contains
    invalid JSON — caching is best-effort, so failures never propagate.
    """
    if not os.path.exists(cache_file):
        return {}
    try:
        with open(cache_file, 'r') as f:
            return json.load(f)
    # BUG FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit. Catch only read/parse failures (JSONDecodeError is a
    # ValueError subclass).
    except (OSError, ValueError):
        return {}
292
+
293
def save_cache(cache: dict, cache_file: str = "topic_cache.json"):
    """
    Persist the title cache to disk as pretty-printed JSON.

    Write failures are non-fatal: caching is best-effort, so errors are
    reported as a warning rather than raised.
    """
    try:
        with open(cache_file, 'w') as fh:
            json.dump(cache, fh, indent=2)
    except Exception as e:
        print(f"Warning: Could not save cache: {e}")
302
+
303
def generate_topic_titles_with_ai(
    topics: List[List[str]],
    api_type: str = "huggingface",
    model_name: str = None,
    api_key: str = None,
    use_cache: bool = True,
    cache_file: str = "topic_cache.json",
    language_hint: str = None
) -> List[str]:
    """
    Generate meaningful topic titles using AI APIs with caching support.

    Args:
        topics: List of topics, where each topic is a list of words
        api_type: "huggingface", "gemini", "openai", or "local"
        model_name: Specific model to use (optional)
        api_key: API key (can also be set as environment variable)
        use_cache: Whether to cache results to avoid repeated API calls
        cache_file: File to store cache
        language_hint: Hint about the language (auto-detected per topic if None)

    Returns:
        List of generated topic titles, one per input topic.
    """
    if not topics:
        return []

    # For local mode, use heuristic titles (no network calls at all)
    if api_type == "local":
        return [create_heuristic_title(topic, idx) for idx, topic in enumerate(topics)]

    # Load the cache once; reused again below when saving.
    cache = load_cache(cache_file) if use_cache else {}
    cache_key = get_cache_key(topics, api_type, model_name or "")
    if use_cache and cache_key in cache:
        print(f"Using cached topic titles for {cache_key}")
        return cache[cache_key]

    titles = []

    for idx, topic in enumerate(topics):
        if not isinstance(topic, list) or len(topic) < 3:
            # Fallback to simple title
            titles.append(f"Topic {idx + 1}: General Discussion")
            continue

        # BUG FIX: auto-detection previously assigned back into
        # `language_hint`, so the language detected for the FIRST topic was
        # reused for every later topic. Detect per topic without mutating
        # the parameter.
        topic_language = language_hint or detect_language_from_words(topic)

        # Create prompt for this topic
        prompt = create_topic_title_prompt(topic, idx, topic_language)

        try:
            if api_type == "huggingface":
                title = call_huggingface_api(prompt, model_name, api_key)
            elif api_type == "gemini":
                title = call_gemini_api(prompt, model_name, api_key)
            elif api_type == "openai":
                title = call_openai_compatible_api(prompt, model_name, api_key)
            else:
                raise ValueError(f"Unknown API type: {api_type}")

            # If AI didn't generate a good title, use fallback
            if not title or len(title) < 5:
                title = create_heuristic_title(topic, idx)

        except Exception as e:
            print(f"Error generating title for topic {idx + 1}: {e}")
            title = create_heuristic_title(topic, idx)

        # Timestamped progress line for debugging
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{timestamp}] Topic {idx + 1}: {title}")

        titles.append(title)

    # Save to cache (reuses the cache loaded above instead of re-reading the file)
    if use_cache and titles:
        cache[cache_key] = titles
        save_cache(cache, cache_file)

    return titles
394
+
395
# Main function for backward compatibility
def generate_topic_titles(
    topics,
    api_type="huggingface",
    hf_token=None,
    model_name=None,
    use_cache=True,
    **kwargs
):
    """
    Main wrapper function with backward compatibility.

    Args:
        topics: List of topics
        api_type: "local", "huggingface", "gemini", or "openai"
        hf_token: For backward compatibility (Hugging Face token)
        model_name: Specific model to use
        use_cache: Whether to use caching
        **kwargs: Additional parameters (`api_key`, `language_hint`)

    Returns:
        List of topic titles
    """
    # SECURITY FIX: a live OpenRouter API key was previously hard-coded here
    # as the *name* passed to kwargs.get(), which both leaked the secret in
    # source control and always returned None. Secrets must come from the
    # caller or environment variables — never live in code. (The leaked key
    # should be revoked.)
    api_key = hf_token or kwargs.get('api_key')
    language_hint = kwargs.get('language_hint')

    # BUG FIX: api_type and model_name were accepted but ignored — the call
    # below was hard-coded to "huggingface" and a fixed Mixtral model.
    # Forward the caller's choices (defaults are unchanged).
    return generate_topic_titles_with_ai(
        topics=topics,
        api_type=api_type,
        model_name=model_name,
        api_key=api_key,
        use_cache=use_cache,
        language_hint=language_hint
    )