File size: 2,369 Bytes
c0c9f39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Voice Module - AI description + ElevenLabs TTS"""

import logging
from typing import Tuple, Optional
from pathlib import Path

from ..config import get_config
from ..core.analyzer import CodeAnalyzer
from .elevenlabs import VoiceNarrator

# Module-level logger for the voice pipeline, registered as "codeatlas.voice".
logger = logging.getLogger("codeatlas.voice")

# Instruction text placed ahead of the DOT diagram source when asking the AI
# model for a description. It requests a short, conversational summary without
# bullet points or markdown so the reply can be passed to text-to-speech.
NARRATION_PROMPT = """Analyze this architecture diagram and provide a brief, conversational summary suitable for audio narration. 
Keep it under 200 words. Focus on what the codebase does, key components and their relationships, and the overall architecture pattern.
Provide a natural, spoken summary (no bullet points, no markdown)."""


def generate_audio_summary(
    dot_source: str,
    gemini_api_key: Optional[str] = None,
    elevenlabs_api_key: Optional[str] = None,
    model_name: Optional[str] = None,
    voice_id: Optional[str] = None,
) -> Tuple[Optional[Path], str]:
    """Describe a DOT diagram with the AI analyzer, then narrate it as audio.

    The function first asks ``CodeAnalyzer`` for a short spoken-style summary
    of ``dot_source`` (using ``NARRATION_PROMPT``), then hands that text to
    ``VoiceNarrator`` to produce an audio file.

    Args:
        dot_source: DOT source of the diagram to describe. Empty or
            whitespace-only input is rejected before any API call is made.
        gemini_api_key: Gemini API key; falls back to
            ``get_config().gemini_api_key`` when not given.
        elevenlabs_api_key: ElevenLabs API key; falls back to
            ``get_config().elevenlabs_api_key`` when not given.
        model_name: Model passed to ``CodeAnalyzer``. Defaults to
            ``"gemini-2.0-flash"`` when omitted or empty.
        voice_id: Forwarded unchanged to ``VoiceNarrator.generate``.

    Returns:
        A ``(audio_path, message)`` tuple. ``audio_path`` is ``None`` whenever
        a precondition fails or any step reports an error; ``message`` is a
        user-facing status string in every case.

    This function does not raise: any exception from the analyzer or the
    narrator is logged with its traceback and reported through ``message``.
    """
    # Fallback model used when the caller does not choose one explicitly.
    default_model = "gemini-2.0-flash"

    config = get_config()
    gemini_key = gemini_api_key or config.gemini_api_key
    elevenlabs_key = elevenlabs_api_key or config.elevenlabs_api_key

    # Validate cheap preconditions up front so no API call is made needlessly.
    if not elevenlabs_key:
        return None, "⚠️ ElevenLabs API key not set. Go to Settings."
    if not gemini_key:
        return None, "⚠️ Gemini API key not set. Go to Settings."
    if not dot_source or not dot_source.strip():
        return None, "⚠️ No diagram loaded. Generate or load a diagram first."

    try:
        logger.info("Generating description for audio...")
        # Honor the caller's model choice; previously this argument was ignored.
        analyzer = CodeAnalyzer(
            api_key=gemini_key,
            model_name=model_name or default_model,
        )

        prompt = f"{NARRATION_PROMPT}\n\nDOT diagram:\n```\n{dot_source}\n```"
        # NOTE(review): the second and third arguments are passed as empty
        # placeholders; their meaning is defined by CodeAnalyzer.chat — confirm.
        result = analyzer.chat(prompt, "", None)

        if not result.success or not result.content:
            return None, f"⚠️ Failed to generate description: {result.error or 'Empty response'}"

        logger.info("Generated description: %d chars", len(result.content))

        narrator = VoiceNarrator(api_key=elevenlabs_key)
        if not narrator.available:
            return None, "⚠️ ElevenLabs not available"

        audio_path, error = narrator.generate(result.content, voice_id)
        if error:
            return None, f"❌ {error}"

        return audio_path, "✅ Audio generated!"

    except Exception as e:
        # Top-level boundary for the UI: log the traceback and surface a
        # readable message instead of propagating the failure.
        logger.exception("Audio generation failed")
        return None, f"❌ Error: {e}"