File size: 11,185 Bytes
7ee2bc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
"""
AI Provider Abstraction Layer for Transcriptinator
Supports multiple AI providers: Gemini and OpenRouter
"""

from abc import ABC, abstractmethod
from typing import Dict, List
import google.generativeai as genai
import requests


class TranscriptionProvider(ABC):
    """Abstract interface that every AI transcription backend implements.

    Concrete providers must override all three methods below; the class
    itself cannot be instantiated.
    """

    @abstractmethod
    def transcribe(self, audio_file_path: str) -> str:
        """Return the transcription text for the audio file at the given path."""

    @abstractmethod
    def generate_summary(self, text: str) -> str:
        """Return a summary of the supplied transcription text."""

    @abstractmethod
    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Return the key ideas found in the supplied transcription text."""


class GeminiProvider(TranscriptionProvider):
    """Google Gemini provider with configurable models.

    ``AVAILABLE_MODELS`` maps the display names accepted by ``__init__`` to
    the model identifiers handed to ``genai.GenerativeModel``.
    """

    AVAILABLE_MODELS = {
        "Gemini 2.5 Flash": "models/gemini-2.5-flash",
        "Gemini 2.0 Flash": "models/gemini-2.0-flash-exp",
        "Gemini 1.5 Flash": "models/gemini-1.5-flash"
    }

    def __init__(self, api_key: str, model_name: str):
        """Configure the Gemini client and build the model handle.

        Args:
            api_key: Gemini API key, passed to ``genai.configure``.
            model_name: Display name; must be a key of ``AVAILABLE_MODELS``.

        Raises:
            KeyError: If ``model_name`` is not a key of ``AVAILABLE_MODELS``.
        """
        self.api_key = api_key
        self.model_name = model_name
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(self.AVAILABLE_MODELS[model_name])

    def transcribe(self, audio_file_path: str) -> str:
        """Generate transcription using Gemini API with timestamps and speakers.

        The file is read fully into memory and sent inline; the payload is
        always labelled ``audio/mp3`` regardless of the file's extension.

        Args:
            audio_file_path: Path of the audio file to transcribe.

        Returns:
            The text of the model's response.

        Raises:
            Exception: On any failure (file access or API call); the original
                error is attached as ``__cause__``.
        """
        try:
            with open(audio_file_path, "rb") as audio_file:
                audio_data = audio_file.read()

            contents = [
                {
                    "role": "user",
                    "parts": [
                        {
                            "mime_type": "audio/mp3",
                            "data": audio_data
                        },
                        "Create a clean transcription of the audio file in English. Tag timestamps and speakers separately within the transcription. If speakers can be identified, use their names; otherwise, use 'Speaker 1', 'Speaker 2', etc. **Return ONLY the raw transcription text, starting directly with the first line of the transcription.** Do not include any introductory phrases, speaker identification plans, completion messages, or any text other than the transcription itself."
                    ]
                },
                {
                    "role": "model",
                    "parts": [
                        "Understood. I will provide a clean, timestamped, and speaker-tagged transcription of the audio file, returning only the transcription text as requested."
                    ]
                }
            ]

            response = self.model.generate_content(contents)
            return response.text

        except Exception as e:
            # Chain explicitly so the underlying error stays reachable via
            # __cause__ instead of only being flattened into the message.
            raise Exception(f"Error during Gemini transcription: {e}") from e

    def generate_summary(self, text: str) -> str:
        """Generate a concise 2-3 sentence summary using Gemini.

        Args:
            text: Transcription text to summarise.

        Returns:
            The stripped response text, or a string starting with
            ``"Error generating summary:"`` if anything fails (errors are
            reported in-band rather than raised).
        """
        try:
            prompt_text = f"""
            Please read the following transcription text and write a concise summary of the main points in 2-3 sentences.

            Transcription Text:
            {text}

            Summary:
            """

            response = self.model.generate_content(prompt_text)
            return response.text.strip()

        except Exception as e:
            return f"Error generating summary: {e}"

    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Identify 3-5 key ideas from the transcription using Gemini.

        Args:
            text: Transcription text to analyse.

        Returns:
            A list of ``{'idea': ..., 'description': ...}`` dicts parsed from
            the model's bulleted reply. On failure a single-element list
            describing the error is returned instead of raising.
        """
        try:
            prompt_text = f"""
            Please read the following transcription text and identify 3-5 key ideas or concepts discussed.
            Return these key ideas as a bulleted list, with each item in the list being an idea followed by a short (1-sentence) description of the idea.

            Transcription Text:
            {text}

            Key Ideas:
            """

            response = self.model.generate_content(prompt_text)
            return self._parse_key_ideas(response.text.strip())

        except Exception as e:
            return [{'idea': 'Error generating key ideas', 'description': str(e)}]

    @staticmethod
    def _parse_key_ideas(key_ideas_text: str) -> List[Dict[str, str]]:
        """Turn a bulleted ``idea: description`` reply into structured dicts."""
        key_ideas_list = []
        for raw_line in key_ideas_text.splitlines():
            # Remove markdown bold markers first: a bullet such as
            # "* **Idea:** text" would otherwise leave a stray "**" at the
            # start of the description (or at the end of the idea).
            line = raw_line.replace('**', '').strip().lstrip('-*• ').strip()
            if not line:
                # Blank / whitespace-only lines carry no idea.
                continue
            # partition yields an empty description when there is no colon.
            idea, _, description = line.partition(':')
            key_ideas_list.append({
                'idea': idea.strip(),
                'description': description.strip(),
            })
        return key_ideas_list


class OpenRouterProvider(TranscriptionProvider):
    """OpenRouter API provider for text generation (summary/key ideas).

    Audio is not handled by this provider: ``transcribe`` always raises
    ``NotImplementedError``.
    """

    # Using DeepSeek R1 - excellent free model for reasoning and text generation
    MODEL_ID = "deepseek/deepseek-r1-0528:free"
    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # Seconds to wait for OpenRouter. Without an explicit timeout
    # ``requests.post`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 120

    # Input is truncated to these many characters before being sent.
    SUMMARY_MAX_CHARS = 8000
    KEY_IDEAS_MAX_CHARS = 6000

    def __init__(self, api_key: str, model_name: str = None):
        """Store the API key.

        Args:
            api_key: OpenRouter API key, sent as a Bearer token.
            model_name: Accepted for signature parity with other providers
                and ignored; the model is fixed to ``MODEL_ID``.
        """
        # model_name is ignored for OpenRouter since we use fixed DeepSeek R1
        self.api_key = api_key

    def transcribe(self, audio_file_path: str) -> str:
        """Not supported - OpenRouter doesn't handle audio.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("OpenRouter doesn't support audio transcription. Use Gemini provider.")

    def _post_chat(self, prompt: str):
        """POST a single user message to the chat endpoint; return the raw response."""
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.MODEL_ID,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }
        return requests.post(
            self.API_URL,
            headers=headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )

    def generate_summary(self, text: str) -> str:
        """Generate summary using OpenRouter DeepSeek R1.

        Args:
            text: Transcription text; only the first ``SUMMARY_MAX_CHARS``
                characters are sent.

        Returns:
            The stripped model reply, or a human-readable message when the
            API answers with a non-200 status, returns no choices, or any
            exception (including a request timeout) occurs. Nothing is raised.
        """
        try:
            # Truncate text if too long (slicing is a no-op on shorter text)
            text_to_summarize = text[:self.SUMMARY_MAX_CHARS]

            response = self._post_chat(
                f"Please provide a concise 2-3 sentence summary of the following transcription:\n\n{text_to_summarize}"
            )

            # Handle errors
            if response.status_code != 200:
                return f"Summary unavailable: OpenRouter API error (status {response.status_code})"

            result = response.json()

            # Extract the response
            if "choices" in result and len(result["choices"]) > 0:
                return result["choices"][0]["message"]["content"].strip()

            return "Summary generation completed but format unexpected."

        except Exception as e:
            return f"Error generating summary: {e}"

    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Generate key ideas using OpenRouter DeepSeek R1.

        Args:
            text: Transcription text; only the first ``KEY_IDEAS_MAX_CHARS``
                characters are sent.

        Returns:
            At most five ``{'idea': ..., 'description': ...}`` dicts. When the
            API fails, returns no choices, or an exception occurs, a
            single-element list describing the problem is returned instead.
        """
        try:
            # Truncate text if too long (slicing is a no-op on shorter text)
            text_to_analyze = text[:self.KEY_IDEAS_MAX_CHARS]

            prompt = (
                "Extract 3-5 key ideas from this transcription. Format each as:\n"
                "Idea: Brief title\n"
                "Description: One sentence explanation\n"
                "\n"
                f"{text_to_analyze}"
            )

            response = self._post_chat(prompt)

            if response.status_code != 200:
                return [{'idea': 'Key ideas unavailable', 'description': f'OpenRouter API error (status {response.status_code})'}]

            result = response.json()

            # Extract and parse the response
            if "choices" in result and len(result["choices"]) > 0:
                content = result["choices"][0]["message"]["content"]
                return self._parse_key_ideas(content)[:5]

            return [{'idea': 'Key ideas extraction', 'description': 'Unable to parse response'}]

        except Exception as e:
            return [{'idea': 'Error generating key ideas', 'description': str(e)}]

    @staticmethod
    def _parse_key_ideas(content: str) -> List[Dict[str, str]]:
        """Parse an ``Idea:`` / ``Description:`` formatted reply into dicts."""
        key_ideas_list = []
        current_idea = None

        for raw_line in content.split('\n'):
            # Drop markdown bold markers so "**Idea:** X" and "**Idea**: X"
            # both read as "Idea: X".
            line = raw_line.replace("**", "").strip()
            # Used only for label detection: ignore a leading bullet or list
            # number so "- Idea: X" / "1. Idea: X" are recognised as labels
            # rather than falling through to the generic fallback below.
            label_line = line.lstrip("-•*0123456789.) \t")

            if label_line.startswith("Idea:"):
                if current_idea is not None:
                    key_ideas_list.append(current_idea)
                # Slice off only the label prefix; a later "Idea:" inside the
                # title itself must be kept.
                current_idea = {
                    'idea': label_line[len("Idea:"):].strip(),
                    'description': '',
                }
            elif label_line.startswith("Description:") and current_idea is not None:
                current_idea['description'] = label_line[len("Description:"):].strip()
            elif ':' in line and current_idea is None:
                # Fallback parsing for "Title: explanation" style lines seen
                # before any explicit "Idea:" label.
                title, _, detail = line.partition(':')
                title = title.strip('- •*0123456789.').strip()
                if title:
                    key_ideas_list.append({
                        'idea': title,
                        'description': detail.strip()
                    })

        # Add last idea if exists
        if current_idea is not None and current_idea['idea']:
            key_ideas_list.append(current_idea)

        # Fallback if parsing fails: just use the first few sentences
        if not key_ideas_list:
            sentences = [s.strip() for s in content.split('.') if s.strip()][:5]
            key_ideas_list = [
                {'idea': f'Key Point {i}', 'description': sent}
                for i, sent in enumerate(sentences, 1)
            ]

        return key_ideas_list


def get_provider(provider_type: str, api_key: str, model_name: str) -> TranscriptionProvider:
    """Build the provider instance matching ``provider_type``.

    Args:
        provider_type: Either ``"Gemini"`` or ``"OpenRouter"``.
        api_key: API key forwarded to the provider constructor.
        model_name: Model name forwarded to the provider constructor.

    Raises:
        ValueError: If ``provider_type`` is not one of the known names.
    """
    if provider_type == "Gemini":
        return GeminiProvider(api_key, model_name)

    if provider_type == "OpenRouter":
        return OpenRouterProvider(api_key, model_name)

    # Anything else is an unsupported provider name.
    raise ValueError(f"Unknown provider: {provider_type}")