""" AI Provider Abstraction Layer for Transcriptinator Supports multiple AI providers: Gemini and HuggingFace """ from abc import ABC, abstractmethod from typing import Dict, List import google.generativeai as genai import requests class TranscriptionProvider(ABC): """Base class for AI transcription providers""" @abstractmethod def transcribe(self, audio_file_path: str) -> str: """Generate transcription from audio file""" pass @abstractmethod def generate_summary(self, text: str) -> str: """Generate summary from transcription text""" pass @abstractmethod def generate_key_ideas(self, text: str) -> List[Dict[str, str]]: """Extract key ideas from transcription text""" pass class GeminiProvider(TranscriptionProvider): """Google Gemini provider with configurable models""" AVAILABLE_MODELS = { "Gemini 2.5 Flash": "models/gemini-2.5-flash", "Gemini 2.0 Flash": "models/gemini-2.0-flash-exp", "Gemini 1.5 Flash": "models/gemini-1.5-flash" } def __init__(self, api_key: str, model_name: str): self.api_key = api_key self.model_name = model_name genai.configure(api_key=api_key) self.model = genai.GenerativeModel(self.AVAILABLE_MODELS[model_name]) def transcribe(self, audio_file_path: str) -> str: """Generate transcription using Gemini API with timestamps and speakers""" try: with open(audio_file_path, "rb") as audio_file: audio_data = audio_file.read() contents = [ { "role": "user", "parts": [ { "mime_type": "audio/mp3", "data": audio_data }, "Create a clean transcription of the audio file in English. Tag timestamps and speakers separately within the transcription. If speakers can be identified, use their names; otherwise, use 'Speaker 1', 'Speaker 2', etc. **Return ONLY the raw transcription text, starting directly with the first line of the transcription.** Do not include any introductory phrases, speaker identification plans, completion messages, or any text other than the transcription itself." ] }, { "role": "model", "parts": [ "Understood. I will provide a clean, timestamped, and speaker-tagged transcription of the audio file, returning only the transcription text as requested." ] } ] response = self.model.generate_content(contents) return response.text except Exception as e: raise Exception(f"Error during Gemini transcription: {e}") def generate_summary(self, text: str) -> str: """Generate a concise 2-3 sentence summary using Gemini""" try: prompt_text = f""" Please read the following transcription text and write a concise summary of the main points in 2-3 sentences. Transcription Text: {text} Summary: """ response = self.model.generate_content(prompt_text) return response.text.strip() except Exception as e: return f"Error generating summary: {e}" def generate_key_ideas(self, text: str) -> List[Dict[str, str]]: """Identify 3-5 key ideas from the transcription using Gemini""" try: prompt_text = f""" Please read the following transcription text and identify 3-5 key ideas or concepts discussed. Return these key ideas as a bulleted list, with each item in the list being an idea followed by a short (1-sentence) description of the idea. Transcription Text: {text} Key Ideas: """ response = self.model.generate_content(prompt_text) key_ideas_text = response.text.strip() key_ideas_list = [] for item in key_ideas_text.split('\n'): item = item.lstrip('-* ') if item: parts = item.split(':', 1) if len(parts) == 2: idea = parts[0].strip() description = parts[1].strip() key_ideas_list.append({'idea': idea, 'description': description}) else: key_ideas_list.append({'idea': item.strip(), 'description': ''}) return key_ideas_list except Exception as e: return [{'idea': 'Error generating key ideas', 'description': str(e)}] class OpenRouterProvider(TranscriptionProvider): """OpenRouter API provider for text generation (summary/key ideas)""" # Using DeepSeek R1 - excellent free model for reasoning and text generation MODEL_ID = "deepseek/deepseek-r1-0528:free" API_URL = "https://openrouter.ai/api/v1/chat/completions" def __init__(self, api_key: str, model_name: str = None): # model_name is ignored for OpenRouter since we use fixed DeepSeek R1 self.api_key = api_key def transcribe(self, audio_file_path: str) -> str: """Not supported - OpenRouter doesn't handle audio""" raise NotImplementedError("OpenRouter doesn't support audio transcription. Use Gemini provider.") def generate_summary(self, text: str) -> str: """Generate summary using OpenRouter DeepSeek R1""" try: # Truncate text if too long max_chars = 8000 text_to_summarize = text[:max_chars] if len(text) > max_chars else text headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } payload = { "model": self.MODEL_ID, "messages": [ { "role": "user", "content": f"Please provide a concise 2-3 sentence summary of the following transcription:\n\n{text_to_summarize}" } ] } response = requests.post(self.API_URL, headers=headers, json=payload) # Handle errors if response.status_code != 200: return f"Summary unavailable: OpenRouter API error (status {response.status_code})" result = response.json() # Extract the response if "choices" in result and len(result["choices"]) > 0: return result["choices"][0]["message"]["content"].strip() return "Summary generation completed but format unexpected." except Exception as e: return f"Error generating summary: {e}" def generate_key_ideas(self, text: str) -> List[Dict[str, str]]: """Generate key ideas using OpenRouter DeepSeek R1""" try: # Truncate text if too long max_chars = 6000 text_to_analyze = text[:max_chars] if len(text) > max_chars else text headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } payload = { "model": self.MODEL_ID, "messages": [ { "role": "user", "content": f"""Extract 3-5 key ideas from this transcription. Format each as: Idea: Brief title Description: One sentence explanation {text_to_analyze}""" } ] } response = requests.post(self.API_URL, headers=headers, json=payload) if response.status_code != 200: return [{'idea': 'Key ideas unavailable', 'description': f'OpenRouter API error (status {response.status_code})'}] result = response.json() # Extract and parse the response if "choices" in result and len(result["choices"]) > 0: content = result["choices"][0]["message"]["content"] # Parse the response into structured key ideas key_ideas_list = [] lines = content.split('\n') current_idea = None for line in lines: line = line.strip() if line.startswith(("Idea:", "**Idea:")): if current_idea: key_ideas_list.append(current_idea) idea_text = line.replace("Idea:", "").replace("**", "").strip() current_idea = {'idea': idea_text, 'description': ''} elif line.startswith(("Description:", "**Description:")) and current_idea: desc_text = line.replace("Description:", "").replace("**", "").strip() current_idea['description'] = desc_text elif ':' in line and not current_idea: # Fallback parsing parts = line.split(':', 1) if len(parts) == 2: key_ideas_list.append({ 'idea': parts[0].strip('- •*123456789.').strip(), 'description': parts[1].strip() }) # Add last idea if exists if current_idea and current_idea['idea']: key_ideas_list.append(current_idea) # Fallback if parsing fails if not key_ideas_list: # Just use first few sentences sentences = [s.strip() for s in content.split('.') if s.strip()][:5] for i, sent in enumerate(sentences, 1): if sent: key_ideas_list.append({'idea': f'Key Point {i}', 'description': sent}) return key_ideas_list[:5] return [{'idea': 'Key ideas extraction', 'description': 'Unable to parse response'}] except Exception as e: return [{'idea': 'Error generating key ideas', 'description': str(e)}] def get_provider(provider_type: str, api_key: str, model_name: str) -> TranscriptionProvider: """Factory function to create appropriate provider""" if provider_type == "Gemini": return GeminiProvider(api_key, model_name) elif provider_type == "OpenRouter": return OpenRouterProvider(api_key, model_name) else: raise ValueError(f"Unknown provider: {provider_type}")