Spaces:
Build error
Build error
| """ | |
| AI Provider Abstraction Layer for Transcriptinator | |
| Supports multiple AI providers: Gemini and OpenRouter | |
| """ | |
| from abc import ABC, abstractmethod | |
| from typing import Dict, List | |
| import google.generativeai as genai | |
| import requests | |
class TranscriptionProvider(ABC):
    """Base class for AI transcription providers.

    All three methods are declared abstract, so a subclass that forgets to
    implement one of them fails at instantiation time with ``TypeError``
    instead of silently returning ``None`` from an inherited stub.
    """

    @abstractmethod
    def transcribe(self, audio_file_path: str) -> str:
        """Generate transcription from audio file.

        Args:
            audio_file_path: Path of the audio file to transcribe.

        Returns:
            The transcription text.
        """

    @abstractmethod
    def generate_summary(self, text: str) -> str:
        """Generate summary from transcription text.

        Args:
            text: The transcription text to summarize.

        Returns:
            The summary text.
        """

    @abstractmethod
    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Extract key ideas from transcription text.

        Args:
            text: The transcription text to analyze.

        Returns:
            A list of dictionaries, one per key idea. The providers in this
            file use the keys ``'idea'`` and ``'description'``.
        """
class GeminiProvider(TranscriptionProvider):
    """Google Gemini provider with configurable models.

    Handles audio transcription as well as summary and key-idea generation
    through a ``genai.GenerativeModel`` instance.
    """

    # Display name -> model identifier handed to ``genai.GenerativeModel``.
    AVAILABLE_MODELS = {
        "Gemini 2.5 Flash": "models/gemini-2.5-flash",
        "Gemini 2.0 Flash": "models/gemini-2.0-flash-exp",
        "Gemini 1.5 Flash": "models/gemini-1.5-flash",
    }

    # Lower-case file extension -> MIME type sent with the inline audio data.
    _AUDIO_MIME_TYPES = {
        "mp3": "audio/mp3",
        "wav": "audio/wav",
        "aiff": "audio/aiff",
        "aac": "audio/aac",
        "ogg": "audio/ogg",
        "flac": "audio/flac",
    }
    # Used when the extension is missing or not listed above.
    _DEFAULT_AUDIO_MIME_TYPE = "audio/mp3"

    def __init__(self, api_key: str, model_name: str):
        """Configure the Gemini client and create the model.

        Args:
            api_key: Gemini API key passed to ``genai.configure``.
            model_name: Display name; must be a key of ``AVAILABLE_MODELS``.

        Raises:
            KeyError: If ``model_name`` is not in ``AVAILABLE_MODELS``.
        """
        self.api_key = api_key
        self.model_name = model_name
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(self.AVAILABLE_MODELS[model_name])

    @classmethod
    def _guess_audio_mime_type(cls, audio_file_path: str) -> str:
        """Return the MIME type for a path based on its file extension."""
        extension = audio_file_path.rsplit(".", 1)[-1].lower()
        return cls._AUDIO_MIME_TYPES.get(extension, cls._DEFAULT_AUDIO_MIME_TYPE)

    @staticmethod
    def _clean_fragment(fragment: str) -> str:
        """Trim whitespace and surrounding Markdown emphasis asterisks."""
        return fragment.strip().strip("*").strip()

    @classmethod
    def _parse_key_ideas(cls, key_ideas_text: str) -> List[Dict[str, str]]:
        """Turn a bulleted "idea: description" list into a list of dicts."""
        key_ideas_list = []
        for raw_line in key_ideas_text.split("\n"):
            # Drop leading bullet characters ("-", "*") and spaces.
            item = raw_line.lstrip("-* ")
            # Text before the first colon is the idea; the rest (possibly
            # empty when there is no colon) is the description.
            idea, _, description = item.partition(":")
            idea = cls._clean_fragment(idea)
            description = cls._clean_fragment(description)
            # Skip blank lines, including ones holding only "\r" or tabs.
            if not idea and not description:
                continue
            key_ideas_list.append({"idea": idea, "description": description})
        return key_ideas_list

    def transcribe(self, audio_file_path: str) -> str:
        """Generate transcription using Gemini API with timestamps and speakers.

        Args:
            audio_file_path: Path of the audio file; it is read fully into
                memory and sent inline with the request.

        Returns:
            The text of the model response.

        Raises:
            Exception: On any failure (file access or API call); the original
                exception is attached as ``__cause__``.
        """
        try:
            with open(audio_file_path, "rb") as audio_file:
                audio_data = audio_file.read()
            contents = [
                {
                    "role": "user",
                    "parts": [
                        {
                            "mime_type": self._guess_audio_mime_type(audio_file_path),
                            "data": audio_data,
                        },
                        "Create a clean transcription of the audio file in English. "
                        "Tag timestamps and speakers separately within the transcription. "
                        "If speakers can be identified, use their names; otherwise, use "
                        "'Speaker 1', 'Speaker 2', etc. "
                        "**Return ONLY the raw transcription text, starting directly with "
                        "the first line of the transcription.** "
                        "Do not include any introductory phrases, speaker identification "
                        "plans, completion messages, or any text other than the "
                        "transcription itself.",
                    ],
                },
                # A canned model acknowledgement is appended after the user turn.
                {
                    "role": "model",
                    "parts": [
                        "Understood. I will provide a clean, timestamped, and "
                        "speaker-tagged transcription of the audio file, returning "
                        "only the transcription text as requested."
                    ],
                },
            ]
            response = self.model.generate_content(contents)
            return response.text
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Error during Gemini transcription: {e}") from e

    def generate_summary(self, text: str) -> str:
        """Generate a concise 2-3 sentence summary using Gemini.

        Args:
            text: The transcription text to summarize.

        Returns:
            The stripped summary text, or a string starting with
            ``"Error generating summary:"`` if the request fails (this method
            does not raise).
        """
        try:
            prompt_text = (
                "\n"
                "Please read the following transcription text and write a concise "
                "summary of the main points in 2-3 sentences.\n"
                "\n"
                "Transcription Text:\n"
                f"{text}\n"
                "\n"
                "Summary:\n"
            )
            response = self.model.generate_content(prompt_text)
            return response.text.strip()
        except Exception as e:
            return f"Error generating summary: {e}"

    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Identify 3-5 key ideas from the transcription using Gemini.

        Args:
            text: The transcription text to analyze.

        Returns:
            A list of ``{'idea': ..., 'description': ...}`` dicts parsed from
            the bulleted response. On failure a single-item list describing
            the error is returned (this method does not raise).
        """
        try:
            prompt_text = (
                "\n"
                "Please read the following transcription text and identify 3-5 key "
                "ideas or concepts discussed.\n"
                "Return these key ideas as a bulleted list, with each item in the "
                "list being an idea followed by a short (1-sentence) description "
                "of the idea.\n"
                "\n"
                "Transcription Text:\n"
                f"{text}\n"
                "\n"
                "Key Ideas:\n"
            )
            response = self.model.generate_content(prompt_text)
            return self._parse_key_ideas(response.text.strip())
        except Exception as e:
            return [{'idea': 'Error generating key ideas', 'description': str(e)}]
class OpenRouterProvider(TranscriptionProvider):
    """OpenRouter API provider for text generation (summary/key ideas).

    Audio transcription is not supported by this provider.
    """

    # Using DeepSeek R1 - excellent free model for reasoning and text generation
    MODEL_ID = "deepseek/deepseek-r1-0528:free"
    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # Seconds before an HTTP request is abandoned; without a timeout
    # ``requests`` may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 120
    # Input is truncated to these many characters before being sent.
    SUMMARY_MAX_CHARS = 8000
    KEY_IDEAS_MAX_CHARS = 6000

    def __init__(self, api_key: str, model_name: str = None):
        """Store the API key.

        Args:
            api_key: OpenRouter API key, sent as a bearer token.
            model_name: Ignored; ``MODEL_ID`` is always used. Accepted so the
                constructor signature matches the other provider.
        """
        # model_name is ignored for OpenRouter since we use fixed DeepSeek R1
        self.api_key = api_key

    def transcribe(self, audio_file_path: str) -> str:
        """Not supported - OpenRouter doesn't handle audio.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("OpenRouter doesn't support audio transcription. Use Gemini provider.")

    def _post_chat(self, prompt: str):
        """POST a single user message to the chat endpoint; return the response."""
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.MODEL_ID,
            "messages": [{"role": "user", "content": prompt}],
        }
        return requests.post(
            self.API_URL,
            headers=headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )

    @staticmethod
    def _first_choice_content(result):
        """Return the first choice's message content, or None if there is none."""
        choices = result.get("choices") if isinstance(result, dict) else None
        if not choices:
            return None
        return choices[0]["message"]["content"]

    @staticmethod
    def _clean_label(label: str) -> str:
        """Remove bold markers and a leading bullet or "N." / "N)" list marker."""
        cleaned = label.replace("**", "").strip().lstrip("-•* \t")
        # Only strip digits that form a list number ("1." / "2)"), so titles
        # such as "2024 plans" or "Top 5" are left intact.
        digit_count = len(cleaned) - len(cleaned.lstrip("0123456789"))
        if digit_count and cleaned[digit_count:digit_count + 1] in (".", ")"):
            cleaned = cleaned[digit_count + 1:]
        return cleaned.strip()

    @classmethod
    def _parse_key_ideas(cls, content: str) -> List[Dict[str, str]]:
        """Parse "Idea:" / "Description:" lines into at most five idea dicts."""
        key_ideas_list = []
        current_idea = None
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if line.startswith(("Idea:", "**Idea:")):
                # A new idea starts; flush the one being built.
                if current_idea:
                    key_ideas_list.append(current_idea)
                idea_text = line.replace("Idea:", "").replace("**", "").strip()
                current_idea = {'idea': idea_text, 'description': ''}
            elif line.startswith(("Description:", "**Description:")) and current_idea:
                desc_text = line.replace("Description:", "").replace("**", "").strip()
                current_idea['description'] = desc_text
            elif ':' in line and not current_idea:
                # Fallback parsing: "<label>: <description>" lines seen before
                # any explicit "Idea:" line.
                label, _, detail = line.partition(':')
                key_ideas_list.append({
                    'idea': cls._clean_label(label),
                    'description': detail.replace("**", "").strip(),
                })
        # Add last idea if exists
        if current_idea and current_idea['idea']:
            key_ideas_list.append(current_idea)
        # Fallback if parsing fails: just use the first few sentences.
        if not key_ideas_list:
            sentences = [s.strip() for s in content.split('.') if s.strip()][:5]
            key_ideas_list = [
                {'idea': f'Key Point {i}', 'description': sent}
                for i, sent in enumerate(sentences, 1)
            ]
        return key_ideas_list[:5]

    def generate_summary(self, text: str) -> str:
        """Generate summary using OpenRouter DeepSeek R1.

        Args:
            text: The transcription text; only the first
                ``SUMMARY_MAX_CHARS`` characters are sent.

        Returns:
            The stripped summary text, or a human-readable message when the
            API returns a non-200 status, the response has no choices, or an
            exception (including a timeout) occurs. This method does not raise.
        """
        try:
            prompt = (
                "Please provide a concise 2-3 sentence summary of the following "
                f"transcription:\n\n{text[:self.SUMMARY_MAX_CHARS]}"
            )
            response = self._post_chat(prompt)
            if response.status_code != 200:
                return f"Summary unavailable: OpenRouter API error (status {response.status_code})"
            content = self._first_choice_content(response.json())
            if content is None:
                return "Summary generation completed but format unexpected."
            return content.strip()
        except Exception as e:
            return f"Error generating summary: {e}"

    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
        """Generate key ideas using OpenRouter DeepSeek R1.

        Args:
            text: The transcription text; only the first
                ``KEY_IDEAS_MAX_CHARS`` characters are sent.

        Returns:
            Up to five ``{'idea': ..., 'description': ...}`` dicts. When the
            API returns a non-200 status, the response has no choices, or an
            exception (including a timeout) occurs, a single-item list
            describing the problem is returned. This method does not raise.
        """
        try:
            prompt = (
                "Extract 3-5 key ideas from this transcription. Format each as:\n"
                "Idea: Brief title\n"
                "Description: One sentence explanation\n"
                "\n"
                f"{text[:self.KEY_IDEAS_MAX_CHARS]}"
            )
            response = self._post_chat(prompt)
            if response.status_code != 200:
                return [{'idea': 'Key ideas unavailable', 'description': f'OpenRouter API error (status {response.status_code})'}]
            content = self._first_choice_content(response.json())
            if content is None:
                return [{'idea': 'Key ideas extraction', 'description': 'Unable to parse response'}]
            return self._parse_key_ideas(content)
        except Exception as e:
            return [{'idea': 'Error generating key ideas', 'description': str(e)}]
def get_provider(provider_type: str, api_key: str, model_name: str) -> TranscriptionProvider:
    """Factory function to create appropriate provider.

    Args:
        provider_type: Either ``"Gemini"`` or ``"OpenRouter"``.
        api_key: API key forwarded to the provider constructor.
        model_name: Model name forwarded to the provider constructor.

    Returns:
        A ``GeminiProvider`` or ``OpenRouterProvider`` instance.

    Raises:
        ValueError: If ``provider_type`` is neither of the two known names.
    """
    # Reject unknown names first so the happy path is a single constructor call.
    if provider_type not in ("Gemini", "OpenRouter"):
        raise ValueError(f"Unknown provider: {provider_type}")
    provider_cls = GeminiProvider if provider_type == "Gemini" else OpenRouterProvider
    return provider_cls(api_key, model_name)