Spaces:

emmajeed
/

transcriptinator_v2

Build error

App Files Files Community

transcriptinator_v2 / ai_providers.py

emmajeed

Upload 5 files

7ee2bc7 verified 5 months ago

raw

history blame contribute delete

11.2 kB

	"""
	AI Provider Abstraction Layer for Transcriptinator
	Supports multiple AI providers: Gemini and OpenRouter
	"""

	from abc import ABC, abstractmethod
	from typing import Dict, List
	import google.generativeai as genai
	import requests


	class TranscriptionProvider(ABC):
	    """Abstract interface that every AI transcription provider implements.

	    Concrete providers must override all three methods below; the class
	    cannot be instantiated until they do.
	    """

	    @abstractmethod
	    def transcribe(self, audio_file_path: str) -> str:
	        """Produce a transcription for the audio file at ``audio_file_path``."""

	    @abstractmethod
	    def generate_summary(self, text: str) -> str:
	        """Produce a summary of the transcription ``text``."""

	    @abstractmethod
	    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
	        """Extract the key ideas found in the transcription ``text``."""


	class GeminiProvider(TranscriptionProvider):
	    """Google Gemini provider with configurable models.

	    Uses the ``google.generativeai`` client for audio transcription as well
	    as summary and key-idea generation.
	    """

	    # Display name -> model identifier handed to ``genai.GenerativeModel``.
	    AVAILABLE_MODELS = {
	        "Gemini 2.5 Flash": "models/gemini-2.5-flash",
	        "Gemini 2.0 Flash": "models/gemini-2.0-flash-exp",
	        "Gemini 1.5 Flash": "models/gemini-1.5-flash"
	    }

	    # Lower-case file extension -> MIME type sent with the inline audio bytes.
	    AUDIO_MIME_TYPES = {
	        "mp3": "audio/mp3",
	        "wav": "audio/wav",
	        "aif": "audio/aiff",
	        "aiff": "audio/aiff",
	        "aac": "audio/aac",
	        "ogg": "audio/ogg",
	        "flac": "audio/flac",
	    }

	    # Used when the extension is missing or unknown (the historical behaviour).
	    DEFAULT_AUDIO_MIME_TYPE = "audio/mp3"

	    def __init__(self, api_key: str, model_name: str):
	        """Configure the Gemini client and select the model.

	        Args:
	            api_key: Gemini API key passed to ``genai.configure``.
	            model_name: A key of ``AVAILABLE_MODELS``.

	        Raises:
	            KeyError: If ``model_name`` is not a key of ``AVAILABLE_MODELS``.
	        """
	        self.api_key = api_key
	        self.model_name = model_name
	        genai.configure(api_key=api_key)
	        self.model = genai.GenerativeModel(self.AVAILABLE_MODELS[model_name])

	    @classmethod
	    def _guess_audio_mime_type(cls, audio_file_path: str) -> str:
	        """Pick the MIME type from the file extension, defaulting to MP3."""
	        _, dot, extension = str(audio_file_path).rpartition(".")
	        if not dot:
	            return cls.DEFAULT_AUDIO_MIME_TYPE
	        return cls.AUDIO_MIME_TYPES.get(extension.lower(), cls.DEFAULT_AUDIO_MIME_TYPE)

	    @staticmethod
	    def _parse_key_ideas(key_ideas_text: str) -> List[Dict[str, str]]:
	        """Turn a bulleted "idea: description" list into a list of dicts.

	        Leading bullet characters and markdown bold markers around the idea
	        and the description are removed. Lines without a colon become an
	        idea with an empty description.
	        """
	        key_ideas_list = []
	        for raw_line in key_ideas_text.splitlines():
	            item = raw_line.strip().lstrip('-* ')
	            if not item:
	                continue
	            idea, colon, description = item.partition(':')
	            key_ideas_list.append({
	                # ``**Idea:** text`` would otherwise leave stray asterisks
	                # on both halves of the split.
	                'idea': idea.strip(' *'),
	                'description': description.strip(' *') if colon else ''
	            })
	        return key_ideas_list

	    def transcribe(self, audio_file_path: str) -> str:
	        """Generate transcription using Gemini API with timestamps and speakers.

	        The audio bytes are sent inline, tagged with a MIME type derived from
	        the file extension.

	        Args:
	            audio_file_path: Path of the audio file to read.

	        Returns:
	            The text of the model response.

	        Raises:
	            Exception: On any failure (reading the file or calling the API);
	                the original error is attached as ``__cause__``.
	        """
	        try:
	            with open(audio_file_path, "rb") as audio_file:
	                audio_data = audio_file.read()

	            contents = [
	                {
	                    "role": "user",
	                    "parts": [
	                        {
	                            "mime_type": self._guess_audio_mime_type(audio_file_path),
	                            "data": audio_data
	                        },
	                        "Create a clean transcription of the audio file in English. Tag timestamps and speakers separately within the transcription. If speakers can be identified, use their names; otherwise, use 'Speaker 1', 'Speaker 2', etc. Return ONLY the raw transcription text, starting directly with the first line of the transcription. Do not include any introductory phrases, speaker identification plans, completion messages, or any text other than the transcription itself."
	                    ]
	                },
	                {
	                    "role": "model",
	                    "parts": [
	                        "Understood. I will provide a clean, timestamped, and speaker-tagged transcription of the audio file, returning only the transcription text as requested."
	                    ]
	                }
	            ]

	            response = self.model.generate_content(contents)
	            return response.text

	        except Exception as e:
	            raise Exception(f"Error during Gemini transcription: {e}") from e

	    def generate_summary(self, text: str) -> str:
	        """Generate a concise 2-3 sentence summary using Gemini.

	        Returns:
	            The stripped model response, or an ``"Error generating summary: ..."``
	            string if the request fails (no exception is raised).
	        """
	        try:
	            prompt_text = f"""
	Please read the following transcription text and write a concise summary of the main points in 2-3 sentences.

	Transcription Text:
	{text}

	Summary:
	"""

	            response = self.model.generate_content(prompt_text)
	            return response.text.strip()

	        except Exception as e:
	            return f"Error generating summary: {e}"

	    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
	        """Identify 3-5 key ideas from the transcription using Gemini.

	        Returns:
	            A list of ``{'idea': ..., 'description': ...}`` dicts parsed from
	            the model's bulleted answer. On failure a single-element list
	            describing the error is returned (no exception is raised).
	        """
	        try:
	            prompt_text = f"""
	Please read the following transcription text and identify 3-5 key ideas or concepts discussed.
	Return these key ideas as a bulleted list, with each item in the list being an idea followed by a short (1-sentence) description of the idea.

	Transcription Text:
	{text}

	Key Ideas:
	"""

	            response = self.model.generate_content(prompt_text)
	            return self._parse_key_ideas(response.text.strip())

	        except Exception as e:
	            return [{'idea': 'Error generating key ideas', 'description': str(e)}]


	class OpenRouterProvider(TranscriptionProvider):
	    """OpenRouter API provider for text generation (summary/key ideas).

	    Audio transcription is not supported; ``transcribe`` always raises.
	    """

	    # Using DeepSeek R1 - excellent free model for reasoning and text generation
	    MODEL_ID = "deepseek/deepseek-r1-0528:free"
	    API_URL = "https://openrouter.ai/api/v1/chat/completions"

	    # Seconds to wait for OpenRouter. Without a timeout ``requests`` blocks
	    # indefinitely when the connection stalls.
	    REQUEST_TIMEOUT = 120

	    # Transcripts are cut to this many characters before being sent.
	    SUMMARY_MAX_CHARS = 8000
	    KEY_IDEAS_MAX_CHARS = 6000

	    # Upper bound on the number of key ideas returned.
	    MAX_KEY_IDEAS = 5

	    def __init__(self, api_key: str, model_name: str = None):
	        """Store the API key.

	        Args:
	            api_key: OpenRouter API key, sent as a bearer token.
	            model_name: Ignored; the model is fixed to ``MODEL_ID``.
	        """
	        # model_name is ignored for OpenRouter since we use fixed DeepSeek R1
	        self.api_key = api_key

	    def _post_prompt(self, prompt: str):
	        """POST ``prompt`` as a single user message and return the HTTP response."""
	        headers = {
	            "Authorization": f"Bearer {self.api_key}",
	            "Content-Type": "application/json"
	        }
	        payload = {
	            "model": self.MODEL_ID,
	            "messages": [
	                {
	                    "role": "user",
	                    "content": prompt
	                }
	            ]
	        }
	        return requests.post(
	            self.API_URL,
	            headers=headers,
	            json=payload,
	            timeout=self.REQUEST_TIMEOUT,
	        )

	    @staticmethod
	    def _extract_content(result):
	        """Return the first choice's message content, or None when absent."""
	        choices = result.get("choices") if isinstance(result, dict) else None
	        if not choices:
	            return None
	        return choices[0]["message"]["content"]

	    @staticmethod
	    def _strip_list_marker(line: str) -> str:
	        """Remove bold markers, bullets and ``1.`` / ``2)`` numbering from a line."""
	        cleaned = line.strip().replace("**", "").lstrip("-•* ")
	        digit_count = len(cleaned) - len(cleaned.lstrip("0123456789"))
	        # Only treat leading digits as numbering when a delimiter follows, so
	        # titles such as "3D printing" or "2024 outlook" are left intact.
	        if digit_count and cleaned[digit_count:digit_count + 1] in (".", ")"):
	            cleaned = cleaned[digit_count + 1:]
	        return cleaned.strip()

	    @classmethod
	    def _parse_key_ideas(cls, content: str) -> List[Dict[str, str]]:
	        """Parse the model answer into ``{'idea', 'description'}`` dicts.

	        Recognises ``Idea:`` / ``Description:`` pairs. Before the first
	        ``Idea:`` line, any ``title: text`` line is accepted as a fallback.
	        If nothing is parsed, the first sentences of the answer are used.
	        """
	        key_ideas_list = []
	        current_idea = None

	        for raw_line in content.split('\n'):
	            line = cls._strip_list_marker(raw_line)
	            if line.startswith("Idea:"):
	                if current_idea and current_idea['idea']:
	                    key_ideas_list.append(current_idea)
	                current_idea = {'idea': line[len("Idea:"):].strip(), 'description': ''}
	            elif line.startswith("Description:") and current_idea is not None:
	                current_idea['description'] = line[len("Description:"):].strip()
	            elif ':' in line and current_idea is None:
	                # Fallback parsing
	                title, _, detail = line.partition(':')
	                key_ideas_list.append({
	                    'idea': title.strip(),
	                    'description': detail.strip()
	                })

	        # Add last idea if exists
	        if current_idea and current_idea['idea']:
	            key_ideas_list.append(current_idea)

	        # Fallback if parsing fails: just use the first few sentences
	        if not key_ideas_list:
	            sentences = [s.strip() for s in content.split('.') if s.strip()][:5]
	            key_ideas_list = [
	                {'idea': f'Key Point {i}', 'description': sent}
	                for i, sent in enumerate(sentences, 1)
	            ]

	        return key_ideas_list

	    def transcribe(self, audio_file_path: str) -> str:
	        """Not supported - OpenRouter doesn't handle audio.

	        Raises:
	            NotImplementedError: Always.
	        """
	        raise NotImplementedError("OpenRouter doesn't support audio transcription. Use Gemini provider.")

	    def generate_summary(self, text: str) -> str:
	        """Generate summary using OpenRouter DeepSeek R1.

	        Only the first ``SUMMARY_MAX_CHARS`` characters of ``text`` are sent.

	        Returns:
	            The stripped model answer, or a human-readable message when the
	            API returns a non-200 status, an unexpected payload, or the
	            request fails (no exception is raised).
	        """
	        try:
	            # Truncate text if too long
	            text_to_summarize = text[:self.SUMMARY_MAX_CHARS]

	            response = self._post_prompt(
	                f"Please provide a concise 2-3 sentence summary of the following transcription:\n\n{text_to_summarize}"
	            )

	            # Handle errors
	            if response.status_code != 200:
	                return f"Summary unavailable: OpenRouter API error (status {response.status_code})"

	            content = self._extract_content(response.json())
	            if content is None:
	                return "Summary generation completed but format unexpected."

	            return content.strip()

	        except Exception as e:
	            return f"Error generating summary: {e}"

	    def generate_key_ideas(self, text: str) -> List[Dict[str, str]]:
	        """Generate key ideas using OpenRouter DeepSeek R1.

	        Only the first ``KEY_IDEAS_MAX_CHARS`` characters of ``text`` are sent.

	        Returns:
	            At most ``MAX_KEY_IDEAS`` ``{'idea': ..., 'description': ...}``
	            dicts. On a non-200 status, an unexpected payload, or a failed
	            request, a single-element list describing the problem is
	            returned (no exception is raised).
	        """
	        try:
	            # Truncate text if too long
	            text_to_analyze = text[:self.KEY_IDEAS_MAX_CHARS]

	            prompt = f"""Extract 3-5 key ideas from this transcription. Format each as:
	Idea: Brief title
	Description: One sentence explanation

	{text_to_analyze}"""

	            response = self._post_prompt(prompt)

	            if response.status_code != 200:
	                return [{'idea': 'Key ideas unavailable', 'description': f'OpenRouter API error (status {response.status_code})'}]

	            content = self._extract_content(response.json())
	            if content is None:
	                return [{'idea': 'Key ideas extraction', 'description': 'Unable to parse response'}]

	            return self._parse_key_ideas(content)[:self.MAX_KEY_IDEAS]

	        except Exception as e:
	            return [{'idea': 'Error generating key ideas', 'description': str(e)}]


	def get_provider(provider_type: str, api_key: str, model_name: str) -> TranscriptionProvider:
	    """Build the provider instance that matches ``provider_type``.

	    Args:
	        provider_type: ``"Gemini"`` or ``"OpenRouter"``.
	        api_key: API key forwarded to the provider constructor.
	        model_name: Model name forwarded to the provider constructor.

	    Raises:
	        ValueError: If ``provider_type`` is not one of the known names.
	    """
	    registry = (
	        ("Gemini", GeminiProvider),
	        ("OpenRouter", OpenRouterProvider),
	    )
	    for label, provider_cls in registry:
	        if provider_type == label:
	            return provider_cls(api_key, model_name)
	    raise ValueError(f"Unknown provider: {provider_type}")