# Source: Hugging Face Space by ikram98ai — commit 1b741b9 ("adding progress")
"""
Multi-Language Detection Demo with Gradio and OpenAI
Supports text, audio, and video input for language detection
"""
import gradio as gr
import openai
from openai import OpenAI
import json
import os
from pydantic import BaseModel
import tempfile
import base64
from moviepy import VideoFileClip
# Module-level client placeholder; actual clients are created per request
# via get_client() using the API key entered in the UI.
client = None

# Base URL of Google's OpenAI-compatible endpoint, used for "gemini*" models.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
def get_client(api_key, model_name):
    """Build an OpenAI-compatible client for the selected model.

    Model names starting with "gemini" (case-insensitive) are routed through
    Google's OpenAI-compatible endpoint; every other name uses the default
    OpenAI endpoint.
    """
    is_gemini = model_name.lower().startswith("gemini")
    if is_gemini:
        return OpenAI(api_key=api_key, base_url=GEMINI_BASE_URL)
    return OpenAI(api_key=api_key)
def extract_audio_from_video(video_path):
    """Extract the audio track of a video file to a temporary MP3.

    Args:
        video_path: Path to the source video file.

    Returns:
        Path of the newly written temporary .mp3 file. The caller is
        responsible for deleting it when done.

    Raises:
        Exception: If the video cannot be opened, has no audio track, or
            the audio cannot be written.
    """
    video = None
    try:
        # NamedTemporaryFile(delete=False) replaces the insecure/deprecated
        # tempfile.mktemp(): the file is created atomically, closing the
        # race window between name generation and file creation.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        video = VideoFileClip(video_path)
        if video.audio is None:
            # Clearer than the AttributeError the next line would raise.
            raise ValueError("video has no audio track")
        video.audio.write_audiofile(audio_path, codec='libmp3lame')
        return audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}")
    finally:
        # Close the clip to release file handles / reader subprocesses,
        # which the original code leaked.
        if video is not None:
            video.close()
def transcribe_audio(api_key, audio_path, model_name="gemini-2.5-flash"):
    """Transcribe an audio file to text.

    Gemini models receive the audio inline as base64 through the chat API;
    all other (OpenAI) models delegate to the Whisper transcription endpoint.

    Args:
        api_key: API key for the selected provider.
        audio_path: Path to the audio file to transcribe.
        model_name: Model identifier; a "gemini" prefix selects the Gemini path.

    Returns:
        The transcription text.

    Raises:
        Exception: Wrapping any provider failure.
    """
    # Use a local client instead of mutating the module-level `client` global;
    # every handler builds its own client from the per-request API key anyway.
    transcriber = get_client(api_key, model_name)
    if model_name.lower().startswith("gemini"):
        # Gemini audio understanding: inline base64 payload in a chat message.
        try:
            with open(audio_path, "rb") as audio_file:
                base64_audio = base64.b64encode(audio_file.read()).decode('utf-8')
            # Derive the container format from the file extension; anything
            # outside the supported set falls back to mp3. (Replaces the
            # original no-op if/elif chain that reassigned equal values.)
            audio_format = audio_path.split('.')[-1].lower()
            if audio_format not in ("mp3", "wav", "webm", "ogg"):
                audio_format = "mp3"
            response = transcriber.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "Transcribe this audio file exactly as spoken. Return only the transcription text, nothing else."
                            },
                            {
                                "type": "input_audio",
                                "input_audio": {
                                    "data": base64_audio,
                                    "format": audio_format
                                }
                            }
                        ]
                    }
                ]
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            raise Exception(f"Gemini transcription failed: {str(e)}")
    else:
        # OpenAI models: use the dedicated Whisper transcription endpoint.
        try:
            with open(audio_path, "rb") as audio_file:
                transcript = transcriber.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text"
                )
            return transcript
        except Exception as e:
            raise Exception(f"Whisper transcription failed: {str(e)}")
class Language(BaseModel):
    """One detected language and its share of the analyzed text."""
    # Language code (e.g. "en"); exact scheme is whatever the model returns
    code: str
    # Human-readable language name (e.g. "English")
    name: str
    # Estimated share of the text in this language, on a 0-100 scale
    percentage: float
    # Short excerpt of the analyzed text written in this language
    sample: str
class LanguageDetection(BaseModel):
    """Structured result produced by detect_language()."""
    # All languages found in the text
    languages: list[Language]
    # Code of the dominant language
    primary_language: str
    # True when more than one language was detected
    is_multilingual: bool
    # Model's self-reported confidence label (e.g. "high")
    confidence: str

# Finalize the model schema (pydantic v2 API) so the nested Language
# reference is fully resolved before it is used as a response_format.
LanguageDetection.model_rebuild()
def detect_language(api_key, text, model_name="gemini-2.5-flash") -> LanguageDetection:
    """Detect the language(s) present in *text* via a structured chat call.

    Args:
        api_key: Provider API key.
        text: Text to analyze.
        model_name: Chat model; "gemini*" names route to Google's endpoint.

    Returns:
        A validated LanguageDetection instance.

    Raises:
        Exception: Wrapping any API or response-parsing failure.
    """
    # Local client instead of mutating the module-level `client` global;
    # each call builds its own client from the per-request API key.
    detector = get_client(api_key, model_name)
    # The explicit JSON skeleton keeps weaker models on-schema even though
    # response_format already enforces the structure server-side.
    prompt = f"""Analyze the following text and identify all languages present.
If multiple languages are detected, provide the percentage breakdown.
Respond ONLY with valid JSON in this exact format (no markdown, no code blocks):
{{
  "languages": [
    {{
      "code": "en",
      "name": "English",
      "percentage": 100,
      "sample": "sample text from the language"
    }}
  ],
  "primary_language": "en",
  "is_multilingual": false,
  "confidence": "high"
}}
Text to analyze:
{text}"""
    try:
        response = detector.beta.chat.completions.parse(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a language detection expert. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            response_format=LanguageDetection,
            temperature=0.1,
            max_tokens=1000
        )
        # .parse() returns an already-validated model instance, so no manual
        # json.loads happens here — the original json.JSONDecodeError handler
        # was dead code and has been removed.
        return response.choices[0].message.parsed
    except Exception as e:
        raise Exception(f"Language detection failed: {str(e)}")
def format_results(detection_result: "LanguageDetection", transcribed_text=None) -> str:
    """Render language-detection results as a Markdown report.

    Args:
        detection_result: Parsed detection result; must expose
            ``is_multilingual``, ``primary_language``, ``confidence`` and a
            ``languages`` iterable whose items have ``name``, ``code``,
            ``percentage`` and ``sample`` attributes.
        transcribed_text: Optional transcript to append below the results.

    Returns:
        A Markdown-formatted report string.
    """
    # Accumulate fragments and join once — avoids quadratic `+=` string
    # concatenation as the language list grows.
    parts = ["# 🌍 Language Detection Results\n\n"]
    if detection_result.is_multilingual:
        parts.append("**📊 Status:** Multiple languages detected\n\n")
    else:
        parts.append("**📊 Status:** Single language detected\n\n")
    parts.append(f"**🎯 Primary Language:** {detection_result.primary_language}\n\n")
    parts.append(f"**✅ Confidence:** {detection_result.confidence}\n\n")
    parts.append("---\n\n## Detected Languages:\n\n")
    for lang in detection_result.languages:
        parts.append(f"### {lang.name} ({lang.code})\n")
        parts.append(f"- **Percentage:** {lang.percentage}%\n")
        if lang.sample:
            parts.append(f"- **Sample:** *\"{lang.sample}\"*\n")
        parts.append("\n")
    if transcribed_text:
        parts.append("---\n\n## 📝 Transcribed Text:\n\n")
        parts.append(f"```\n{transcribed_text}\n```\n")
    return "".join(parts)
def process_text_input(api_key, text, model_name):
    """Run language detection on raw text, yielding (status, markdown) pairs.

    Progress messages stream through the first slot; the final yield carries
    the formatted results in the second. Failures are reported via the
    status channel with an error prefix.
    """
    error = None
    if not api_key:
        error = "Please enter your API key"
    elif not text or not text.strip():
        error = "Please enter some text to analyze"
    if error is not None:
        yield f"❌ Error: {error}", ""
        return
    try:
        yield "🔍 Starting language detection... (10%)", ""
        detection = detect_language(api_key, text, model_name)
        yield "🧠 Analyzing results... (70%)", ""
        yield "✅ Done (100%)", format_results(detection)
    except Exception as exc:
        yield f"❌ Error: {str(exc)}", ""
def process_audio_input(api_key, audio_file, model_name):
    """Transcribe an uploaded audio file and detect its language(s).

    Generator yielding (status, markdown) tuples for the UI: progress
    updates first, then the formatted report including the transcript.
    """
    if not api_key:
        yield "❌ Error: Please enter your API key", ""
        return
    if audio_file is None:
        yield "❌ Error: Please upload an audio file", ""
        return
    try:
        yield "🎧 Upload received. Starting transcription... (10%)", ""
        transcript = transcribe_audio(api_key, audio_file, model_name)
        yield "📝 Transcription complete. Detecting language... (60%)", ""
        detection = detect_language(api_key, transcript, model_name)
        yield "🧾 Analysis complete. Formatting results... (90%)", ""
        yield "✅ Done (100%)", format_results(detection, transcript)
    except Exception as exc:
        yield f"❌ Error: {str(exc)}", ""
def process_video_input(api_key, video_file, model_name):
    """Extract audio from a video, transcribe it, and detect languages.

    Generator yielding (status, markdown) tuples for the UI. The temporary
    audio file produced during extraction is always cleaned up in `finally`.
    """
    if not api_key:
        yield "❌ Error: Please enter your API key", ""
        return
    if video_file is None:
        yield "❌ Error: Please upload a video file", ""
        return
    audio_path = None
    try:
        yield "🎬 Received video. Extracting audio... (10%)", ""
        audio_path = extract_audio_from_video(video_file)
        yield "🎧 Audio extracted. Starting transcription... (40%)", ""
        transcribed_text = transcribe_audio(api_key, audio_path, model_name)
        yield "📝 Transcription complete. Detecting language... (70%)", ""
        result = detect_language(api_key, transcribed_text, model_name)
        yield "🧾 Analysis complete. Formatting results... (90%)", ""
        formatted = format_results(result, transcribed_text)
        yield "✅ Done (100%)", formatted
    except Exception as e:
        yield f"❌ Error: {str(e)}", ""
    finally:
        # Best-effort cleanup of the temporary audio file. Catch OSError
        # specifically — the original bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit.
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError:
                pass
# ---------------------------------------------------------------------------
# Gradio UI: shared API-key and model controls on top, then one tab per input
# modality (text / audio / video). Each button streams (progress, results)
# pairs from the corresponding process_* generator into two Markdown panes.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Language Detector") as demo:
    gr.Markdown("""
# 🌍 Multi-Language Detector
Detect and distinguish multiple languages from text, audio, or video input using OpenAI or Gemini APIs.
""")

    # API key input (password-masked in the browser)
    with gr.Row():
        api_key_input = gr.Textbox(
            label="API Key",
            placeholder="sk-... (OpenAI) or GEMINI_API_KEY",
            type="password",
            info="Enter your OpenAI API key or Gemini API key"
        )

    # Model selector (supports GPT and Gemini families)
    model_selector = gr.Dropdown(
        label="Model",
        choices=["gpt-4", "gpt-4o", "gemini-2.5-flash", "gemini-2.5-pro"],
        value="gemini-2.5-flash",
        info="Choose model. Gemini models use Google's API endpoint."
    )

    gr.Markdown("---")

    # Create tabs for different input types
    with gr.Tabs():
        # Text Input Tab
        with gr.Tab("📝 Text Input"):
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type or paste text in any language...",
                        lines=8
                    )
                    text_button = gr.Button("🔍 Detect Language", variant="primary")
                with gr.Column():
                    # Separate progress strip rendered above the results pane
                    text_progress = gr.Markdown(value="", label="Progress")
                    text_output = gr.Markdown(label="Results")

            # Clickable examples, including multilingual mixes
            gr.Examples(
                examples=[
                    ["Hello, how are you today?"],
                    ["Bonjour! Comment allez-vous?"],
                    ["こんにちは、お元気ですか?"],
                    ["Hola, ¿cómo estás? Hello, how are you?"],
                    ["Привет! مرحبا! 你好!"]
                ],
                inputs=text_input,
                label="Example Texts"
            )

        # Audio Input Tab
        with gr.Tab("🎤 Audio Input"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        # filepath: the handler receives a path on disk
                        type="filepath"
                    )
                    audio_button = gr.Button("🔍 Transcribe & Detect Language", variant="primary")
                with gr.Column():
                    audio_progress = gr.Markdown(value="", label="Progress")
                    audio_output = gr.Markdown(label="Results")

        # Video Input Tab
        with gr.Tab("🎥 Video Input"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(
                        label="Upload Video File"
                    )
                    video_button = gr.Button("🔍 Extract Audio, Transcribe & Detect", variant="primary")
                with gr.Column():
                    video_progress = gr.Markdown(value="", label="Progress")
                    video_output = gr.Markdown(label="Results")

    # Wire the buttons: generator handlers stream into (progress, output)
    text_button.click(
        fn=process_text_input,
        inputs=[api_key_input, text_input, model_selector],
        outputs=[text_progress, text_output]
    )

    audio_button.click(
        fn=process_audio_input,
        inputs=[api_key_input, audio_input, model_selector],
        outputs=[audio_progress, audio_output]
    )

    video_button.click(
        fn=process_video_input,
        inputs=[api_key_input, video_input, model_selector],
        outputs=[video_progress, video_output]
    )

# Launch the demo locally (share=False: no public share link)
if __name__ == "__main__":
    demo.launch(share=False)