Spaces:

aseelflihan
/

syncmaster8

Sleeping

App Files Files Community

syncmaster8 / exporter.py

aseelflihan

Initial commit without node_modules

33d3592 8 months ago

raw

history blame contribute delete

13.4 kB

	# exporter.py - Broadcast Export Engine for SyncMaster Enhanced

	import json
	import logging
	import os
	import tempfile
	import time
	from dataclasses import dataclass
	from datetime import datetime
	from typing import List, Dict, Optional, Tuple, Any

	# Document generation
	from docx import Document
	from docx.shared import Inches, Pt
	from docx.enum.text import WD_ALIGN_PARAGRAPH
	from docx.oxml.ns import qn

	# Google Docs integration (optional dependency).
	# GOOGLE_DOCS_AVAILABLE records whether the Google client libraries could be
	# imported; export_to_google_docs checks it before doing any work.
	try:
	    from googleapiclient.discovery import build
	    from google.auth.transport.requests import Request
	    from google.oauth2.credentials import Credentials
	    from google_auth_oauthlib.flow import InstalledAppFlow
	    GOOGLE_DOCS_AVAILABLE = True
	except ImportError:
	    # At least one of the Google packages is missing: Word export still works.
	    GOOGLE_DOCS_AVAILABLE = False

	@dataclass
	class ExportConfig:
	    """Configuration for export operations.

	    Attributes:
	        export_timestamp: Unix timestamp in milliseconds.
	        format_type: 'word' or 'google_docs'.
	        include_summary: Whether to include the AI summary.
	        ui_language: Interface language, 'ar' or 'en'.
	        target_language: Translation language for the summary.
	    """

	    export_timestamp: int
	    format_type: str
	    include_summary: bool
	    ui_language: str
	    target_language: str

	@dataclass
	class ExportContent:
	    """Structured content for export.

	    Attributes:
	        title: Document title.
	        export_time: Export time, already formatted as text.
	        segments: Broadcast segments to render, one dict per segment.
	        summary: Optional summary text; None when no summary is included.
	        metadata: Extra key/value information about the export.
	    """

	    title: str
	    export_time: str
	    segments: List[Dict[str, Any]]
	    summary: Optional[str]
	    metadata: Dict[str, Any]

	logger = logging.getLogger(__name__)


	class BroadcastExporter:
	    """Main export engine for SyncMaster broadcast content.

	    Turns a list of broadcast segments (dicts) into either a Word document
	    saved in the system temp directory or a Google Docs document created
	    through a caller-supplied auth helper.
	    """

	    def __init__(self, translator_instance=None):
	        """Create an exporter.

	        Args:
	            translator_instance: Optional object used to build summaries. It is
	                expected to expose ``summarize_text(text, target_language)`` or
	                ``summarize_text_arabic(text)``, each returning a
	                ``(summary, error)`` pair. Without it no summary is generated.
	        """
	        self.translator = translator_instance
	        self.supported_formats = ['word', 'google_docs']

	        # Google Docs configuration (stored here; not read by the methods below)
	        self.google_scopes = ['https://www.googleapis.com/auth/documents']
	        self.google_creds = None

	        # UI translations, keyed by interface language then by label name
	        self.ui_texts = {
	            'ar': {
	                'title': 'محاضرة - تصدير البرودكاست',
	                'export_time': 'وقت التصدير',
	                'broadcast_section': 'البرودكاست المُصدر',
	                'summary_section': 'الملخص',
	                'original_text': 'النص الأصلي',
	                'translation': 'الترجمة',
	                'time_range': 'المدى الزمني',
	                'model_used': 'النموذج المستخدم'
	            },
	            'en': {
	                'title': 'Lecture - Broadcast Export',
	                'export_time': 'Export Time',
	                'broadcast_section': 'Exported Broadcast',
	                'summary_section': 'Summary',
	                'original_text': 'Original Text',
	                'translation': 'Translation',
	                'time_range': 'Time Range',
	                'model_used': 'Model Used'
	            }
	        }

	    @staticmethod
	    def _segment_start_ms(segment: Dict) -> Any:
	        """Return the segment's 'start_ms', treating a missing or None value as 0."""
	        return segment.get('start_ms') or 0

	    def filter_segments_from_timestamp(self, segments: List[Dict], export_timestamp: int) -> List[Dict]:
	        """Filter broadcast segments from the export timestamp onwards.

	        Args:
	            segments: List of broadcast segments.
	            export_timestamp: Timestamp in milliseconds.

	        Returns:
	            The segments whose 'start_ms' is greater than or equal to
	            ``export_timestamp``, sorted by start time (oldest first). An empty
	            list when ``segments`` is empty or None.
	        """
	        if not segments:
	            return []

	        start_of = self._segment_start_ms
	        kept = [segment for segment in segments if start_of(segment) >= export_timestamp]
	        # list.sort is stable, so segments with equal starts keep their input order
	        kept.sort(key=start_of)
	        return kept

	    def prepare_export_content(self, segments: List[Dict], config: 'ExportConfig') -> 'ExportContent':
	        """Prepare structured content for export.

	        Args:
	            segments: Filtered broadcast segments.
	            config: Export configuration.

	        Returns:
	            An ExportContent holding the title, formatted export time, the
	            segments as given, an optional summary and a metadata dict.
	        """
	        ui_lang = config.ui_language
	        # Unknown UI languages fall back to the English labels
	        texts = self.ui_texts.get(ui_lang, self.ui_texts['en'])

	        # export_timestamp is in milliseconds; fromtimestamp expects seconds
	        export_datetime = datetime.fromtimestamp(config.export_timestamp / 1000)
	        export_time = export_datetime.strftime('%Y-%m-%d %H:%M:%S')
	        title = f"{texts['title']} - {export_time}"

	        # Generate summary only if requested and there is something to summarize
	        summary = None
	        if config.include_summary and segments and self.translator:
	            summary = self._generate_export_summary(segments, config.target_language)

	        metadata = {
	            'export_timestamp': config.export_timestamp,
	            'segment_count': len(segments),
	            'ui_language': ui_lang,
	            'target_language': config.target_language,
	            'generated_at': datetime.now().isoformat()
	        }

	        return ExportContent(
	            title=title,
	            export_time=export_time,
	            segments=segments,
	            summary=summary,
	            metadata=metadata
	        )

	    @staticmethod
	    def _align(paragraph, rtl: bool):
	        """Right-align ``paragraph`` when ``rtl`` is true; return the paragraph."""
	        if rtl:
	            paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
	        return paragraph

	    @staticmethod
	    def _add_labelled_paragraph(doc, label: str, body: Optional[str] = None):
	        """Append a paragraph made of a bold ``label`` run and an optional plain ``body`` run."""
	        paragraph = doc.add_paragraph()
	        paragraph.add_run(label).bold = True
	        if body is not None:
	            paragraph.add_run(body)
	        return paragraph

	    def _add_segment_to_doc(self, doc, segment: Dict, texts: Dict[str, str], rtl: bool) -> None:
	        """Append one segment (time range, text, translations, model) to ``doc``."""
	        # Times are stored in milliseconds; a missing or None value counts as 0
	        start_time = (segment.get('start_ms') or 0) / 1000
	        end_time = (segment.get('end_ms') or 0) / 1000
	        time_range = f"[{start_time:.2f}s → {end_time:.2f}s]"
	        self._align(
	            self._add_labelled_paragraph(doc, f"{texts['time_range']}: {time_range}"), rtl
	        )

	        # Original text
	        original_text = segment.get('text', '')
	        if original_text:
	            self._align(
	                self._add_labelled_paragraph(doc, f"{texts['original_text']}: ", original_text),
	                rtl,
	            )

	        # Translations: one paragraph per non-empty language entry
	        translations = segment.get('translations', {})
	        if translations:
	            for lang_code, translation in translations.items():
	                if translation:
	                    label = f"{texts['translation']} ({lang_code.upper()}): "
	                    self._align(self._add_labelled_paragraph(doc, label, translation), rtl)

	        # Model used
	        model_used = segment.get('transcription_model')
	        if model_used:
	            model_para = doc.add_paragraph(f"{texts['model_used']}: {model_used}")
	            model_para.style = 'Caption'
	            self._align(model_para, rtl)

	        doc.add_paragraph("-" * 30)

	    @staticmethod
	    def _reserve_docx_path() -> str:
	        """Create a uniquely named, empty .docx file in the temp dir and return its path.

	        mkstemp guarantees a fresh file, so two exports within the same second
	        cannot overwrite each other.
	        """
	        fd, file_path = tempfile.mkstemp(
	            prefix=f"broadcast_export_{int(time.time())}_", suffix=".docx"
	        )
	        os.close(fd)
	        return file_path

	    def export_to_word(self, content: 'ExportContent', config: 'ExportConfig') -> Tuple[Optional[str], Optional[str]]:
	        """Generate a Word document from export content.

	        Args:
	            content: Structured export content.
	            config: Export configuration; only ``ui_language`` is read here.

	        Returns:
	            Tuple of (file_path, error_message). On success ``error_message`` is
	            None; on failure ``file_path`` is None and the message describes the
	            error.
	        """
	        try:
	            doc = Document()
	            ui_lang = config.ui_language
	            texts = self.ui_texts.get(ui_lang, self.ui_texts['en'])
	            rtl = ui_lang == 'ar'

	            # Set document direction for Arabic
	            # NOTE(review): this writes w:bidi as an attribute of sectPr; OOXML
	            # appears to define it as a child element - confirm Word honours it.
	            if rtl:
	                for section in doc.sections:
	                    section._sectPr.set(qn('w:bidi'), '1')

	            # Title and export time
	            self._align(doc.add_heading(content.title, level=1), rtl)
	            self._align(
	                doc.add_paragraph(f"{texts['export_time']}: {content.export_time}"), rtl
	            )
	            doc.add_paragraph("=" * 50)

	            # Broadcast section
	            self._align(doc.add_heading(texts['broadcast_section'], level=2), rtl)
	            doc.add_paragraph("=" * 50)
	            for segment in content.segments:
	                self._add_segment_to_doc(doc, segment, texts, rtl)

	            # Summary section, on its own page
	            if content.summary:
	                doc.add_page_break()
	                self._align(doc.add_heading(texts['summary_section'], level=2), rtl)
	                doc.add_paragraph("=" * 50)
	                self._align(doc.add_paragraph(content.summary), rtl)

	            # Save document
	            file_path = self._reserve_docx_path()
	            try:
	                doc.save(file_path)
	            except Exception:
	                # Do not leave the empty placeholder file behind
	                if os.path.exists(file_path):
	                    os.remove(file_path)
	                raise
	            return file_path, None

	        except Exception as e:
	            logger.exception("Word export failed")
	            return None, f"Error generating Word document: {str(e)}"

	    def export_to_google_docs(self, content: 'ExportContent', config: 'ExportConfig', google_auth) -> Tuple[Optional[str], Optional[str]]:
	        """Create a Google Docs document from export content.

	        Args:
	            content: Structured export content.
	            config: Export configuration.
	            google_auth: GoogleDocsAuth instance; its
	                ``create_document(title, requests)`` must return a
	                ``(document_url, error)`` pair.

	        Returns:
	            Tuple of (document_url, error_message).
	        """
	        if not GOOGLE_DOCS_AVAILABLE:
	            return None, "Google Docs integration not available. Please install required packages."

	        try:
	            # Import here to avoid circular imports
	            from google_docs_config import prepare_google_docs_content

	            # Prepare content requests
	            content_requests = prepare_google_docs_content(content, config)

	            # Create document using GoogleDocsAuth
	            doc_url, error = google_auth.create_document(content.title, content_requests)
	            if error:
	                return None, error
	            return doc_url, None

	        except Exception as e:
	            logger.exception("Google Docs export failed")
	            return None, f"Error creating Google Docs document: {str(e)}"

	    def _generate_export_summary(self, segments: List[Dict], target_language: str = 'ar') -> Optional[str]:
	        """Generate a summary of the segments' text through the translator.

	        Best effort: any failure is logged and reported as None so that the
	        export can proceed without a summary.

	        Args:
	            segments: Segments whose non-empty 'text' values are joined with spaces.
	            target_language: Language passed to ``summarize_text``; ignored when
	                only ``summarize_text_arabic`` is available.

	        Returns:
	            The summary text, or None when there is no translator, no text, no
	            usable summarize method, an error, or an empty summary.
	        """
	        if not self.translator or not segments:
	            return None

	        try:
	            # Combine all segment texts
	            combined_text = " ".join(
	                segment.get('text', '') for segment in segments if segment.get('text')
	            )
	            if not combined_text.strip():
	                return None

	            # Prefer the language-aware method, then the Arabic-only one
	            if hasattr(self.translator, 'summarize_text'):
	                summary, error = self.translator.summarize_text(combined_text, target_language)
	            elif hasattr(self.translator, 'summarize_text_arabic'):
	                summary, error = self.translator.summarize_text_arabic(combined_text)
	            else:
	                logger.warning("Translator has no summarize method; exporting without summary")
	                return None
	        except Exception:
	            logger.exception("Summary generation failed; exporting without summary")
	            return None

	        if error:
	            logger.warning("Translator reported a summary error: %s", error)
	        return summary if summary else None

	    def export_with_fallback(self, content: 'ExportContent', config: 'ExportConfig', google_auth=None) -> Tuple[Optional[str], Optional[str]]:
	        """Export with automatic fallback handling.

	        A 'google_docs' export that fails falls back to a Word export. In that
	        case ``config.format_type`` is set to 'word' on the caller's object, so
	        the caller can tell which kind of result was produced.

	        Args:
	            content: Export content.
	            config: Export configuration.
	            google_auth: GoogleDocsAuth instance (optional); required for the
	                'google_docs' format.

	        Returns:
	            Tuple of (result_path_or_url, error_message).
	        """
	        try:
	            if config.format_type == 'google_docs':
	                if not google_auth:
	                    # 'google_docs' is a supported format; what is missing is auth
	                    return None, "Google Docs export requires authentication: no google_auth provided"

	                result, error = self.export_to_google_docs(content, config, google_auth)
	                if result:
	                    return result, None

	                # Fallback to Word if Google Docs fails
	                logger.warning("Google Docs export failed (%s); falling back to Word", error)
	                config.format_type = 'word'

	            # Export to Word
	            if config.format_type == 'word':
	                return self.export_to_word(content, config)

	            return None, f"Unsupported export format: {config.format_type}"

	        except Exception as e:
	            logger.exception("Export failed")
	            return None, f"Export failed: {str(e)}"