syncmaster8 / exporter.py
aseelflihan's picture
Initial commit without node_modules
33d3592
# exporter.py - Broadcast Export Engine for SyncMaster Enhanced
import json
import logging
import os
import tempfile
import time
from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict, Optional, Tuple, Any

# Document generation
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn

# Google Docs integration
try:
    from googleapiclient.discovery import build
    from google.auth.transport.requests import Request
    from google.oauth2.credentials import Credentials
    from google_auth_oauthlib.flow import InstalledAppFlow
    GOOGLE_DOCS_AVAILABLE = True
except ImportError:
    GOOGLE_DOCS_AVAILABLE = False
@dataclass
class ExportConfig:
    """Configuration for export operations.

    Attributes:
        export_timestamp: Unix timestamp in milliseconds.
        format_type: Export target, 'word' or 'google_docs'.
        include_summary: Whether to include the AI summary.
        ui_language: Interface language, 'ar' or 'en'.
        target_language: Translation language for the summary.
    """

    export_timestamp: int
    format_type: str
    include_summary: bool
    ui_language: str
    target_language: str
@dataclass
class ExportContent:
    """Structured content for export.

    Attributes:
        title: Document title.
        export_time: Export time as display text.
        segments: Broadcast segments to write out, one dict per segment.
        summary: Summary text, or None when no summary is included.
        metadata: Additional key/value information about the export.
    """

    title: str
    export_time: str
    segments: List[Dict[str, Any]]
    summary: Optional[str]
    metadata: Dict[str, Any]
class BroadcastExporter:
    """
    Main export engine for SyncMaster broadcast content.

    Filters broadcast segments by start time, assembles them (optionally
    with a translator-generated summary) into an ExportContent, and writes
    that content to a Word document or, through a caller-supplied auth
    helper, to Google Docs.
    """

    # Shared logger for failures that are handled on a best-effort basis.
    _logger = logging.getLogger(__name__)

    def __init__(self, translator_instance=None):
        """
        Args:
            translator_instance: Optional object used for summaries. It is
                probed for `summarize_text(text, language)` and, failing
                that, `summarize_text_arabic(text)`.
        """
        self.translator = translator_instance
        self.supported_formats = ['word', 'google_docs']
        # Google Docs configuration
        self.google_scopes = ['https://www.googleapis.com/auth/documents']
        self.google_creds = None
        # UI translations: labels written into the exported document,
        # keyed by interface language.
        self.ui_texts = {
            'ar': {
                'title': 'محاضرة - تصدير البرودكاست',
                'export_time': 'وقت التصدير',
                'broadcast_section': 'البرودكاست المُصدر',
                'summary_section': 'الملخص',
                'original_text': 'النص الأصلي',
                'translation': 'الترجمة',
                'time_range': 'المدى الزمني',
                'model_used': 'النموذج المستخدم'
            },
            'en': {
                'title': 'Lecture - Broadcast Export',
                'export_time': 'Export Time',
                'broadcast_section': 'Exported Broadcast',
                'summary_section': 'Summary',
                'original_text': 'Original Text',
                'translation': 'Translation',
                'time_range': 'Time Range',
                'model_used': 'Model Used'
            }
        }

    def filter_segments_from_timestamp(self, segments: List[Dict], export_timestamp: int) -> List[Dict]:
        """
        Filter broadcast segments from export timestamp

        Args:
            segments: List of broadcast segments
            export_timestamp: Timestamp in milliseconds

        Returns:
            Segments whose 'start_ms' is at or after the export timestamp,
            sorted by 'start_ms' ascending (oldest first). A missing or
            None 'start_ms' is treated as 0.
        """
        if not segments:
            return []

        def start_of(segment: Dict) -> int:
            # `or 0` also covers an explicit None, which would otherwise
            # raise TypeError in the comparison and in the sort key.
            return segment.get('start_ms') or 0

        return sorted(
            (segment for segment in segments if start_of(segment) >= export_timestamp),
            key=start_of,
        )

    def prepare_export_content(self, segments: List[Dict], config: "ExportConfig") -> "ExportContent":
        """
        Prepare structured content for export

        Args:
            segments: Filtered broadcast segments
            config: Export configuration

        Returns:
            Structured export content. The summary is only generated when
            config.include_summary is set, there is at least one segment,
            and a translator was supplied; otherwise it is None.
        """
        ui_lang = config.ui_language
        # Unknown interface languages fall back to the English labels.
        texts = self.ui_texts.get(ui_lang, self.ui_texts['en'])

        # export_timestamp is in milliseconds; fromtimestamp expects seconds
        # and yields a naive local-time datetime.
        export_datetime = datetime.fromtimestamp(config.export_timestamp / 1000)
        export_time = export_datetime.strftime('%Y-%m-%d %H:%M:%S')
        title = f"{texts['title']} - {export_time}"

        # Generate summary if requested
        summary = None
        if config.include_summary and segments and self.translator:
            summary = self._generate_export_summary(segments, config.target_language)

        metadata = {
            'export_timestamp': config.export_timestamp,
            'segment_count': len(segments),
            'ui_language': ui_lang,
            'target_language': config.target_language,
            'generated_at': datetime.now().isoformat()
        }
        return ExportContent(
            title=title,
            export_time=export_time,
            segments=segments,
            summary=summary,
            metadata=metadata
        )

    def export_to_word(self, content: "ExportContent", config: "ExportConfig") -> Tuple[Optional[str], Optional[str]]:
        """
        Generate Word document from export content

        The document is written to a uniquely named file in the system
        temporary directory (prefix 'broadcast_export_<unix-seconds>_',
        suffix '.docx'), so concurrent exports cannot overwrite each other.

        Args:
            content: Structured export content
            config: Export configuration

        Returns:
            Tuple of (file_path, error_message); exactly one of the two is
            None. Any exception is converted into the error message.
        """
        try:
            doc = Document()
            ui_lang = config.ui_language
            texts = self.ui_texts.get(ui_lang, self.ui_texts['en'])
            rtl = ui_lang == 'ar'

            def align(paragraph):
                """Right-align the paragraph for Arabic output and return it."""
                if rtl:
                    paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                return paragraph

            def add_labelled(label, value=None):
                """Add a paragraph with a bold label run and an optional plain value run."""
                paragraph = doc.add_paragraph()
                paragraph.add_run(label).bold = True
                if value is not None:
                    paragraph.add_run(value)
                return align(paragraph)

            # Set document direction for Arabic
            if rtl:
                for section in doc.sections:
                    # NOTE(review): this sets 'w:bidi' as an attribute on
                    # <w:sectPr>; in WordprocessingML bidi is normally a
                    # child element - confirm Word honours this form.
                    section._sectPr.set(qn('w:bidi'), '1')

            # Title and export time
            align(doc.add_heading(content.title, level=1))
            align(doc.add_paragraph(f"{texts['export_time']}: {content.export_time}"))
            doc.add_paragraph("=" * 50)

            # Broadcast section
            align(doc.add_heading(texts['broadcast_section'], level=2))
            doc.add_paragraph("=" * 50)

            for segment in content.segments:
                # Time range, converted from milliseconds to seconds;
                # missing or None bounds are shown as 0.
                start_time = (segment.get('start_ms') or 0) / 1000
                end_time = (segment.get('end_ms') or 0) / 1000
                time_range = f"[{start_time:.2f}s → {end_time:.2f}s]"
                add_labelled(f"{texts['time_range']}: {time_range}")

                # Original text
                original_text = segment.get('text', '')
                if original_text:
                    add_labelled(f"{texts['original_text']}: ", original_text)

                # Translations, one paragraph per non-empty language entry
                translations = segment.get('translations', {})
                if translations:
                    for lang_code, translation in translations.items():
                        if translation:
                            add_labelled(
                                f"{texts['translation']} ({lang_code.upper()}): ",
                                translation,
                            )

                # Model used
                model_used = segment.get('transcription_model')
                if model_used:
                    model_para = doc.add_paragraph(f"{texts['model_used']}: {model_used}")
                    model_para.style = 'Caption'
                    align(model_para)

                doc.add_paragraph("-" * 30)

            # Summary section, on its own page
            if content.summary:
                doc.add_page_break()
                align(doc.add_heading(texts['summary_section'], level=2))
                doc.add_paragraph("=" * 50)
                align(doc.add_paragraph(content.summary))

            # Save document. mkstemp reserves a unique name; a name built
            # from the second-resolution timestamp alone would collide
            # when two exports run within the same second.
            timestamp = int(time.time())
            fd, file_path = tempfile.mkstemp(
                prefix=f"broadcast_export_{timestamp}_", suffix=".docx"
            )
            os.close(fd)
            try:
                doc.save(file_path)
            except Exception:
                # Do not leave the empty placeholder file behind.
                try:
                    os.remove(file_path)
                except OSError:
                    pass
                raise
            return file_path, None
        except Exception as e:
            return None, f"Error generating Word document: {str(e)}"

    def export_to_google_docs(self, content: "ExportContent", config: "ExportConfig", google_auth) -> Tuple[Optional[str], Optional[str]]:
        """
        Create Google Docs document from export content

        Args:
            content: Structured export content
            config: Export configuration
            google_auth: GoogleDocsAuth instance; its
                `create_document(title, requests)` must return a
                (document_url, error) pair.

        Returns:
            Tuple of (document_url, error_message). Any exception,
            including a failed import of `google_docs_config`, is
            converted into the error message.
        """
        if not GOOGLE_DOCS_AVAILABLE:
            return None, "Google Docs integration not available. Please install required packages."
        try:
            # Import here to avoid circular imports
            from google_docs_config import prepare_google_docs_content

            content_requests = prepare_google_docs_content(content, config)
            doc_url, error = google_auth.create_document(content.title, content_requests)
            if error:
                return None, error
            return doc_url, None
        except Exception as e:
            return None, f"Error creating Google Docs document: {str(e)}"

    def _generate_export_summary(self, segments: List[Dict], target_language: str = 'ar') -> Optional[str]:
        """
        Generate summary for export content

        The non-empty 'text' values of all segments are joined with single
        spaces and passed to the translator. This is best effort: any
        failure is logged and reported as None so the export can continue
        without a summary.

        Args:
            segments: Broadcast segments to summarize
            target_language: Language passed to `summarize_text`; it is not
                used when only `summarize_text_arabic` is available.

        Returns:
            The summary text, or None when there is no translator, no text,
            no supported summarize method, an empty result, or an error.
        """
        if not self.translator or not segments:
            return None
        try:
            combined_text = " ".join(
                segment.get('text', '') for segment in segments
                if segment.get('text')
            )
            if not combined_text.strip():
                return None

            if hasattr(self.translator, 'summarize_text'):
                summary, error = self.translator.summarize_text(combined_text, target_language)
            elif hasattr(self.translator, 'summarize_text_arabic'):
                summary, error = self.translator.summarize_text_arabic(combined_text)
            else:
                return None

            if error:
                self._logger.warning("Summary generation reported an error: %s", error)
            return summary if summary else None
        except Exception:
            # Keep the export going, but leave a trace instead of failing silently.
            self._logger.exception("Summary generation failed; exporting without a summary")
            return None

    def export_with_fallback(self, content: "ExportContent", config: "ExportConfig", google_auth=None) -> Tuple[Optional[str], Optional[str]]:
        """
        Export with automatic fallback handling

        For format 'google_docs' with a google_auth instance, Google Docs
        is tried first; if that yields no URL, the failure is logged and
        the export falls back to Word. When the fallback is taken,
        config.format_type is set to 'word' on the caller's object.

        If the format is 'google_docs' but no google_auth is supplied, no
        export is attempted and the unsupported-format error is returned.

        Args:
            content: Export content
            config: Export configuration
            google_auth: GoogleDocsAuth instance (optional)

        Returns:
            Tuple of (result_path_or_url, error_message)
        """
        try:
            if config.format_type == 'google_docs' and google_auth:
                result, error = self.export_to_google_docs(content, config, google_auth)
                if result:
                    return result, None
                self._logger.warning(
                    "Google Docs export failed (%s); falling back to Word", error
                )
                # Fallback to Word if Google Docs fails
                config.format_type = 'word'

            if config.format_type == 'word':
                return self.export_to_word(content, config)
            return None, f"Unsupported export format: {config.format_type}"
        except Exception as e:
            return None, f"Export failed: {str(e)}"