Spaces:
Sleeping
Sleeping
| # exporter.py - Broadcast Export Engine for SyncMaster Enhanced | |
| import os | |
| import time | |
| from datetime import datetime | |
| from typing import List, Dict, Optional, Tuple, Any | |
| from dataclasses import dataclass | |
| import tempfile | |
| import json | |
| # Document generation | |
| from docx import Document | |
| from docx.shared import Inches, Pt | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| from docx.oxml.ns import qn | |
| # Google Docs integration | |
| try: | |
| from googleapiclient.discovery import build | |
| from google.auth.transport.requests import Request | |
| from google.oauth2.credentials import Credentials | |
| from google_auth_oauthlib.flow import InstalledAppFlow | |
| GOOGLE_DOCS_AVAILABLE = True | |
| except ImportError: | |
| GOOGLE_DOCS_AVAILABLE = False | |
@dataclass
class ExportConfig:
    """Configuration for a single export operation.

    Note: without @dataclass these annotated fields would be mere class-level
    annotations and keyword construction would fail; the decorator makes this
    a proper value object (dataclass is already imported at module top).
    """
    export_timestamp: int   # Unix timestamp in milliseconds; segments at/after this are exported
    format_type: str        # 'word' or 'google_docs'
    include_summary: bool   # whether to include an AI-generated summary
    ui_language: str        # 'ar' or 'en' — language of labels in the exported document
    target_language: str    # translation language used when generating the summary
@dataclass
class ExportContent:
    """Structured content for export.

    Note: @dataclass is required here — this type is constructed with keyword
    arguments (ExportContent(title=..., ...)); without the decorator that
    call raises TypeError because the plain class takes no constructor args.
    """
    title: str                      # document title, includes the export timestamp
    export_time: str                # human-readable export time ('%Y-%m-%d %H:%M:%S')
    segments: List[Dict[str, Any]]  # filtered broadcast segments, oldest first
    summary: Optional[str]          # AI summary text, or None when not requested/available
    metadata: Dict[str, Any]        # bookkeeping: timestamps, counts, languages
| class BroadcastExporter: | |
| """ | |
| Main export engine for SyncMaster broadcast content | |
| """ | |
| def __init__(self, translator_instance=None): | |
| self.translator = translator_instance | |
| self.supported_formats = ['word', 'google_docs'] | |
| # Google Docs configuration | |
| self.google_scopes = ['https://www.googleapis.com/auth/documents'] | |
| self.google_creds = None | |
| # UI translations | |
| self.ui_texts = { | |
| 'ar': { | |
| 'title': 'محاضرة - تصدير البرودكاست', | |
| 'export_time': 'وقت التصدير', | |
| 'broadcast_section': 'البرودكاست المُصدر', | |
| 'summary_section': 'الملخص', | |
| 'original_text': 'النص الأصلي', | |
| 'translation': 'الترجمة', | |
| 'time_range': 'المدى الزمني', | |
| 'model_used': 'النموذج المستخدم' | |
| }, | |
| 'en': { | |
| 'title': 'Lecture - Broadcast Export', | |
| 'export_time': 'Export Time', | |
| 'broadcast_section': 'Exported Broadcast', | |
| 'summary_section': 'Summary', | |
| 'original_text': 'Original Text', | |
| 'translation': 'Translation', | |
| 'time_range': 'Time Range', | |
| 'model_used': 'Model Used' | |
| } | |
| } | |
| def filter_segments_from_timestamp(self, segments: List[Dict], export_timestamp: int) -> List[Dict]: | |
| """ | |
| Filter broadcast segments from export timestamp | |
| Args: | |
| segments: List of broadcast segments | |
| export_timestamp: Timestamp in milliseconds | |
| Returns: | |
| Filtered list of segments after the export timestamp | |
| """ | |
| if not segments: | |
| return [] | |
| filtered_segments = [] | |
| for segment in segments: | |
| # Check if segment starts after export timestamp | |
| segment_start = segment.get('start_ms', 0) | |
| if segment_start >= export_timestamp: | |
| filtered_segments.append(segment) | |
| # Sort by start time (oldest first for export) | |
| filtered_segments.sort(key=lambda s: s.get('start_ms', 0)) | |
| return filtered_segments | |
| def prepare_export_content(self, segments: List[Dict], config: ExportConfig) -> ExportContent: | |
| """ | |
| Prepare structured content for export | |
| Args: | |
| segments: Filtered broadcast segments | |
| config: Export configuration | |
| Returns: | |
| Structured export content | |
| """ | |
| ui_lang = config.ui_language | |
| texts = self.ui_texts.get(ui_lang, self.ui_texts['en']) | |
| # Create title with timestamp | |
| export_datetime = datetime.fromtimestamp(config.export_timestamp / 1000) | |
| title = f"{texts['title']} - {export_datetime.strftime('%Y-%m-%d %H:%M:%S')}" | |
| # Format export time | |
| export_time = export_datetime.strftime('%Y-%m-%d %H:%M:%S') | |
| # Generate summary if requested | |
| summary = None | |
| if config.include_summary and segments and self.translator: | |
| summary = self._generate_export_summary(segments, config.target_language) | |
| # Prepare metadata | |
| metadata = { | |
| 'export_timestamp': config.export_timestamp, | |
| 'segment_count': len(segments), | |
| 'ui_language': ui_lang, | |
| 'target_language': config.target_language, | |
| 'generated_at': datetime.now().isoformat() | |
| } | |
| return ExportContent( | |
| title=title, | |
| export_time=export_time, | |
| segments=segments, | |
| summary=summary, | |
| metadata=metadata | |
| ) | |
| def export_to_word(self, content: ExportContent, config: ExportConfig) -> Tuple[str, Optional[str]]: | |
| """ | |
| Generate Word document from export content | |
| Args: | |
| content: Structured export content | |
| config: Export configuration | |
| Returns: | |
| Tuple of (file_path, error_message) | |
| """ | |
| try: | |
| doc = Document() | |
| ui_lang = config.ui_language | |
| texts = self.ui_texts.get(ui_lang, self.ui_texts['en']) | |
| # Set document direction for Arabic | |
| if ui_lang == 'ar': | |
| sections = doc.sections | |
| for section in sections: | |
| sectPr = section._sectPr | |
| sectPr.set(qn('w:bidi'), '1') | |
| # Title | |
| title_para = doc.add_heading(content.title, level=1) | |
| if ui_lang == 'ar': | |
| title_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| # Export time | |
| time_para = doc.add_paragraph(f"{texts['export_time']}: {content.export_time}") | |
| if ui_lang == 'ar': | |
| time_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| doc.add_paragraph("=" * 50) | |
| # Broadcast section | |
| broadcast_heading = doc.add_heading(texts['broadcast_section'], level=2) | |
| if ui_lang == 'ar': | |
| broadcast_heading.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| doc.add_paragraph("=" * 50) | |
| # Add segments | |
| for segment in content.segments: | |
| # Time range | |
| start_time = segment.get('start_ms', 0) / 1000 | |
| end_time = segment.get('end_ms', 0) / 1000 | |
| time_range = f"[{start_time:.2f}s → {end_time:.2f}s]" | |
| time_para = doc.add_paragraph() | |
| time_run = time_para.add_run(f"{texts['time_range']}: {time_range}") | |
| time_run.bold = True | |
| if ui_lang == 'ar': | |
| time_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| # Original text | |
| original_text = segment.get('text', '') | |
| if original_text: | |
| orig_para = doc.add_paragraph() | |
| orig_run = orig_para.add_run(f"{texts['original_text']}: ") | |
| orig_run.bold = True | |
| orig_para.add_run(original_text) | |
| if ui_lang == 'ar': | |
| orig_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| # Translation | |
| translations = segment.get('translations', {}) | |
| if translations: | |
| for lang_code, translation in translations.items(): | |
| if translation: | |
| trans_para = doc.add_paragraph() | |
| trans_run = trans_para.add_run(f"{texts['translation']} ({lang_code.upper()}): ") | |
| trans_run.bold = True | |
| trans_para.add_run(translation) | |
| if ui_lang == 'ar': | |
| trans_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| # Model used | |
| model_used = segment.get('transcription_model') | |
| if model_used: | |
| model_para = doc.add_paragraph(f"{texts['model_used']}: {model_used}") | |
| model_para.style = 'Caption' | |
| if ui_lang == 'ar': | |
| model_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| doc.add_paragraph("-" * 30) | |
| # Summary section | |
| if content.summary: | |
| doc.add_page_break() | |
| summary_heading = doc.add_heading(texts['summary_section'], level=2) | |
| if ui_lang == 'ar': | |
| summary_heading.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| doc.add_paragraph("=" * 50) | |
| summary_para = doc.add_paragraph(content.summary) | |
| if ui_lang == 'ar': | |
| summary_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT | |
| # Save document | |
| timestamp = int(time.time()) | |
| filename = f"broadcast_export_{timestamp}.docx" | |
| temp_dir = tempfile.gettempdir() | |
| file_path = os.path.join(temp_dir, filename) | |
| doc.save(file_path) | |
| return file_path, None | |
| except Exception as e: | |
| return None, f"Error generating Word document: {str(e)}" | |
| def export_to_google_docs(self, content: ExportContent, config: ExportConfig, google_auth) -> Tuple[Optional[str], Optional[str]]: | |
| """ | |
| Create Google Docs document from export content | |
| Args: | |
| content: Structured export content | |
| config: Export configuration | |
| google_auth: GoogleDocsAuth instance | |
| Returns: | |
| Tuple of (document_url, error_message) | |
| """ | |
| if not GOOGLE_DOCS_AVAILABLE: | |
| return None, "Google Docs integration not available. Please install required packages." | |
| try: | |
| # Import here to avoid circular imports | |
| from google_docs_config import prepare_google_docs_content | |
| # Prepare content requests | |
| content_requests = prepare_google_docs_content(content, config) | |
| # Create document using GoogleDocsAuth | |
| doc_url, error = google_auth.create_document(content.title, content_requests) | |
| if error: | |
| return None, error | |
| return doc_url, None | |
| except Exception as e: | |
| return None, f"Error creating Google Docs document: {str(e)}" | |
| def _generate_export_summary(self, segments: List[Dict], target_language: str = 'ar') -> Optional[str]: | |
| """Generate summary for export content""" | |
| if not self.translator or not segments: | |
| return None | |
| try: | |
| # Combine all segment texts | |
| combined_text = " ".join([ | |
| segment.get('text', '') for segment in segments | |
| if segment.get('text') | |
| ]) | |
| if not combined_text.strip(): | |
| return None | |
| # Generate summary using translator | |
| if hasattr(self.translator, 'summarize_text'): | |
| summary, error = self.translator.summarize_text(combined_text, target_language) | |
| return summary if summary else None | |
| elif hasattr(self.translator, 'summarize_text_arabic'): | |
| summary, error = self.translator.summarize_text_arabic(combined_text) | |
| return summary if summary else None | |
| except Exception: | |
| pass | |
| return None | |
| def export_with_fallback(self, content: ExportContent, config: ExportConfig, google_auth=None) -> Tuple[Optional[str], Optional[str]]: | |
| """ | |
| Export with automatic fallback handling | |
| Args: | |
| content: Export content | |
| config: Export configuration | |
| google_auth: GoogleDocsAuth instance (optional) | |
| Returns: | |
| Tuple of (result_path_or_url, error_message) | |
| """ | |
| try: | |
| if config.format_type == 'google_docs' and google_auth: | |
| result, error = self.export_to_google_docs(content, config, google_auth) | |
| if result: | |
| return result, None | |
| # Fallback to Word if Google Docs fails | |
| config.format_type = 'word' | |
| # Export to Word | |
| if config.format_type == 'word': | |
| return self.export_to_word(content, config) | |
| return None, f"Unsupported export format: {config.format_type}" | |
| except Exception as e: | |
| return None, f"Export failed: {str(e)}" |