Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import tempfile | |
| import base64 | |
| import re | |
| import socket | |
| import os | |
| from pathlib import Path | |
| from typing import Optional, Tuple | |
| import threading | |
| import time | |
| import atexit | |
# Housekeeping policy for the generated download files.
# Generated files live in an `outputs` folder next to this script.
OUTPUTS_DIR = Path(__file__).parent.joinpath('outputs')
# Maximum age of a generated file, in seconds (24 hours).
OUTPUT_CLEANUP_TTL = 24 * 60 * 60
# Upper bound on how many generated files are retained.
OUTPUT_CLEANUP_MAX_FILES = 500
# Pause between two background cleanup passes, in seconds (1 hour).
OUTPUT_CLEANUP_INTERVAL = 3600
| def _cleanup_outputs(out_dir: Path = None, max_files: int = None, ttl: int = None): | |
| """Delete old files in `out_dir` older than `ttl` seconds and keep at most | |
| `max_files` newest files. If parameters are None, use module defaults.""" | |
| if out_dir is None: | |
| out_dir = OUTPUTS_DIR | |
| if not out_dir.exists(): | |
| return | |
| if max_files is None: | |
| max_files = OUTPUT_CLEANUP_MAX_FILES | |
| if ttl is None: | |
| ttl = OUTPUT_CLEANUP_TTL | |
| now = time.time() | |
| files = [p for p in out_dir.iterdir() if p.is_file()] | |
| # Remove files older than ttl | |
| for p in files: | |
| try: | |
| if now - p.stat().st_mtime > ttl: | |
| p.unlink() | |
| except Exception: | |
| pass | |
| # Re-list and trim to max_files | |
| files = sorted([p for p in out_dir.iterdir() if p.is_file()], key=lambda p: p.stat().st_mtime, reverse=True) | |
| if len(files) > max_files: | |
| for p in files[max_files:]: | |
| try: | |
| p.unlink() | |
| except Exception: | |
| pass | |
def _cleanup_all_on_exit():
    """Delete every regular file in the outputs folder (exit-time hook).

    Any error, whether for a single entry or for the folder as a whole, is
    swallowed so that this function never raises.
    """
    try:
        if not OUTPUTS_DIR.exists():
            return
        for entry in OUTPUTS_DIR.iterdir():
            try:
                if not entry.is_file():
                    continue
                entry.unlink()
            except Exception:
                # One undeletable entry must not stop the remaining ones.
                continue
    except Exception:
        return
def _start_periodic_cleanup():
    """Spawn the daemon thread that prunes the outputs folder forever.

    The thread runs one cleanup pass, sleeps ``OUTPUT_CLEANUP_INTERVAL``
    seconds, and repeats; errors raised by a pass are ignored so the loop
    keeps going.
    """
    def _sweep_forever():
        while True:
            try:
                _cleanup_outputs(OUTPUTS_DIR)
            except Exception:
                pass
            time.sleep(OUTPUT_CLEANUP_INTERVAL)

    threading.Thread(
        target=_sweep_forever,
        name='outputs-cleaner',
        daemon=True,
    ).start()
# Import-time side effects, executed in this order:
#   1. create the outputs folder (no error if it already exists),
#   2. start the background cleaner thread,
#   3. register the exit hook that empties the outputs folder.
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
_start_periodic_cleanup()
atexit.register(_cleanup_all_on_exit)
# Ensure the bundled native binary is executable at runtime. This helps when
# files are uploaded via the Hub API or otherwise lose their exec bit.
try:
    os.chmod(Path(__file__).parent / "modulo1y2" / "modulo1y2", 0o755)
except Exception:
    # Best effort: any failure here is deliberately ignored.
    pass
# Project-local phonemizer; imported only after the chmod attempt above.
from eu_phonemizer_v2 import Phonemizer, PhonemizerError
| def _read_uploaded_file(file_obj) -> str: | |
| if not file_obj: | |
| return "" | |
| # gradio will provide a temporary file path | |
| p = Path(file_obj.name) if hasattr(file_obj, "name") else Path(file_obj) | |
| try: | |
| return p.read_text(encoding='utf-8') | |
| except Exception: | |
| return p.read_text(encoding='ISO-8859-15') | |
def process(text: str,
            uploaded_file,
            language: str,
            symbol: str,
            separate_phonemes: bool) -> Tuple[str, Optional[str], str, Optional[str], str, str]:
    """Normalize and phonemize the text box content or an uploaded .txt file.

    When ``uploaded_file`` is truthy its content is used and ``text`` is
    ignored; otherwise ``text`` (or ``""``) is processed.

    Args:
        text: Content of the input text box.
        uploaded_file: Uploaded file (path or object with ``name``) or a falsy
            value when nothing was uploaded.
        language: Language code passed to the phonemizer; ``'eu'`` also selects
            Basque error messages, anything else selects Spanish ones.
        symbol: Symbol set passed to the phonemizer.
        separate_phonemes: Forwarded to ``Phonemizer.getPhonemes``.

    Returns:
        Always a 6-tuple matching the six UI outputs:
        ``(phonemes, phonemes_file, normalized, normalized_file,
        phonemes_path, normalized_path)``. On failure the first element holds
        the error message, the two file slots are ``None`` and the remaining
        elements are empty strings.
    """
    # Prefer uploaded file if present
    if uploaded_file:
        source_text = _read_uploaded_file(uploaded_file)
    else:
        source_text = text or ""

    # Try to instantiate Phonemizer using repo-local modulo1y2 and dicts
    try:
        phon = Phonemizer(language=language, symbol=symbol)
    except PhonemizerError as e:
        if language == 'eu':
            err = f"Ezin izan da fonemizadorea hasi: {e}\nEgiaztatu 'modulo1y2' eta 'dict' karpetak."
        else:
            err = f"No se pudo inicializar el fonemizador: {e}\nComprueba las carpetas 'modulo1y2' y 'dict'."
        # Return 6 outputs matching the UI: result text, file, normalized text, norm file, ph_path, norm_path
        return err, None, "", None, "", ""
    except Exception as e:
        if language == 'eu':
            return f"Hasieratze errore ezezaguna: {e}", None, "", None, "", ""
        # Every exit path must yield six values; a shorter tuple here would
        # not match the six output components the UI callbacks are wired to.
        return f"Error inesperado al inicializar: {e}", None, "", None, "", ""

    # Normalize then get phonemes. Run normalization per original input line so the
    # external normalizer doesn't insert extra newlines across sentences and
    # we preserve the user's original line boundaries.
    try:
        normalized_lines = []
        for ln in source_text.split('\n'):
            if not ln.strip():
                normalized_lines.append('')
                continue
            # normalize each line independently, collapse any internal newlines
            # produced by the external normalizer, collapse multiple whitespace
            # (this avoids producing double spaces when the normalizer inserts
            # a '\n' while the original text already had a space), and strip
            norm_line = phon.normalize(ln)
            norm_line = norm_line.replace('\n', ' ')
            norm_line = re.sub(r"\s+", ' ', norm_line).strip()
            normalized_lines.append(norm_line)
        normalized = '\n'.join(normalized_lines)
        phonemes = phon.getPhonemes(normalized, separate_phonemes=separate_phonemes)
        # Defensive cleanup: if any '|' separators remain, replace them with single spaces
        if isinstance(phonemes, str) and '|' in phonemes:
            phonemes = re.sub(r"\s*\|\s*", " ", phonemes)
    except PhonemizerError as e:
        if language == 'eu':
            msg = f"Fonemizazio errorea: {e}"
        else:
            msg = f"Error del fonemizador: {e}"
        return msg, None, "", None, "", ""
    except Exception as e:
        if language == 'eu':
            msg = f"Errore ezezaguna prozesatzean: {e}"
        else:
            msg = f"Error inesperado al procesar: {e}"
        return msg, None, "", None, "", ""

    # Create persistent downloadable files under outputs/ so the browser can reliably
    # download them using Gradio's `gr.File` component (avoid ephemeral tmp files
    # that some browsers may not fetch correctly).
    out_dir = Path(__file__).parent / 'outputs'
    out_dir.mkdir(parents=True, exist_ok=True)
    from datetime import datetime
    # Microsecond resolution: with a per-second stamp, two requests handled in
    # the same second would write to the same paths and overwrite each other.
    ts = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    ph_file = out_dir / f'phonemes_{ts}.txt'
    norm_file = out_dir / f'normalized_{ts}.txt'
    ph_file.write_text(phonemes, encoding='utf-8')
    norm_file.write_text(normalized, encoding='utf-8')

    # Cleanup old files opportunistically after creating new ones
    try:
        _cleanup_outputs(out_dir)
    except Exception:
        pass

    # Return phonemes and normalized text in all cases (text or uploaded file)
    # so users who upload a .txt can see the processed text inline and download it.
    return phonemes, str(ph_file), normalized, str(norm_file), str(ph_file), str(norm_file)
def download_from_text(text: str) -> Optional[str]:
    """Save ``text`` as a timestamped .txt file under outputs/ for download.

    Returns the path of the written file as a string, or ``None`` (writing
    nothing) when ``text`` is empty.
    """
    if not text:
        return None

    from datetime import datetime

    # The file goes into the outputs/ folder next to this script, under a
    # readable name derived from the current local time.
    target_dir = Path(__file__).parent / 'outputs'
    target_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    destination = target_dir / f'phonemes_{stamp}.txt'
    destination.write_text(text, encoding='utf-8')
    # A plain string path is what Gradio's File component expects to serve.
    return str(destination)
def build_interface():
    """Build and return the Gradio ``Blocks`` UI of the phonemizer.

    The UI contains language / symbol / separation controls, a .txt upload
    box, an input text box, Submit and Clear buttons, and two read-only output
    boxes (normalized text and phonemes), each with a download button. All
    event handlers are wired here; the actual work is delegated to
    ``process()``.
    """
    with gr.Blocks(title="Eu/Es Phonemizer") as demo:
        # Simple header (image removed per user preference)
        # NOTE(review): `header` is not referenced again in this function.
        header = gr.Markdown("# Fonemizadorea — Euskara (eu) eta Gaztelania (es)")
        # Show README instructions in a collapsible panel so users can read
        # usage notes directly inside the app without leaving the UI.
        try:
            readme_path = Path(__file__).parent / "README.md"
            readme_md = ""
            if readme_path.exists():
                _txt = readme_path.read_text(encoding='utf-8')
                # strip YAML front-matter if present
                if _txt.startswith('---'):
                    parts = _txt.split('---', 2)
                    if len(parts) == 3:
                        _txt = parts[2].lstrip('\n')
                readme_md = _txt
            else:
                readme_md = ""
        except Exception:
            # Any problem reading the README simply hides the panel.
            readme_md = ""
        if readme_md:
            with gr.Accordion("Instructions / README", open=False):
                gr.Markdown(readme_md)
        # Style the Submit button to be orange for better visibility (higher specificity)
        gr.HTML("""
<style>
/* Stronger selectors to override theme/defaults */
#submit_btn, #submit_btn button, button#submit_btn, .gradio-container #submit_btn button {
background-color: #ff8c00 !important;
color: white !important;
border-radius: 6px !important;
padding: 6px 12px !important;
border: none !important;
}
#submit_btn:hover, #submit_btn button:hover, button#submit_btn:hover {
background-color: #ff7a00 !important;
}
/* Don't force download buttons to orange */
#download_ph_btn button, #download_norm_btn button { background-color: transparent !important; }
/* Compact upload file box */
#upload_file { max-width: 160px !important; }
#upload_file .gr-file {
height: 32px !important;
padding: 2px 6px !important;
font-size: 0.9rem !important;
line-height: 1 !important;
}
#upload_file .gr-file input[type=file] { height: 32px !important; }
/* Make textareas vertically resizable and more roomy */
#input_text textarea, #normalized_box textarea, #result_box textarea {
resize: vertical !important;
min-height: 120px !important;
max-height: 800px !important;
width: 100% !important;
box-sizing: border-box !important;
}
/* Center container and add padding for a cleaner look */
.gradio-container { max-width: 1100px; margin: 12px auto !important; padding: 8px !important; }
/* Fix controls column width so changing labels doesn't reflow layout.
Use a slightly smaller fixed width so the upload column sits closer. */
/* Make controls column appear taller by increasing internal spacing
between control rows rather than forcing the whole column height.
This avoids adding extra vertical gap between adjacent columns
(upload box / buttons). */
#controls_col { min-width: 220px; max-width: 260px; flex: 0 0 240px; align-self: flex-start; padding-top: 6px; padding-bottom: 6px; box-sizing: border-box; }
/* Increase the gap between controls so the column looks taller without
enlarging its outer box or shifting neighboring columns. */
#controls_col .gr-row { gap: 12px; row-gap: 12px; }
#controls_col .gr-label, #controls_col label { line-height: 1.4; }
/* Ensure the upload column aligns to the top of the row so it doesn't
get vertically centered when other columns grow; keep the upload box
compact but aligned with the controls stack. */
#upload_col { min-height: 110px; display: flex !important; align-items: flex-start !important; justify-content: center !important; align-self: flex-start; padding-top: 6px; }
/* Ensure labels wrap instead of expanding layout */
#controls_col .gr-label, #controls_col label { white-space: normal !important; word-break: break-word !important; }
/* Keep action buttons a fixed size so they don't push layout when language changes */
#submit_btn, #clear_btn { }
/* Enforce pixel-perfect identical size and box-model for both action buttons */
#submit_btn button, #clear_btn button {
width: 120px !important;
height: 40px !important;
min-height: 40px !important;
box-sizing: border-box !important;
padding: 6px 12px !important;
display: inline-flex !important;
align-items: center !important;
justify-content: center !important;
font-size: 14px !important;
line-height: 1 !important;
border-radius: 6px !important;
border: none !important;
margin: 0 !important;
vertical-align: middle !important;
font-family: inherit !important;
background-clip: padding-box !important;
}
/* Make main column flexible and allow it to shrink without pushing controls */
#main_col { flex: 1 1 auto; min-width: 0; }
/* Pull the upload box a bit left to close the gap if needed */
#upload_file { margin-left: -6px !important; }
/* Force a compact file control so it doesn't become taller than the
nearby control stack. */
/* Keep the file control compact so it doesn't exceed nearby controls */
#upload_file .gr-file { max-height: 44px !important; height: 36px !important; box-sizing: border-box !important; }
/* Position decorative image absolutely so it doesn't force wrapping.
Reserve space on the right of #top_row to avoid overlap. */
#top_row { position: relative !important; padding-right: 520px !important; }
#img_col { position: absolute !important; right: 8px !important; top: 6px !important; width: 480px !important; max-width: 100% !important; box-sizing: border-box !important; }
#download_img img { width: 480px !important; max-width: 100% !important; height: auto !important; display:block !important; pointer-events: none !important; user-select: none !important; }
/* Ensure action buttons share the same height and vertical alignment.
Consolidated to authoritative sizing above to avoid conflicting rules. */
/* (Sizing enforced in the main button block above.) */
</style>
""")
        with gr.Row():
            # Left controls column
            with gr.Column(scale=1, elem_id='controls_col'):
                language = gr.Radio(choices=['eu', 'es'], value='eu', label='Hizkuntza / Idioma')
                symbol = gr.Radio(choices=['sampa', 'ipa'], value='sampa', label='Sinboloak / Símbolos (Irteera)')
                # Default checked and Basque-only label; will switch to Spanish when language changes
                separate_phonemes = gr.Checkbox(label='Banatu fonemak espazioz', value=True)
            # Small column to the right of controls that holds the upload box
            with gr.Column(scale=1, elem_id='upload_col'):
                upload = gr.File(file_types=['.txt'], label='Igo .txt fitxategia / Subir archivo .txt', elem_id='upload_file')
            # Decorative/download image column to the right of the upload box.
            # We'll embed the local `img/download.png` as a base64 <img> inside
            # gr.HTML so Gradio doesn't add overlay controls (download/enlarge).
            # Use an integer `scale` to avoid Gradio warnings; keep the image
            # column compact by using a small integer scale and reserving width
            # via CSS (#img_col). Changing to `scale=1` prevents the float-scale
            # warning while preserving layout.
            # Decorative image removed to avoid layout shifting on narrow screens.
            # If you want to re-enable a static image later, add a small inline
            # SVG or a compact image component here instead of the large PNG.
            # Main column on the right: buttons above the wide input textbox
            with gr.Column(scale=3, elem_id='main_col'):
                with gr.Row():
                    submit_btn = gr.Button('Submit', elem_id='submit_btn')
                    clear_btn = gr.Button('Clear', elem_id='clear_btn')
                with gr.Row():
                    with gr.Column(scale=5):
                        input_text = gr.Textbox(lines=12, elem_id='input_text', label="Sarrera testua (uzteko hutsik .txt fitxategia igo behar baduzu) / Texto de entrada (dejar vacío si subes un .txt)")
        # Outputs area: normalized text and phoneme output side-by-side
        with gr.Row():
            with gr.Column(scale=1):
                normalized_box = gr.Textbox(lines=12, elem_id='normalized_box', label='Normalizatua', interactive=False)
                download_norm_btn = gr.DownloadButton('Deskargatu normalizatua', elem_id='download_norm_btn')
            with gr.Column(scale=1):
                result_box = gr.Textbox(lines=12, elem_id='result_box', label='Fonemak', interactive=False)
                download_ph_btn = gr.DownloadButton('Deskargatu fonemak', elem_id='download_ph_btn')
        # hidden boxes to hold latest generated file paths so download buttons can trigger
        ph_path_box = gr.Textbox(visible=False, elem_id='ph_path_box')
        norm_path_box = gr.Textbox(visible=False, elem_id='norm_path_box')

        def _on_click(input_text, upload, language, symbol, separate_phonemes):
            # Thin adapter: forwards the five UI inputs to process() unchanged.
            return process(input_text, upload, language, symbol, separate_phonemes)

        # When a user uploads a .txt file, read its contents and populate the
        # `input_text` box so they can review or edit before sending.
        # NOTE(review): process() uses the uploaded file whenever one is
        # attached, so edits made in the text box appear to be ignored until
        # the upload is cleared — confirm this is intended.
        def _on_upload(uploaded_file):
            if not uploaded_file:
                return gr.update(value="")
            try:
                content = _read_uploaded_file(uploaded_file)
            except Exception:
                # An unreadable upload leaves the text box empty.
                content = ''
            return gr.update(value=content)

        def _clear_all():
            # Clear input, outputs and any hidden path boxes so UI resets
            return (
                gr.update(value=""), # input_text
                gr.update(value=None), # upload (clear any uploaded file)
                gr.update(value=""), # normalized_box
                gr.update(value=""), # result_box
                gr.update(value=None), # download_ph_btn
                gr.update(value=None), # download_norm_btn
                gr.update(value=""), # ph_path_box
                gr.update(value="") # norm_path_box
            )

        # Re-run processing automatically when symbol or separation options change
        # so users don't have to press the Process button again.
        symbol.change(fn=_on_click, inputs=[input_text, upload, language, symbol, separate_phonemes], outputs=[result_box, download_ph_btn, normalized_box, download_norm_btn, ph_path_box, norm_path_box])
        separate_phonemes.change(fn=_on_click, inputs=[input_text, upload, language, symbol, separate_phonemes], outputs=[result_box, download_ph_btn, normalized_box, download_norm_btn, ph_path_box, norm_path_box])
        # Populate the input textbox when a file is uploaded so users can see/edit it
        # before sending. Does not auto-run processing.
        upload.change(fn=_on_upload, inputs=[upload], outputs=[input_text])

        # Update UI texts when language selection changes
        def _update_language_ui(lang):
            # Note: we intentionally do NOT update the header here to avoid
            # large DOM changes that reflow the layout when switching languages.
            # The eight updates below are listed in the same order as the
            # `outputs` of `language.change`; the last two blank the hidden
            # path boxes.
            if lang == 'eu':
                return (
                    gr.update(label='Sinboloak (Irteera)'), # symbol
                    gr.update(label='Banatu fonemak espazioz'), # separate_phonemes
                    # keep input/upload labels stable (do not update them to avoid reflow)
                    gr.update(label='Fonemak'),
                    gr.update(label='Deskargatu irteera (.txt)'),
                    gr.update(label='Normalizatua'),
                    gr.update(label='Deskargatu normalizatua (.txt)'),
                    gr.update(value=''),
                    gr.update(value='')
                )
            else:
                return (
                    gr.update(label='Símbolos (Salida)'),
                    gr.update(label='Separar fonemas con espacios'),
                    # keep input/upload labels stable (do not update them to avoid reflow)
                    gr.update(label='Fonemas'),
                    gr.update(label='Descargar salida (.txt)'),
                    gr.update(label='Normalizado'),
                    gr.update(label='Descargar normalizado (.txt)'),
                    gr.update(value=''),
                    gr.update(value='')
                )

        # Note: don't include `header`, `input_text`, upload or action buttons
        # in outputs to avoid reflow when changing language. Only update the
        # smaller output labels and hidden path boxes which the function
        # actually returns (8 outputs).
        language.change(fn=_update_language_ui, inputs=[language], outputs=[symbol, separate_phonemes, result_box, download_ph_btn, normalized_box, download_norm_btn, ph_path_box, norm_path_box])
        submit_btn.click(fn=_on_click, inputs=[input_text, upload, language, symbol, separate_phonemes], outputs=[result_box, download_ph_btn, normalized_box, download_norm_btn, ph_path_box, norm_path_box])
        clear_btn.click(fn=_clear_all, inputs=[], outputs=[input_text, upload, normalized_box, result_box, download_ph_btn, download_norm_btn, ph_path_box, norm_path_box])

        # Note: download buttons are created in the outputs area above.
        def _download_file(path: str):
            # Keep a simple path-return helper for backwards compatibility
            # Returns the path only when it is non-empty and exists on disk.
            if not path:
                return None
            p = Path(path)
            if not p.exists():
                return None
            return str(p)

        # Provide download callbacks that generate the outputs on-demand so a
        # single click will both create and return the file path to the browser.
        def _download_ph_from_inputs(input_text, upload, language, symbol, separate_phonemes):
            # Call the same `process()` function to ensure files are generated
            res = process(input_text, upload, language, symbol, separate_phonemes)
            # process() returns (result_text, ph_path, normalized_text, norm_path, ph_path, norm_path)
            if isinstance(res, tuple) and len(res) >= 2:
                return _download_file(res[1])
            return None

        def _download_norm_from_inputs(input_text, upload, language, symbol, separate_phonemes):
            # Same as above, but returns the normalized-text file (index 3).
            res = process(input_text, upload, language, symbol, separate_phonemes)
            if isinstance(res, tuple) and len(res) >= 4:
                return _download_file(res[3])
            return None

        # Wire the DownloadButtons to generate-and-return callbacks so a single
        # click performs generation and triggers immediate download.
        download_ph_btn.click(fn=_download_ph_from_inputs, inputs=[input_text, upload, language, symbol, separate_phonemes], outputs=[download_ph_btn])
        download_norm_btn.click(fn=_download_norm_from_inputs, inputs=[input_text, upload, language, symbol, separate_phonemes], outputs=[download_norm_btn])
    return demo
| def _find_free_port(start: int = 7860, end: int = 7870) -> Optional[int]: | |
| """Find a free TCP port in the given inclusive range.""" | |
| for port in range(start, end + 1): | |
| with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: | |
| try: | |
| s.bind(('0.0.0.0', port)) | |
| return port | |
| except OSError: | |
| continue | |
| return None | |
if __name__ == '__main__':
    app = build_interface()

    # An explicit GRADIO_SERVER_PORT takes priority; an unparsable value is
    # reported and then treated as if the variable were not set.
    port = None
    env_port = os.environ.get('GRADIO_SERVER_PORT')
    if env_port:
        try:
            port = int(env_port)
        except ValueError:
            print(f"Invalid GRADIO_SERVER_PORT='{env_port}', falling back to automatic selection.")

    # No usable override: probe the 7860-7880 range for a free port.
    if port is None:
        port = _find_free_port(7860, 7880)
    if port is None:
        raise OSError("No free port found in range 7860-7880. Set GRADIO_SERVER_PORT to a free port.")

    print(f"Launching Gradio on port {port} (server_name=0.0.0.0)")
    app.launch(server_name='0.0.0.0', server_port=port)