Ander Arriandiaga committed on
Commit
1a5b6c9
·
1 Parent(s): 71bee8f

docs: add HF Space metadata and app metadata

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
- title: Phonemizer (EUS–ESP)
3
  emoji: "🔤"
4
- colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
  app_file: app.py
@@ -9,19 +9,25 @@ pinned: false
9
  ---
10
 
11
 
12
- # Phonemizer (EUS–ESP)
13
 
14
- # Phonemizer — Gradio demo (Hugging Face Space)
15
 
16
- This Space provides a small web UI to phonemize Basque (eu) and Spanish (es) text.
17
 
18
  How to use
19
  - Input text: paste text into the main box or upload a `.txt` file.
20
  - Language: select `eu` (Basque) or `es` (Spanish).
21
  - Symbols: choose `sampa` (default) or `ipa` for the phoneme output format.
22
- - Separate phonemes: toggle whether phonemes are separated by spaces to make easier to see multi-character phonemes.
23
- - Submit: press `Submit` to run normalization + phonemization.
24
- - Download: use the download buttons to get the phonemes or normalized text as `.txt` files.
 
 
 
 
 
 
25
 
26
  Privacy
27
  - This Space does not store user inputs beyond temporary files used to serve downloads. Do not upload sensitive data.
 
1
  ---
2
+ title: Itzultzaile fonetikoa — Phonetic Translator (EUS–ESP)
3
  emoji: "🔤"
4
+ colorFrom: orange
5
  colorTo: indigo
6
  sdk: gradio
7
  app_file: app.py
 
9
  ---
10
 
11
 
12
+ # Itzultzaile fonetikoa — Phonetic Translator (EUS–ESP)
13
 
14
+ # Itzultzaile fonetikoa — Phonetic Translator (EUS–ESP) — Gradio demo (Hugging Face Space)
15
 
16
+ This Space provides a small web UI to produce phonetic transcriptions for Basque (eu) and Spanish (es) text.
17
 
18
  How to use
19
  - Input text: paste text into the main box or upload a `.txt` file.
20
  - Language: select `eu` (Basque) or `es` (Spanish).
21
  - Symbols: choose `sampa` (default) or `ipa` for the phoneme output format.
22
+ - Separate phones: toggle whether phones are separated by spaces to make it easier to see multi-character phones.
23
+ - Submit: press `Submit` to run normalization + phonetic transcription.
24
+ - Download: use the download buttons to get the phones or normalized text as `.txt` files.
25
+
26
+ ## Notes / Notas
27
+
28
+ - EN: Diacritics are NOT used.
29
+
30
+ - EN: Diphthongs: the semiconsonant is always `w`/`j` (e.g. aire -> `'a j ɾ e`; causó -> `k a w s 'o`).
31
 
32
  Privacy
33
  - This Space does not store user inputs beyond temporary files used to serve downloads. Do not upload sensitive data.
__pycache__/eu_phonemizer_v2.cpython-312.pyc ADDED
Binary file (14.6 kB). View file
 
__pycache__/gradio_phonemizer.cpython-312.pyc ADDED
Binary file (25 kB). View file
 
gradio_phonemizer.py CHANGED
@@ -101,16 +101,17 @@ def _read_uploaded_file(file_obj) -> str:
101
 
102
 
103
  def process(text: str,
104
- uploaded_file,
105
- language: str,
106
- symbol: str,
107
- separate_phonemes: bool) -> Tuple[str, Optional[str], str, Optional[str]]:
108
- """Process either text input or uploaded txt file and return (text_output, download_file_path)
109
-
110
- If the user uploaded a file, the function will return the path to a tmp file
111
- suitable for download as the second return value and an empty text output.
112
- If the user provided text in the box, the function will return the phonemes
113
- as text and also a downloadable txt file containing the same output.
 
114
  """
115
  # Prefer uploaded file if present
116
  source_text = ""
@@ -126,15 +127,15 @@ def process(text: str,
126
  phon = Phonemizer(language=language, symbol=symbol)
127
  except PhonemizerError as e:
128
  if language == 'eu':
129
- err = f"Ezin izan da fonemizadorea hasi: {e}\nEgiaztatu 'modulo1y2' eta 'dict' karpetak."
130
  else:
131
- err = f"No se pudo inicializar el fonemizador: {e}\nComprueba las carpetas 'modulo1y2' y 'dict'."
132
  # Return 6 outputs matching the UI: result text, file, normalized text, norm file, ph_path, norm_path
133
  return err, None, "", None, "", ""
134
  except Exception as e:
135
  if language == 'eu':
136
  return f"Hasieratze errore ezezaguna: {e}", None, "", None, "", ""
137
- return f"Error inesperado al inicializar: {e}", None
138
 
139
 
140
  # Normalize then get phonemes. Run normalization per original input line so the
@@ -163,9 +164,9 @@ def process(text: str,
163
  phonemes = re.sub(r"\s*\|\s*", " ", phonemes)
164
  except PhonemizerError as e:
165
  if language == 'eu':
166
- msg = f"Fonemizazio errorea: {e}"
167
  else:
168
- msg = f"Error del fonemizador: {e}"
169
  return msg, None, "", None, "", ""
170
  except Exception as e:
171
  if language == 'eu':
@@ -181,7 +182,7 @@ def process(text: str,
181
  out_dir.mkdir(parents=True, exist_ok=True)
182
  from datetime import datetime
183
  ts = datetime.now().strftime('%Y%m%d_%H%M%S')
184
- ph_file = out_dir / f'phonemes_{ts}.txt'
185
  norm_file = out_dir / f'normalized_{ts}.txt'
186
  ph_file.write_text(phonemes, encoding='utf-8')
187
  norm_file.write_text(normalized, encoding='utf-8')
@@ -206,7 +207,7 @@ def download_from_text(text: str) -> Optional[str]:
206
  out_dir.mkdir(parents=True, exist_ok=True)
207
  from datetime import datetime
208
  ts = datetime.now().strftime('%Y%m%d_%H%M%S')
209
- filename = f'phonemes_{ts}.txt'
210
  out_path = out_dir / filename
211
  out_path.write_text(text, encoding='utf-8')
212
  # Return the path string so Gradio's File component can serve it
@@ -216,7 +217,7 @@ def download_from_text(text: str) -> Optional[str]:
216
  def build_interface():
217
  with gr.Blocks(title="Eu/Es Phonemizer") as demo:
218
  # Simple header (image removed per user preference)
219
- header = gr.Markdown("# Fonemizadorea — Euskara (eu) eta Gaztelania (es)")
220
 
221
  # Show README instructions in a collapsible panel so users can read
222
  # usage notes directly inside the app without leaving the UI.
@@ -341,7 +342,7 @@ def build_interface():
341
  language = gr.Radio(choices=['eu', 'es'], value='eu', label='Hizkuntza / Idioma')
342
  symbol = gr.Radio(choices=['sampa', 'ipa'], value='sampa', label='Sinboloak / Símbolos (Irteera)')
343
  # Default checked and Basque-only label; will switch to Spanish when language changes
344
- separate_phonemes = gr.Checkbox(label='Banatu fonemak espazioz', value=True)
345
 
346
  # Small column to the right of controls that holds the upload box
347
  with gr.Column(scale=1, elem_id='upload_col'):
@@ -374,8 +375,8 @@ def build_interface():
374
  download_norm_btn = gr.DownloadButton('Deskargatu normalizatua', elem_id='download_norm_btn')
375
 
376
  with gr.Column(scale=1):
377
- result_box = gr.Textbox(lines=12, elem_id='result_box', label='Fonemak', interactive=False)
378
- download_ph_btn = gr.DownloadButton('Deskargatu fonemak', elem_id='download_ph_btn')
379
 
380
  # hidden boxes to hold latest generated file paths so download buttons can trigger
381
  ph_path_box = gr.Textbox(visible=False, elem_id='ph_path_box')
@@ -424,9 +425,9 @@ def build_interface():
424
  if lang == 'eu':
425
  return (
426
  gr.update(label='Sinboloak (Irteera)'), # symbol
427
- gr.update(label='Banatu fonemak espazioz'), # separate_phonemes
428
  # keep input/upload labels stable (do not update them to avoid reflow)
429
- gr.update(label='Fonemak'),
430
  gr.update(label='Deskargatu irteera (.txt)'),
431
  gr.update(label='Normalizatua'),
432
  gr.update(label='Deskargatu normalizatua (.txt)'),
@@ -436,9 +437,9 @@ def build_interface():
436
  else:
437
  return (
438
  gr.update(label='Símbolos (Salida)'),
439
- gr.update(label='Separar fonemas con espacios'),
440
  # keep input/upload labels stable (do not update them to avoid reflow)
441
- gr.update(label='Fonemas'),
442
  gr.update(label='Descargar salida (.txt)'),
443
  gr.update(label='Normalizado'),
444
  gr.update(label='Descargar normalizado (.txt)'),
 
101
 
102
 
103
  def process(text: str,
104
+ uploaded_file,
105
+ language: str,
106
+ symbol: str,
107
+ separate_phonemes: bool) -> Tuple[str, Optional[str], str, Optional[str], Optional[str], Optional[str]]:
108
+ """Process either text input or uploaded txt file and return a 6-tuple:
109
+
110
+ (result_text, ph_file_path, normalized_text, norm_file_path, ph_path, norm_path)
111
+
112
+ If the user uploaded a file, the function will return the path to a file
113
+ suitable for download and also populate the inline text results. When the
114
+ user provided text, both inline text and downloadable files are produced.
115
  """
116
  # Prefer uploaded file if present
117
  source_text = ""
 
127
  phon = Phonemizer(language=language, symbol=symbol)
128
  except PhonemizerError as e:
129
  if language == 'eu':
130
+ err = f"Ezin izan da itzultzaile fonetikoa hasi: {e}\nEgiaztatu 'modulo1y2' eta 'dict' karpetak."
131
  else:
132
+ err = f"No se pudo inicializar el traductor fonético: {e}\nComprueba las carpetas 'modulo1y2' y 'dict'."
133
  # Return 6 outputs matching the UI: result text, file, normalized text, norm file, ph_path, norm_path
134
  return err, None, "", None, "", ""
135
  except Exception as e:
136
  if language == 'eu':
137
  return f"Hasieratze errore ezezaguna: {e}", None, "", None, "", ""
138
+ return f"Error inesperado al inicializar: {e}", None, "", None, "", ""
139
 
140
 
141
  # Normalize then get phonemes. Run normalization per original input line so the
 
164
  phonemes = re.sub(r"\s*\|\s*", " ", phonemes)
165
  except PhonemizerError as e:
166
  if language == 'eu':
167
+ msg = f"Itzultzaile fonetikoaren errorea: {e}"
168
  else:
169
+ msg = f"Error del traductor fonético: {e}"
170
  return msg, None, "", None, "", ""
171
  except Exception as e:
172
  if language == 'eu':
 
182
  out_dir.mkdir(parents=True, exist_ok=True)
183
  from datetime import datetime
184
  ts = datetime.now().strftime('%Y%m%d_%H%M%S')
185
+ ph_file = out_dir / f'phonos_{ts}.txt'
186
  norm_file = out_dir / f'normalized_{ts}.txt'
187
  ph_file.write_text(phonemes, encoding='utf-8')
188
  norm_file.write_text(normalized, encoding='utf-8')
 
207
  out_dir.mkdir(parents=True, exist_ok=True)
208
  from datetime import datetime
209
  ts = datetime.now().strftime('%Y%m%d_%H%M%S')
210
+ filename = f'phonos_{ts}.txt'
211
  out_path = out_dir / filename
212
  out_path.write_text(text, encoding='utf-8')
213
  # Return the path string so Gradio's File component can serve it
 
217
  def build_interface():
218
  with gr.Blocks(title="Eu/Es Phonemizer") as demo:
219
  # Simple header (image removed per user preference)
220
+ header = gr.Markdown("# Itzultzaile fonetikoa — Euskara (eu) eta Gaztelania (es)")
221
 
222
  # Show README instructions in a collapsible panel so users can read
223
  # usage notes directly inside the app without leaving the UI.
 
342
  language = gr.Radio(choices=['eu', 'es'], value='eu', label='Hizkuntza / Idioma')
343
  symbol = gr.Radio(choices=['sampa', 'ipa'], value='sampa', label='Sinboloak / Símbolos (Irteera)')
344
  # Default checked and Basque-only label; will switch to Spanish when language changes
345
+ separate_phonemes = gr.Checkbox(label='Banatu fonoak espazioz', value=True)
346
 
347
  # Small column to the right of controls that holds the upload box
348
  with gr.Column(scale=1, elem_id='upload_col'):
 
375
  download_norm_btn = gr.DownloadButton('Deskargatu normalizatua', elem_id='download_norm_btn')
376
 
377
  with gr.Column(scale=1):
378
+ result_box = gr.Textbox(lines=12, elem_id='result_box', label='Fonoak', interactive=False)
379
+ download_ph_btn = gr.DownloadButton('Deskargatu fonoak', elem_id='download_ph_btn')
380
 
381
  # hidden boxes to hold latest generated file paths so download buttons can trigger
382
  ph_path_box = gr.Textbox(visible=False, elem_id='ph_path_box')
 
425
  if lang == 'eu':
426
  return (
427
  gr.update(label='Sinboloak (Irteera)'), # symbol
428
+ gr.update(label='Banatu fonoak espazioz'), # separate_phonemes
429
  # keep input/upload labels stable (do not update them to avoid reflow)
430
+ gr.update(label='Fonoak'),
431
  gr.update(label='Deskargatu irteera (.txt)'),
432
  gr.update(label='Normalizatua'),
433
  gr.update(label='Deskargatu normalizatua (.txt)'),
 
437
  else:
438
  return (
439
  gr.update(label='Símbolos (Salida)'),
440
+ gr.update(label='Separar fonos con espacios'),
441
  # keep input/upload labels stable (do not update them to avoid reflow)
442
+ gr.update(label='Fonos'),
443
  gr.update(label='Descargar salida (.txt)'),
444
  gr.update(label='Normalizado'),
445
  gr.update(label='Descargar normalizado (.txt)'),