wagner-austin committed on
Commit
0a40f4f
·
1 Parent(s): dbf52c3

Migrate Gradio UI to main package structure, simplify HF Spaces repo

Browse files
Files changed (5) hide show
  1. .gitattributes +0 -35
  2. .gitignore +0 -4
  3. README.md +24 -3
  4. app.py +22 -188
  5. requirements.txt +1 -1
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore DELETED
@@ -1,4 +0,0 @@
1
- __pycache__/
2
- *.pyc
3
- .env/
4
- temp.txt
 
 
 
 
 
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Uci Phonotactic Calculator
3
  emoji: 📊
4
  colorFrom: purple
5
  colorTo: gray
@@ -8,7 +8,28 @@ sdk_version: 5.29.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: Phoneme-level n-gram scorer.
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: UCI Phonotactic Calculator
3
  emoji: 📊
4
  colorFrom: purple
5
  colorTo: gray
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: Phoneme-level n-gram scorer for linguistic research.
12
  ---
13
 
14
+ # UCI Phonotactic Calculator
15
+
16
+ This is the official Gradio web interface for the [UCI Phonotactic Calculator](https://github.com/connormayer/uci_phonotactic_calculator) package.
17
+
18
+ ## Features
19
+
20
+ - Score phoneme sequences using n-gram models
21
+ - Upload your own training and test data
22
+ - Use built-in English demo data
23
+ - Select from multiple model implementations
24
+ - Apply various filtering options
25
+ - Get both preview and downloadable CSV results
26
+
27
+ ## Usage
28
+
29
+ 1. Choose whether to use the built-in demo data or upload your own CSV files
30
+ 2. Select a model and n-gram order
31
+ 3. Set any advanced filtering options if needed
32
+ 4. Click "Score" to run the calculator
33
+ 5. View results and download the full CSV
34
+
35
+ For more detailed documentation, visit the [GitHub repository](https://github.com/connormayer/uci_phonotactic_calculator).
app.py CHANGED
@@ -1,189 +1,23 @@
1
- """
2
- Gradio front-end for the UCI Phonotactic Calculator
3
- ---------------------------------------------------
4
- Works on Hugging-Face Spaces
5
- Uses the *installed* Python package – no relative “src” hacks
6
- ✓ Returns both a preview DataFrame *and* a downloadable CSV
7
- """
8
-
9
- from pathlib import Path
10
- import tempfile, os, pandas as pd, gradio as gr
11
-
12
- # --- Gradio progress adapter for Rich-style progress ---
13
- class _GradioProgressAdapter:
14
- """
15
- Drop-in replacement for uci_phonotactic_calculator.progress.progress()
16
- that streams status into the Gradio UI.
17
-
18
- It only implements the bits the library actually calls:
19
- with progress(...) as bar:
20
- tid = bar.add_task("Training", total=N)
21
- ...
22
- bar.update(tid, advance=1)
23
- """
24
- def __init__(self, enabled: bool = True):
25
- self.enabled = enabled
26
- self._g_prog = None # gr.Progress instance
27
- self._tasks = {} # local id ➜ (current, total)
28
-
29
- def __enter__(self):
30
- if self.enabled:
31
- # keep both the CM *and* the callable tracker
32
- self._cm = gr.Progress() # context-manager
33
- self._g_prog = self._cm.__enter__() # callable returned by __enter__
34
- return self
35
-
36
- def __exit__(self, exc_type, exc, tb):
37
- if getattr(self, "_cm", None):
38
- self._cm.__exit__(exc_type, exc, tb)
39
-
40
- # ─── Rich-look-alike API ─────────────────────────────────────────
41
- def add_task(self, description: str, total: int | None = None):
42
- task_id = len(self._tasks) + 1
43
- self._tasks[task_id] = [0, total or 0]
44
- if self._g_prog:
45
- # The callable has set_description() only on Gradio ≥4.3
46
- if hasattr(self._g_prog, "set_description"):
47
- self._g_prog.set_description(description)
48
- self._g_prog(0, total or 0)
49
- return task_id
50
-
51
- def update(self, task_id: int, advance: int = 1):
52
- cur, tot = self._tasks[task_id]
53
- cur += advance
54
- self._tasks[task_id][0] = cur
55
- if self._g_prog:
56
- self._g_prog(cur, tot)
57
-
58
- # ---> public, documented API wrapper around the CLI
59
- from uci_phonotactic_calculator.ngram_calculator import run as ngram_run
60
- from uci_phonotactic_calculator.plugins import PluginRegistry
61
- from uci_phonotactic_calculator.cli_demo_data import get_demo_paths
62
-
63
- TMP_DIR = Path(tempfile.gettempdir())
64
- from uuid import uuid4, uuid1
65
-
66
- # ---------------------------------------------------------------------
67
- # Back-end helper
68
- # ---------------------------------------------------------------------
69
- def score(
70
- train_csv, # gr.File or None
71
- test_csv, # gr.File or None
72
- model, # str
73
- run_full_grid, # bool
74
- ngram_order, # int
75
- use_demo, # bool
76
- filter_string, # str like "weight_mode=raw prob_mode=joint"
77
- hide_progress # bool
78
- ):
79
- """
80
- Execute the scorer and return (DataFrame, CSV-path) for Gradio.
81
- """
82
- # -------------------- resolve input paths -----------------------
83
- if use_demo:
84
- train_path, test_path = get_demo_paths()
85
- else:
86
- if train_csv is None or test_csv is None:
87
- raise gr.Error("Upload BOTH training & test CSVs *or* tick the demo-data box.")
88
- train_path, test_path = train_csv.name, test_csv.name
89
-
90
- # ------------------------------------------------------------------
91
- # Legacy-mode override for demo data
92
- # ------------------------------------------------------------------
93
- if use_demo:
94
- run_full_grid = False # ignore any mischievous client-side tweak
95
- model = None # guarantees legacy path (no --model)
96
-
97
- out_file = TMP_DIR / f"scores_{uuid4().hex}.csv"
98
- import atexit, functools
99
- atexit.register(functools.partial(out_file.unlink, missing_ok=True))
100
-
101
- # -------------------- translate filters -------------------------
102
- filters = {}
103
- tokens = filter_string.split()
104
- if tokens and tokens[0] == "--filter":
105
- tokens = tokens[1:] # drop the flag if present
106
- if tokens:
107
- for tok in tokens:
108
- if "=" not in tok:
109
- raise gr.Error(f"Filter “{tok}” must look like key=value")
110
- k, v = tok.split("=", 1)
111
- filters[k] = v
112
-
113
- # -------------------- invoke library with Gradio progress patch ---------------------------
114
- import uci_phonotactic_calculator.progress as _p
115
- _orig_progress = _p.progress # keep to restore later
116
- _p.progress = lambda enabled=True: _GradioProgressAdapter(
117
- enabled=enabled and not hide_progress
118
- )
119
- try:
120
- ngram_run(
121
- train_file=train_path,
122
- test_file=test_path,
123
- output_file=str(out_file),
124
- model=None if run_full_grid else model,
125
- run_all=run_full_grid,
126
- filters=filters,
127
- show_progress=not hide_progress, # still disables library chatter
128
- extra_args=["-n", str(ngram_order)],
129
- )
130
- finally:
131
- _p.progress = _orig_progress # guarantee cleanup
132
-
133
- df = pd.read_csv(out_file)
134
- df_preview = df.head(50).iloc[:, :30] # show only first 50 rows, 30 cols in UI
135
- return df_preview, str(out_file)
136
-
137
- # ---------------------------------------------------------------------
138
- # Gradio UI
139
- # ---------------------------------------------------------------------
140
- with gr.Blocks(title="UCI Phonotactic Calculator") as demo:
141
- gr.Markdown(
142
- "## UCI Phonotactic Calculator\n"
143
- "Upload training & test corpora – or pick the built-in English demo – "
144
- "choose a model, and get probability scores."
145
- )
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- train_in = gr.File(label="Training CSV")
150
- test_in = gr.File(label="Test CSV")
151
-
152
- use_demo = gr.Checkbox(
153
- label="Use packaged English demo data (16-col legacy mode)",
154
- value=True,
155
- info="Runs the original 2018 output format. Untick and upload your own data to use any model/grid."
156
- )
157
-
158
- model_dd = gr.Dropdown(
159
- choices=sorted(PluginRegistry),
160
- value="ngram",
161
- label="Model plug-in"
162
- )
163
- # Hidden checkbox keeps the variable alive for go_btn.click;
164
- # power-users can un-hide it in the inspector if they want.
165
- run_grid = gr.Checkbox(visible=False, value=False, label="Run full variant grid")
166
- n_slider = gr.Slider(1, 4, step=1, value=2, label="n-gram order")
167
-
168
- with gr.Accordion("Advanced", open=False):
169
- filt_txt = gr.Textbox(
170
- label="Filter (space-separated key=value …)",
171
- placeholder="example: weight_mode=raw prob_mode=joint"
172
- )
173
- hide_prog = gr.Checkbox(label="Hide progress indicator", value=False)
174
-
175
- go_btn = gr.Button("Score")
176
-
177
- with gr.Column():
178
- out_df = gr.Dataframe(label="Scores (preview)", interactive=False)
179
- out_csv = gr.File(label="Download full CSV")
180
-
181
- go_btn.click(
182
- fn=score,
183
- inputs=[train_in, test_in, model_dd, run_grid, n_slider,
184
- use_demo, filt_txt, hide_prog],
185
- outputs=[out_df, out_csv]
186
- )
187
-
188
  if __name__ == "__main__":
189
- demo.launch(share=False)
 
1
+ # UCI Phonotactic Calculator Gradio UI for Hugging Face Spaces
2
+ # This application demonstrates the UCI Phonotactic Calculator web interface
3
+
4
+ # Import the web demo UI builder from the uci_phonotactic_calculator package
5
+ from uci_phonotactic_calculator.web.web_demo import build_ui
6
+
7
+ # Create the Gradio interface with default settings
8
+ # The build_ui function configures a Gradio Blocks interface with:
9
+ # - Input fields for training and test CSV files
10
+ # - Model selection
11
+ # - n-gram order selection
12
+ # - Filtering options
13
+ # - Results preview and download
14
+ demo = build_ui()
15
+
16
+ # Enable queuing for better performance with multiple users
17
+ # This prevents the server from being overwhelmed by concurrent requests
18
+ demo.queue()
19
+
20
+ # Launch the web application
21
+ # In Hugging Face Spaces, this will make the app available to users
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  if __name__ == "__main__":
23
+ demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- uci-phonotactic-calculator[ui]>=0.2.2 # latest published wheel
2
  gradio
 
1
+ uci-phonotactic-calculator[ui]>=0.2.3 # latest published wheel
2
  gradio