Spaces:

AustinWagner
/

uci_phonotactic_calculator_gradio

Sleeping

App Files Files Community

wagner-austin committed on May 22

Commit

0a40f4f

1 Parent(s): dbf52c3

Migrate Gradio UI to main package structure, simplify HF Spaces repo

Browse files

Files changed (5) hide show

.gitattributes +0 -35
.gitignore +0 -4
README.md +24 -3
app.py +22 -188
requirements.txt +1 -1

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore DELETED Viewed

@@ -1,4 +0,0 @@
-__pycache__/
-*.pyc
-.env/
-temp.txt

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Uci Phonotactic Calculator
 emoji: 📊
 colorFrom: purple
 colorTo: gray
@@ -8,7 +8,28 @@ sdk_version: 5.29.0
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Phoneme-level n-gram scorer.
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: UCI Phonotactic Calculator
 emoji: 📊
 colorFrom: purple
 colorTo: gray
 app_file: app.py
 pinned: false
 license: apache-2.0
+short_description: Phoneme-level n-gram scorer for linguistic research.
 ---
+# UCI Phonotactic Calculator
+This is the official Gradio web interface for the [UCI Phonotactic Calculator](https://github.com/connormayer/uci_phonotactic_calculator) package.
+## Features
+- Score phoneme sequences using n-gram models
+- Upload your own training and test data
+- Use built-in English demo data
+- Select from multiple model implementations
+- Apply various filtering options
+- Get both preview and downloadable CSV results
+## Usage
+1. Choose to use demo data or upload your own CSV files
+2. Select a model and n-gram order
+3. Set any advanced filtering options if needed
+4. Click "Score" to run the calculator
+5. View results and download the full CSV
+For more detailed documentation, visit the [GitHub repository](https://github.com/connormayer/uci_phonotactic_calculator).

app.py CHANGED Viewed

@@ -1,189 +1,23 @@
-"""
-Gradio front-end for the UCI Phonotactic Calculator
----------------------------------------------------
-✓ Works on Hugging-Face Spaces
-✓ Uses the *installed* Python package – no relative “src” hacks
-✓ Returns both a preview DataFrame *and* a downloadable CSV
-"""
-from pathlib import Path
-import tempfile, os, pandas as pd, gradio as gr
-# --- Gradio progress adapter for Rich-style progress ---
-class _GradioProgressAdapter:
-    """
-    Drop-in replacement for uci_phonotactic_calculator.progress.progress()
-    that streams status into the Gradio UI.
-    It only implements the bits the library actually calls:
-        with progress(...) as bar:
-            tid = bar.add_task("Training", total=N)
-            ...
-            bar.update(tid, advance=1)
-    """
-    def __init__(self, enabled: bool = True):
-        self.enabled = enabled
-        self._g_prog = None        # gr.Progress instance
-        self._tasks  = {}          # local id ➜ (current, total)
-    def __enter__(self):
-        if self.enabled:
-            # keep both the CM *and* the callable tracker
-            self._cm      = gr.Progress()          # context-manager
-            self._g_prog  = self._cm.__enter__()   # callable returned by __enter__
-        return self
-    def __exit__(self, exc_type, exc, tb):
-        if getattr(self, "_cm", None):
-            self._cm.__exit__(exc_type, exc, tb)
-    # ─── Rich-look-alike API ─────────────────────────────────────────
-    def add_task(self, description: str, total: int | None = None):
-        task_id = len(self._tasks) + 1
-        self._tasks[task_id] = [0, total or 0]
-        if self._g_prog:
-            # The callable has set_description() only on Gradio ≥4.3
-            if hasattr(self._g_prog, "set_description"):
-                self._g_prog.set_description(description)
-            self._g_prog(0, total or 0)
-        return task_id
-    def update(self, task_id: int, advance: int = 1):
-        cur, tot = self._tasks[task_id]
-        cur += advance
-        self._tasks[task_id][0] = cur
-        if self._g_prog:
-            self._g_prog(cur, tot)
-# ---> public, documented API wrapper around the CLI
-from uci_phonotactic_calculator.ngram_calculator import run as ngram_run
-from uci_phonotactic_calculator.plugins import PluginRegistry
-from uci_phonotactic_calculator.cli_demo_data import get_demo_paths
-TMP_DIR = Path(tempfile.gettempdir())
-from uuid import uuid4, uuid1
-# ---------------------------------------------------------------------
-# Back-end helper
-# ---------------------------------------------------------------------
-def score(
-    train_csv,           # gr.File or None
-    test_csv,            # gr.File or None
-    model,               # str
-    run_full_grid,       # bool
-    ngram_order,         # int
-    use_demo,            # bool
-    filter_string,       # str like "weight_mode=raw prob_mode=joint"
-    hide_progress        # bool
-):
-    """
-    Execute the scorer and return (DataFrame, CSV-path) for Gradio.
-    """
-    # -------------------- resolve input paths -----------------------
-    if use_demo:
-        train_path, test_path = get_demo_paths()
-    else:
-        if train_csv is None or test_csv is None:
-            raise gr.Error("Upload BOTH training & test CSVs *or* tick the demo-data box.")
-        train_path, test_path = train_csv.name, test_csv.name
-    # ------------------------------------------------------------------
-    # Legacy-mode override for demo data
-    # ------------------------------------------------------------------
-    if use_demo:
-        run_full_grid = False   # ignore any mischievous client-side tweak
-        model = None            # guarantees legacy path (no --model)
-    out_file = TMP_DIR / f"scores_{uuid4().hex}.csv"
-    import atexit, functools
-    atexit.register(functools.partial(out_file.unlink, missing_ok=True))
-    # -------------------- translate filters -------------------------
-    filters = {}
-    tokens = filter_string.split()
-    if tokens and tokens[0] == "--filter":
-        tokens = tokens[1:]  # drop the flag if present
-    if tokens:
-        for tok in tokens:
-            if "=" not in tok:
-                raise gr.Error(f"Filter “{tok}” must look like key=value")
-            k, v = tok.split("=", 1)
-            filters[k] = v
-    # -------------------- invoke library with Gradio progress patch ---------------------------
-    import uci_phonotactic_calculator.progress as _p
-    _orig_progress = _p.progress  # keep to restore later
-    _p.progress = lambda enabled=True: _GradioProgressAdapter(
-        enabled=enabled and not hide_progress
-    )
-    try:
-        ngram_run(
-            train_file=train_path,
-            test_file=test_path,
-            output_file=str(out_file),
-            model=None if run_full_grid else model,
-            run_all=run_full_grid,
-            filters=filters,
-            show_progress=not hide_progress,   # still disables library chatter
-            extra_args=["-n", str(ngram_order)],
-        )
-    finally:
-        _p.progress = _orig_progress  # guarantee cleanup
-    df = pd.read_csv(out_file)
-    df_preview = df.head(50).iloc[:, :30]    # show only first 50 rows, 30 cols in UI
-    return df_preview, str(out_file)
-# ---------------------------------------------------------------------
-# Gradio UI
-# ---------------------------------------------------------------------
-with gr.Blocks(title="UCI Phonotactic Calculator") as demo:
-    gr.Markdown(
-        "## UCI Phonotactic Calculator\n"
-        "Upload training & test corpora – or pick the built-in English demo – "
-        "choose a model, and get probability scores."
-    )
-    with gr.Row():
-        with gr.Column():
-            train_in = gr.File(label="Training CSV")
-            test_in  = gr.File(label="Test CSV")
-            use_demo = gr.Checkbox(
-                label="Use packaged English demo data (16-col legacy mode)",
-                value=True,
-                info="Runs the original 2018 output format. Untick and upload your own data to use any model/grid."
-            )
-            model_dd = gr.Dropdown(
-                choices=sorted(PluginRegistry),
-                value="ngram",
-                label="Model plug-in"
-            )
-            # Hidden checkbox keeps the variable alive for go_btn.click;
-            # power-users can un-hide it in the inspector if they want.
-            run_grid = gr.Checkbox(visible=False, value=False, label="Run full variant grid")
-            n_slider = gr.Slider(1, 4, step=1, value=2, label="n-gram order")
-            with gr.Accordion("Advanced", open=False):
-                filt_txt = gr.Textbox(
-                    label="Filter (space-separated key=value …)",
-                    placeholder="example: weight_mode=raw prob_mode=joint"
-                )
-                hide_prog = gr.Checkbox(label="Hide progress indicator", value=False)
-            go_btn = gr.Button("Score")
-        with gr.Column():
-            out_df  = gr.Dataframe(label="Scores (preview)", interactive=False)
-            out_csv = gr.File(label="Download full CSV")
-    go_btn.click(
-        fn=score,
-        inputs=[train_in, test_in, model_dd, run_grid, n_slider,
-                use_demo, filt_txt, hide_prog],
-        outputs=[out_df, out_csv]
-    )
 if __name__ == "__main__":
-    demo.launch(share=False)

+# UCI Phonotactic Calculator Gradio UI for Hugging Face Spaces
+# This application demonstrates the UCI Phonotactic Calculator web interface
+# Import the web demo UI builder from the uci_phonotactic_calculator package
+from uci_phonotactic_calculator.web.web_demo import build_ui
+# Create the Gradio interface with default settings
+# The build_ui function configures a Gradio Blocks interface with:
+# - Input fields for training and test CSV files
+# - Model selection
+# - n-gram order selection
+# - Filtering options
+# - Results preview and download
+demo = build_ui()
+# Enable queuing for better performance with multiple users
+# This prevents the server from being overwhelmed by concurrent requests
+demo.queue()
+# Launch the web application
+# In Hugging Face Spaces, this will make the app available to users
 if __name__ == "__main__":
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -1,2 +1,2 @@
-uci-phonotactic-calculator[ui]>=0.2.2 # latest published wheel
 gradio

+uci-phonotactic-calculator[ui]>=0.2.3 # latest published wheel
 gradio