Spaces:

Kalana001
/

SinCode

Running

Kalana committed 8 days ago

Commit

5a42ed8

1 Parent(s): a31933e

Add correction mode with word-swap popovers, feedback system, and fix copy button

- Interactive word-click alternatives using st.popover (FR6)
- HITL feedback: save corrections to feedback.csv for future retraining
- Replace broken JS clipboard with native st.code copy button (FR5)
- Guard against duplicate feedback submissions
- Fix stale session state on new transliteration

Files changed (1) hide show

app.py +151 -43

app.py CHANGED Viewed

@@ -5,36 +5,55 @@ SinCode Web UI — Streamlit interface for the transliteration engine.
 import streamlit as st
 import time
 import os
 import base64
 from PIL import Image
 from sincode_model import BeamSearchDecoder
 st.set_page_config(page_title="සිංCode", page_icon="🇱🇰", layout="centered")
 # ─── Helpers ─────────────────────────────────────────────────────────────────
-def _set_background(image_file: str) -> None:
-    """Inject a dark-overlay background from a local image."""
     try:
         with open(image_file, "rb") as f:
             b64 = base64.b64encode(f.read()).decode()
-        st.markdown(
-            f"""
-            <style>
-            .stApp {{
-                background-image: linear-gradient(rgba(0,0,0,0.7), rgba(0,0,0,0.7)),
-                                  url(data:image/png;base64,{b64});
-                background-size: cover;
-                background-position: center;
-                background-attachment: fixed;
-            }}
-            </style>
-            """,
-            unsafe_allow_html=True,
         )
     except FileNotFoundError:
-        pass
 @st.cache_resource
@@ -52,7 +71,7 @@ def _load_decoder() -> BeamSearchDecoder:
 _set_background("images/background.png")
 with st.sidebar:
-    st.image(Image.open("images/SinCodeLogo.jpg"), width=200)
     st.title("සිංCode Project")
     st.info("Prototype")
@@ -99,34 +118,123 @@ if st.button("Transliterate", type="primary", use_container_width=True) and inpu
         with st.spinner("Processing..."):
             decoder = _load_decoder()
             t0 = time.time()
-            result, trace_logs = decoder.decode(input_text, mode=decode_mode)
             elapsed = time.time() - t0
-        st.success("Transliteration Complete")
-        st.markdown(f"### {result}")
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            st.caption(f"Mode: {decode_mode} · Time: {round(elapsed, 2)}s")
-        with col2:
-            if st.button("📋 Copy", key="copy_result"):
-                st.session_state["copied"] = True
-        if st.session_state.get("copied"):
-            st.components.v1.html(
-                f"""<script>navigator.clipboard.writeText(`{result}`);</script>""",
-                height=0,
-            )
-            st.toast("Copied to clipboard!")
-            st.session_state["copied"] = False
-        with st.expander("Scoring Breakdown", expanded=True):
-            st.caption(
-                "MLM = contextual fit · Fid = transliteration fidelity · "
-                "Rank = dictionary prior · 🔤 = English"
-            )
-            for log in trace_logs:
-                st.markdown(log)
-                st.divider()
     except Exception as e:
         st.error(f"Error: {e}")

 import streamlit as st
 import time
 import os
+import csv
+import html as html_lib
 import base64
+from datetime import datetime
+from pathlib import Path
 from PIL import Image
 from sincode_model import BeamSearchDecoder
+FEEDBACK_FILE = Path("feedback.csv")
 st.set_page_config(page_title="සිංCode", page_icon="🇱🇰", layout="centered")
 # ─── Helpers ─────────────────────────────────────────────────────────────────
+@st.cache_data
+def _background_css(image_file: str) -> str:
+    """Return the CSS string for the background image (cached after first read)."""
     try:
         with open(image_file, "rb") as f:
             b64 = base64.b64encode(f.read()).decode()
+        return (
+            f"<style>.stApp {{background-image: linear-gradient(rgba(0,0,0,0.7),"
+            f"rgba(0,0,0,0.7)),url(data:image/png;base64,{b64});"
+            f"background-size:cover;background-position:center;"
+            f"background-attachment:fixed;}}</style>"
         )
     except FileNotFoundError:
+        return ""
+def _set_background(image_file: str) -> None:
+    css = _background_css(image_file)
+    if css:
+        st.markdown(css, unsafe_allow_html=True)
+@st.cache_data
+def _load_logo(image_file: str):
+    return Image.open(image_file)
+def _save_feedback(input_sentence: str, original_output: str, corrected_output: str) -> None:
+    """Append a full-sentence correction to the feedback CSV."""
+    with FEEDBACK_FILE.open("a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        if f.tell() == 0:
+            writer.writerow(["timestamp", "input_sentence", "original_output", "corrected_output"])
+        writer.writerow([datetime.now().isoformat(), input_sentence, original_output, corrected_output])
 @st.cache_resource
 _set_background("images/background.png")
 with st.sidebar:
+    st.image(_load_logo("images/SinCodeLogo.jpg"), width=200)
     st.title("සිංCode Project")
     st.info("Prototype")
         with st.spinner("Processing..."):
             decoder = _load_decoder()
             t0 = time.time()
+            if decode_mode == "greedy":
+                result, trace_logs, diagnostics = decoder.greedy_decode_with_diagnostics(input_text)
+            else:
+                result, trace_logs, diagnostics = decoder.decode_with_diagnostics(input_text)
             elapsed = time.time() - t0
+        # Store results in session state for interactive word swapping
+        selected = [d.selected_candidate for d in diagnostics]
+        st.session_state["diagnostics"] = diagnostics
+        st.session_state["output_words"] = selected
+        st.session_state["original_words"] = list(selected)
+        st.session_state["input_sentence"] = input_text
+        st.session_state["trace_logs"] = trace_logs
+        st.session_state["elapsed"] = elapsed
+        st.session_state["correction_mode"] = False
+        st.session_state["correction_submitted_for"] = None
     except Exception as e:
         st.error(f"Error: {e}")
+# ─── Render output (persists across reruns for word swapping) ─────────────
+if "output_words" in st.session_state and st.session_state["output_words"]:
+    diagnostics = st.session_state["diagnostics"]
+    output_words = st.session_state["output_words"]
+    original_words = st.session_state.get("original_words", list(output_words))
+    trace_logs = st.session_state["trace_logs"]
+    elapsed = st.session_state["elapsed"]
+    current_result = " ".join(output_words)
+    original_result = " ".join(original_words)
+    has_changes = output_words != original_words
+    st.success("Transliteration Complete")
+    # Output display with native copy button (st.code has built-in clipboard support)
+    safe_display = html_lib.escape(current_result)
+    st.markdown(
+        f'<span style="font-size:1.4em;font-weight:700;">{safe_display}</span>',
+        unsafe_allow_html=True,
+    )
+    st.code(current_result, language=None)
+    st.caption(f"Mode: {decode_mode} · Time: {round(elapsed, 2)}s")
+    # ── Correction mode toggle ────────────────────────────────────────
+    correction_mode = st.toggle(
+        "Correct this translation",
+        value=st.session_state.get("correction_mode", False),
+        key="correction_toggle",
+    )
+    if correction_mode:
+        st.caption("Click any highlighted word to see alternatives and swap it.")
+        # Word chips in rows — only ambiguous words are interactive
+        ROW_SIZE = 6
+        for row_start in range(0, len(output_words), ROW_SIZE):
+            row_slice = list(enumerate(diagnostics[row_start:row_start + ROW_SIZE], start=row_start))
+            cols = st.columns(len(row_slice))
+            for col, (i, diag) in zip(cols, row_slice):
+                has_alts = len(diag.candidate_breakdown) > 1
+                was_changed = output_words[i] != original_words[i]
+                with col:
+                    if has_alts:
+                        chip = f":green[**{output_words[i]}**] :material/check:" if was_changed else f":blue[**{output_words[i]}**]"
+                        with st.popover(chip, use_container_width=True):
+                            st.markdown(f"**`{diag.input_word}`** — pick alternative:")
+                            for scored in diag.candidate_breakdown[:5]:
+                                eng_tag = " 🔤" if scored.is_english else ""
+                                is_sel = scored.text == output_words[i]
+                                if st.button(
+                                    f"{'✅ ' if is_sel else ''}{scored.text}{eng_tag}",
+                                    key=f"alt_{i}_{scored.text}",
+                                    help=f"Score: {scored.combined_score:.2f}",
+                                    use_container_width=True,
+                                    type="primary" if is_sel else "secondary",
+                                ):
+                                    st.session_state["output_words"][i] = scored.text
+                                    st.rerun()
+                            st.markdown("---")
+                            custom = st.text_input(
+                                "Not listed? Type correct word:",
+                                key=f"custom_{i}",
+                                placeholder="Type Sinhala word",
+                            )
+                            if custom and st.button("Use this", key=f"custom_apply_{i}", use_container_width=True):
+                                st.session_state["output_words"][i] = custom
+                                st.rerun()
+                    else:
+                        st.markdown(f"**{output_words[i]}**")
+        # ── Submit correction button (only when changes exist, once per result) ──
+        # Guard key: (input sentence, original output) — stable regardless of swaps
+        submit_key = (st.session_state["input_sentence"], original_result)
+        already_submitted = st.session_state.get("correction_submitted_for") == submit_key
+        if has_changes and not already_submitted:
+            st.info(f"**Original:** {original_result}\n\n**Corrected:** {current_result}")
+            if st.button("Submit Correction", type="primary", use_container_width=True):
+                _save_feedback(
+                    input_sentence=st.session_state["input_sentence"],
+                    original_output=original_result,
+                    corrected_output=current_result,
+                )
+                st.session_state["correction_submitted_for"] = submit_key
+                st.session_state["correction_mode"] = False
+                st.toast("Correction submitted — thank you!")
+                st.rerun()
+    # Show outside toggle so it remains visible after submission closes the toggle
+    input_sent = st.session_state.get("input_sentence", "")
+    if st.session_state.get("correction_submitted_for") == (input_sent, original_result):
+        st.success("Correction already submitted.")
+    with st.expander("Scoring Breakdown", expanded=False):
+        st.caption(
+            "MLM = contextual fit · Fid = transliteration fidelity · "
+            "Rank = dictionary prior · 🔤 = English"
+        )
+        st.markdown("\n\n---\n\n".join(trace_logs))