"""Gradio viewer for a CSV of judged "fast" vs "accurate" OCR results."""

import pandas as pd
import gradio as gr

CSV_PATH = "./judged-fast-accurate-downsampled.csv"

# Columns the viewer requires in the judged fast/accurate CSV.
_REQUIRED_COLUMNS = [
    "frame_idx",
    "t_sec",
    "truth_text",
    "fast_text",
    "fast_similar",
    "fast_score",
    "fast_reason",
    "accurate_text",
    "accurate_similar",
    "accurate_score",
    "accurate_reason",
]

# Columns scanned by the free-text search box.
_SEARCH_COLUMNS = [
    "truth_text",
    "fast_text",
    "fast_reason",
    "accurate_text",
    "accurate_reason",
]

_DISPLAY_COLUMNS = ["frame_idx", "t_sec", "truth_text", "fast", "accurate"]


def load_csv_from_path(path: str):
    """Read the CSV at *path* with pandas.

    Returns:
        ``(df, "")`` on success, or ``(None, message)`` if reading failed.
    """
    try:
        df = pd.read_csv(path)
    except Exception as e:  # UI boundary: report any read/parse failure as text
        return None, f"Failed to read CSV at {path}: {e}"
    return df, ""


def _as_text(series):
    """Return *series* as strings, with missing values rendered as ``""``."""
    # Blank out NaN *before* stringifying; astype(str) alone yields "nan".
    return series.astype(object).where(series.notna(), "").astype(str)


def _describe(df, prefix):
    """Build the one-line display string for the ``fast``/``accurate`` engine."""
    return (
        "similar="
        + df[f"{prefix}_similar"]
        + " score="
        + _as_text(df[f"{prefix}_score"])
        + " | "
        + _as_text(df[f"{prefix}_text"])
        + " | reason: "
        + _as_text(df[f"{prefix}_reason"])
    )


def filter_rows(df, mode, similar, text_filter, max_rows):
    """Filter the judged rows and build the table shown in the UI.

    The input frame is not modified.

    Args:
        df: DataFrame loaded from the judged CSV, or ``None``.
        mode: ``"both"`` keeps all rows; ``"fast"`` keeps rows that have a
            ``fast_text``; any other value keeps rows that have an
            ``accurate_text``.
        similar: ``"all"`` for no filtering; ``"true"`` keeps rows where
            either engine's similar flag is ``True``; any other value keeps
            rows where either flag is ``False``.
        text_filter: Case-insensitive literal substring searched in the truth,
            text and reason columns. Empty/``None`` disables the search.
        max_rows: Maximum number of rows returned in the table.

    Returns:
        ``(error, summary, table)``. On failure ``error`` is a message,
        ``summary`` is ``""`` and ``table`` is ``None``; on success ``error``
        is ``""``.
    """
    if df is None:
        return "No data loaded. Click Reload.", "", None
    if df.empty:
        return "No data in CSV.", "", None

    missing = [c for c in _REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        return f"Missing columns: {missing}", "", None

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    df = df.assign(
        fast_similar=_as_text(df["fast_similar"]),
        accurate_similar=_as_text(df["accurate_similar"]),
    )

    if mode == "fast":
        df = df[df["fast_text"].notna()]
    elif mode != "both":
        df = df[df["accurate_text"].notna()]

    if similar != "all":
        wanted = "True" if similar == "true" else "False"
        # A row matches when either engine carries the requested flag.
        df = df[(df["fast_similar"] == wanted) | (df["accurate_similar"] == wanted)]

    if text_filter:
        needle = text_filter.lower()
        mask = pd.Series(False, index=df.index)
        for col in _SEARCH_COLUMNS:
            # regex=False: the search box is literal text, so input such as
            # "(" must not be compiled as a regular expression.
            mask |= _as_text(df[col]).str.lower().str.contains(needle, regex=False)
        df = df[mask]

    # Pretty display columns
    df = df.assign(fast=_describe(df, "fast"), accurate=_describe(df, "accurate"))

    # The slider value may arrive as a float; head() needs an integer.
    subset = df[_DISPLAY_COLUMNS].head(int(max_rows))

    fast_hits = int((df["fast_similar"] == "True").sum())
    accurate_hits = int((df["accurate_similar"] == "True").sum())
    summary = (
        f"Rows: {len(df)} | "
        f"fast similar: {fast_hits} | "
        f"accurate similar: {accurate_hits}"
    )
    return "", summary, subset


def run_all(path, mode, similar, text_filter, max_rows):
    """Load the CSV at *path* and apply the current filters.

    Returns the same ``(error, summary, table)`` triple as ``filter_rows``.
    """
    df, err = load_csv_from_path(path)
    if err:
        return err, "", None
    return filter_rows(df, mode, similar, text_filter, max_rows)


with gr.Blocks() as demo:
    gr.Markdown("# OCR Judge Viewer")
    # NOTE(review): the original nesting under gr.Row() was lost when the file
    # was collapsed; path + reload are assumed to share the row - confirm.
    with gr.Row():
        path_in = gr.Textbox(value=CSV_PATH, label="CSV path", interactive=True)
        reload_btn = gr.Button("Reload")
    mode = gr.Dropdown(["both", "fast", "accurate"], value="both", label="Mode")
    similar = gr.Dropdown(["all", "true", "false"], value="all", label="similar flag")
    text_filter = gr.Textbox(label="Search text", placeholder="search in truth/reasons")
    max_rows = gr.Slider(10, 500, value=100, step=10, label="Max rows")
    err_box = gr.Markdown()
    summary_box = gr.Markdown()
    table = gr.Dataframe(wrap=True)

    inputs = [path_in, mode, similar, text_filter, max_rows]
    outputs = [err_box, summary_box, table]

    # Every control re-runs the full load + filter pipeline.
    reload_btn.click(run_all, inputs, outputs)
    for control in (mode, similar, text_filter, max_rows):
        control.change(run_all, inputs, outputs)
    demo.load(run_all, inputs, outputs)

if __name__ == "__main__":
    # NOTE(review): binding to 0.0.0.0 exposes the app on all interfaces, and
    # the CSV path is user-editable, so any client can ask the server to read
    # an arbitrary file path - confirm this is only run on a trusted network.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False, show_error=True)