Spaces:

clementBE
/

transcrib_coder

Sleeping

App Files Files Community

clementBE committed on Nov 25, 2025

Commit

97ab6fa

verified ·

1 Parent(s): 70f6e49

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -52

app.py CHANGED Viewed

@@ -7,9 +7,6 @@ try:
 except ImportError:
     docx = None
-# ------------------------------
-# CONFIG
-# ------------------------------
 DEFAULT_CODES = [
     "Communication Barrier",
     "Emotional Support",
@@ -33,15 +30,14 @@ COLOR_MAP = {
     "Follow-up Needed": "orange",
 }
-# ------------------------------
-# FILE PROCESSING
-# ------------------------------
 def read_docx(path):
     """Return the text of the .docx file at *path*, one paragraph per line.

     If the optional ``docx`` module could not be imported (it is then set
     to ``None``), an error message string is returned instead of the text.
     """
     if not docx:
         return "Error: python-docx not installed."
     paragraphs = docx.Document(path).paragraphs
     return "\n".join(paragraph.text for paragraph in paragraphs)
 def read_vtt(path):
     with open(path, "r", encoding="utf-8") as f:
         lines = f.read().split("\n")
@@ -52,11 +48,13 @@ def read_vtt(path):
     ]
     return " ".join(cleaned)
 def get_empty_df():
     """Build an empty DataFrame holding only the coding-table columns.

     The columns are the three fixed ones followed by one column per key of
     ``METADATA_FIELDS``.
     """
     fixed_columns = ["File ID", "Coded Segment", "Code"]
     metadata_columns = list(METADATA_FIELDS.keys())
     return pd.DataFrame(columns=fixed_columns + metadata_columns)
 def process_file(file_obj):
     if file_obj is None:
         return "", "", get_empty_df()
@@ -71,40 +69,40 @@ def process_file(file_obj):
             text = f.read()
     return text, name, get_empty_df()
-# ------------------------------
-# BUILD TRANSCRIPT HTML
-# ------------------------------
 def build_transcript_html(text, df):
     display_text = text
     if df is not None and not df.empty:
         for _, row in df.iterrows():
             seg = row["Coded Segment"]
             color = COLOR_MAP.get(row["Code"], "yellow")
-            display_text = display_text.replace(seg, f"<span style='background-color:{color}'>{seg}</span>", 1)
     safe_text = display_text.replace("\n", "<br>")
     html = f"""
     <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
         {safe_text}
     </div>
     <script>
-    const transcript = document.getElementById('transcript');
-    transcript.addEventListener('mouseup', function() {{
-        const sel = window.getSelection().toString();
-        if(sel.length>0){{
-            const state_input = document.querySelector('#selected_segment_state');
-            if(state_input) {{
-                state_input.value = sel;
-                state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
             }}
-        }}
     }});
     </script>
     """
     return html
-# ------------------------------
-# APPLY CODE
-# ------------------------------
 def apply_code(df, segment, code, file_id, *metadata_values):
     if not segment or not code or not file_id:
         return df, "⚠️ Select text and file first"
@@ -113,17 +111,13 @@ def apply_code(df, segment, code, file_id, *metadata_values):
     df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
     return df, f"✅ Segment coded as '{code}'"
-# ------------------------------
-# ADD NEW CODE TO DROPDOWN
-# ------------------------------
 def add_new_code(new_code, code_list):
     """Return the code list with *new_code* appended when it is new.

     Args:
         new_code: Label to add; falsy values (``""``, ``None``) are ignored.
         code_list: Current list of code labels. It is never modified.

     Returns:
         ``code_list`` itself when nothing is added, otherwise a new list
         ending with ``new_code``.
     """
     if not new_code or new_code in code_list:
         return code_list
     # Build a fresh list rather than appending in place: the caller's list
     # stays untouched, and a consumer comparing the old and new values can
     # tell that something changed.
     return [*code_list, new_code]
-# ------------------------------
-# EXPORT XLSX
-# ------------------------------
 def export_excel(df):
     if df.empty:
         return None, "Nothing to export"
@@ -131,11 +125,8 @@ def export_excel(df):
     df.to_excel(path, index=False)
     return path, "Excel ready"
-# ------------------------------
-# GRADIO UI
-# ------------------------------
-with gr.Blocks() as demo:
     # States
     full_text = gr.State("")
     file_id = gr.State("")
@@ -143,20 +134,22 @@ with gr.Blocks() as demo:
     selected_segment_state = gr.State("")
     code_categories_state = gr.State(DEFAULT_CODES)
-    # ---------------- Metadata ----------------
     with gr.Row():
         metadata_inputs = []
-        for k,lbl in METADATA_FIELDS.items():
             metadata_inputs.append(gr.Textbox(label=lbl))
-    # ---------------- Transcript + Coding ----------------
     with gr.Row():
-        # Left
         with gr.Column(scale=3):
             transcript_html = gr.HTML()
-            hidden_segment = gr.Textbox(interactive=False, visible=False, elem_id="selected_segment_state")
-        # Right
         with gr.Column(scale=2):
             gr.Markdown("## 🏷️ Code Category")
             code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
@@ -171,33 +164,48 @@ with gr.Blocks() as demo:
             export_btn = gr.Button("Export XLSX")
             export_file = gr.File(visible=False)
-            file_input = gr.File(label="Upload transcript", file_types=[".docx",".vtt",".txt"])
             status = gr.Textbox(label="Status", value="Ready")
-    # ---------------- Callbacks ----------------
-    file_input.change(fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state])
-    # Update transcript HTML
     def update_transcript(text, df):
         return build_transcript_html(text, df)
-    full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
-    coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
-    # Add new code
-    add_code_btn.click(add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state])
-    code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
-    # Apply code
-    apply_btn.click(apply_code, inputs=[coded_df_state, selected_segment_state, code_dropdown, file_id] + metadata_inputs, outputs=[coded_df_state, status])
-    # Update table
     coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
-    # Export
     export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
         lambda f: gr.update(visible=f is not None),
         inputs=export_file,
-        outputs=export_file
     )
 demo.launch()

 except ImportError:
     docx = None
 DEFAULT_CODES = [
     "Communication Barrier",
     "Emotional Support",
     "Follow-up Needed": "orange",
 }
 def read_docx(path):
     """Return the text of the .docx file at *path*, one paragraph per line.

     If the optional ``docx`` module could not be imported (it is then set
     to ``None``), an error message string is returned instead of the text.
     """
     if not docx:
         return "Error: python-docx not installed."
     paragraphs = docx.Document(path).paragraphs
     return "\n".join(paragraph.text for paragraph in paragraphs)
 def read_vtt(path):
     with open(path, "r", encoding="utf-8") as f:
         lines = f.read().split("\n")
     ]
     return " ".join(cleaned)
 def get_empty_df():
     """Build an empty DataFrame holding only the coding-table columns.

     The columns are the three fixed ones followed by one column per key of
     ``METADATA_FIELDS``.
     """
     fixed_columns = ["File ID", "Coded Segment", "Code"]
     metadata_columns = list(METADATA_FIELDS.keys())
     return pd.DataFrame(columns=fixed_columns + metadata_columns)
 def process_file(file_obj):
     if file_obj is None:
         return "", "", get_empty_df()
             text = f.read()
     return text, name, get_empty_df()
 def build_transcript_html(text, df):
     display_text = text
     if df is not None and not df.empty:
         for _, row in df.iterrows():
             seg = row["Coded Segment"]
             color = COLOR_MAP.get(row["Code"], "yellow")
+            display_text = display_text.replace(
+                seg, f"<span style='background-color:{color}'>{seg}</span>", 1
+            )
     safe_text = display_text.replace("\n", "<br>")
     html = f"""
     <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
         {safe_text}
     </div>
     <script>
+    document.addEventListener('DOMContentLoaded', function() {{
+        const transcript = document.getElementById('transcript');
+        transcript.addEventListener('mouseup', function() {{
+            const sel = window.getSelection().toString();
+            if(sel.length>0){{
+                const state_input = document.getElementById('selected_segment_state');
+                if(state_input) {{
+                    state_input.value = sel;
+                    state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
+                }}
             }}
+        }});
     }});
     </script>
     """
     return html
 def apply_code(df, segment, code, file_id, *metadata_values):
     if not segment or not code or not file_id:
         return df, "⚠️ Select text and file first"
     df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
     return df, f"✅ Segment coded as '{code}'"
 def add_new_code(new_code, code_list):
     """Return the code list with *new_code* appended when it is new.

     Args:
         new_code: Label to add; falsy values (``""``, ``None``) are ignored.
         code_list: Current list of code labels. It is never modified.

     Returns:
         ``code_list`` itself when nothing is added, otherwise a new list
         ending with ``new_code``.
     """
     if not new_code or new_code in code_list:
         return code_list
     # Build a fresh list rather than appending in place: the caller's list
     # stays untouched, and a consumer comparing the old and new values can
     # tell that something changed.
     return [*code_list, new_code]
 def export_excel(df):
     if df.empty:
         return None, "Nothing to export"
     df.to_excel(path, index=False)
     return path, "Excel ready"
+with gr.Blocks() as demo:
     # States
     full_text = gr.State("")
     file_id = gr.State("")
     selected_segment_state = gr.State("")
     code_categories_state = gr.State(DEFAULT_CODES)
+    # Metadata
     with gr.Row():
         metadata_inputs = []
+        for k, lbl in METADATA_FIELDS.items():
             metadata_inputs.append(gr.Textbox(label=lbl))
+    # Transcript + coding
     with gr.Row():
+        # Left: transcript
         with gr.Column(scale=3):
             transcript_html = gr.HTML()
+            hidden_segment = gr.Textbox(
+                interactive=False, visible=False, elem_id="selected_segment_state"
+            )
+        # Right: controls
         with gr.Column(scale=2):
             gr.Markdown("## 🏷️ Code Category")
             code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
             export_btn = gr.Button("Export XLSX")
             export_file = gr.File(visible=False)
+            file_input = gr.File(
+                label="Upload transcript", file_types=[".docx", ".vtt", ".txt"]
+            )
             status = gr.Textbox(label="Status", value="Ready")
+    # Callbacks
+    file_input.change(
+        fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state]
+    )
     def update_transcript(text, df):
         return build_transcript_html(text, df)
+    full_text.change(
+        update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html
+    )
+    coded_df_state.change(
+        update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html
+    )
+    add_code_btn.click(
+        add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state]
+    )
+    code_categories_state.change(
+        lambda codes: gr.update(choices=codes),
+        inputs=code_categories_state,
+        outputs=code_dropdown,
+    )
+    apply_btn.click(
+        apply_code,
+        inputs=[coded_df_state, selected_segment_state, code_dropdown, file_id]
+        + metadata_inputs,
+        outputs=[coded_df_state, status],
+    )
     coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
     export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
         lambda f: gr.update(visible=f is not None),
         inputs=export_file,
+        outputs=export_file,
     )
 demo.launch()