clementBE committed on
Commit
af98792
·
verified ·
1 Parent(s): 4741434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -24
app.py CHANGED
@@ -1,49 +1,196 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
- # Default codes
5
- CODES = ["Theme: Communication Barrier","Theme: Emotional Support"]
 
 
6
 
7
- # State
8
- coded_df_state = pd.DataFrame(columns=["Segment","Code"])
 
 
 
 
 
 
 
 
9
 
10
- # Build transcript HTML with JS to store selection
11
- def build_transcript_html(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  html = f"""
13
- <div id="transcript" style='white-space: pre-wrap; border:1px solid #ccc; padding:5px; max-height:400px; overflow:auto;'>{text}</div>
 
 
14
  <script>
15
- const transcript = document.getElementById("transcript");
16
- transcript.addEventListener("mouseup", function() {{
17
  const sel = window.getSelection().toString();
18
  if(sel.length>0){{
19
- document.querySelector("#selected_segment").value = sel;
 
 
 
 
 
20
  }}
21
  }});
22
  </script>
23
  """
24
  return html
25
 
26
- # Apply code to selected segment
27
- def apply_code(selected_segment, code, df):
28
- if not selected_segment or not code:
29
- return df, "⚠️ Select segment and code first"
30
- new_row = {"Segment": selected_segment, "Code": code}
 
 
 
31
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
32
- return df, f"✅ Segment coded as {code}"
 
 
 
 
 
 
 
 
 
 
33
 
34
- # Gradio interface
 
 
35
  with gr.Blocks() as demo:
36
- transcript_text = "This is a sample transcript. You can select any part of this text to code it."
37
 
 
 
 
 
 
 
 
38
  with gr.Row():
 
 
 
 
 
 
 
39
  with gr.Column(scale=3):
40
- transcript_html = gr.HTML(build_transcript_html(transcript_text))
 
 
 
 
41
  with gr.Column(scale=2):
42
- selected_segment = gr.Textbox(label="Selected Segment", interactive=False, elem_id="selected_segment")
43
- code_dropdown = gr.Dropdown(label="Select Code", choices=CODES)
44
- code_btn = gr.Button("Apply Code")
45
- coded_table = gr.Dataframe(headers=["Segment","Code"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- code_btn.click(apply_code, inputs=[selected_segment, code_dropdown, coded_table], outputs=[coded_table, gr.Textbox(label="Status")])
 
 
 
 
 
48
 
49
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import os
4
 
5
+ try:
6
+ import docx
7
+ except ImportError:
8
+ docx = None
9
 
10
+ # ------------------------------
11
+ # CONFIG
12
+ # ------------------------------
13
# Background colour used to highlight each code in the transcript.
COLOR_MAP = {
    "Communication Barrier": "lightblue",
    "Emotional Support": "lightgreen",
    "Future Aspirations": "khaki",
    "Financial Stress": "lightpink",
    "Follow-up Needed": "orange",
}

# Available codes, in button order (same labels and order as COLOR_MAP).
CODES = list(COLOR_MAP)

# Metadata column name -> label shown on the matching input box.
METADATA_FIELDS = dict(
    interview_id="Interview ID",
    interview_date="Interview Date",
    occupation="Occupation",
    age="Age",
)
35
+
36
+ # ------------------------------
37
+ # FILE PROCESSING
38
+ # ------------------------------
39
def read_docx(path):
    """Return the text of a .docx file, one paragraph per line.

    When python-docx could not be imported (the module-level ``docx`` name
    is falsy) an error message string is returned in place of the text.
    """
    if docx:
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    return "Error: python-docx not installed."
44
+
45
def read_vtt(path):
    """Extract the spoken text from a WebVTT file as one space-joined string.

    Dropped lines: blank or whitespace-only lines, the leading ``WEBVTT``
    header, cue timing lines (containing ``-->``) and purely numeric cue
    identifiers. Everything else is kept, stripped, in file order.

    Args:
        path: Path of the .vtt file; read as UTF-8 (a leading BOM is ignored).

    Returns:
        The remaining lines joined with single spaces.
    """
    # utf-8-sig strips a byte-order mark so the header check below still
    # matches files saved with a BOM.
    with open(path, "r", encoding="utf-8-sig") as f:
        stripped_lines = [line.strip() for line in f]

    kept = []
    header_pending = True
    for line in stripped_lines:
        # Test the stripped line: whitespace-only lines must not survive as
        # empty strings, which would put double spaces in the output.
        if not line:
            continue
        if header_pending:
            # Only the first non-blank line can be the file header; spoken
            # text that merely mentions "WEBVTT" later on is kept.
            header_pending = False
            if line.startswith("WEBVTT"):
                continue
        if "-->" in line or line.isdigit():
            continue
        kept.append(line)
    return " ".join(kept)
54
+
55
def get_empty_df():
    """Return an empty coded-segments table.

    Columns are the three fixed ones followed by every METADATA_FIELDS key.
    """
    fixed_columns = ["File ID", "Coded Segment", "Code"]
    return pd.DataFrame(columns=[*fixed_columns, *METADATA_FIELDS])
59
+
60
def process_file(file_obj):
    """Load an uploaded transcript.

    Returns a ``(text, file name, empty coded-segments table)`` triple; both
    strings are empty when no file is given. ``.docx`` and ``.vtt`` files
    (matched case-insensitively on the base name) go through their dedicated
    readers; anything else is read as UTF-8 text.
    """
    if file_obj is None:
        return "", "", get_empty_df()

    source_path = file_obj.name
    base_name = os.path.basename(source_path)
    lowered = base_name.lower()

    if lowered.endswith(".docx"):
        content = read_docx(source_path)
    elif lowered.endswith(".vtt"):
        content = read_vtt(source_path)
    else:
        with open(source_path, "r", encoding="utf-8") as handle:
            content = handle.read()

    return content, base_name, get_empty_df()
73
+
74
+ # ------------------------------
75
+ # BUILD TRANSCRIPT HTML
76
+ # ------------------------------
77
def build_transcript_html(text, df):
    """Render the transcript as HTML with coded segments highlighted.

    Args:
        text: Raw transcript text; ``None`` or empty renders an empty box.
        df: Coded-segments table with "Coded Segment" and "Code" columns, or
            ``None``. Each row highlights the first occurrence of its segment
            that does not overlap an already highlighted range, using the
            colour from COLOR_MAP ("yellow" when the code is unknown).

    Returns:
        An HTML string: the transcript ``<div id='transcript'>`` followed by
        a ``<script>`` that copies the current text selection into the
        element with id ``selected_segment_state`` on mouse-up.
    """
    # Local import: the file does not import ``html`` at module level.
    from html import escape

    text = text or ""

    # Locate segments in the RAW text first. Replacing inside text that
    # already contains highlight markup could match the markup itself.
    spans = []  # (start, end, colour), kept non-overlapping
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            seg = row["Coded Segment"]
            # Skip NaN/empty segments: an empty needle matches everywhere.
            if not isinstance(seg, str) or not seg:
                continue
            color = COLOR_MAP.get(row["Code"], "yellow")
            start = text.find(seg)
            while start != -1:
                end = start + len(seg)
                if all(end <= s or start >= e for s, e, _ in spans):
                    spans.append((start, end, color))
                    break
                start = text.find(seg, start + 1)
    spans.sort()

    # Escape every piece so transcript content such as "<" or "&" is shown
    # literally instead of being interpreted as markup.
    parts = []
    cursor = 0
    for start, end, color in spans:
        parts.append(escape(text[cursor:start]))
        parts.append(
            f"<span style='background-color:{color}'>"
            f"{escape(text[start:end])}</span>"
        )
        cursor = end
    parts.append(escape(text[cursor:]))
    safe_text = "".join(parts).replace("\n", "<br>")

    # Every literal JS brace is doubled: a single "{" inside an f-string
    # starts a Python expression (the original "{ {bubbles:true} }" raised
    # NameError on each call).
    # NOTE(review): confirm the host page actually executes <script> tags
    # delivered through the HTML component.
    return f"""
<div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
{safe_text}
</div>
<script>
const transcript = document.getElementById('transcript');
transcript.addEventListener('mouseup', function() {{
    const sel = window.getSelection().toString();
    if(sel.length>0){{
        // store in hidden input
        const state_input = document.querySelector('#selected_segment_state');
        if(state_input) {{
            state_input.value = sel;
            state_input.dispatchEvent(new Event("input",{{bubbles:true}}));
        }}
    }}
}});
</script>
"""
105
 
106
+ # ------------------------------
107
+ # APPLY CODE
108
+ # ------------------------------
109
def apply_code(df, segment, code, file_id, *metadata_values):
    """Append one coded segment to the table.

    Returns ``(table, status message)``. When the segment, code or file id
    is missing/empty the table is returned untouched with a warning.
    Metadata values are paired positionally with the METADATA_FIELDS keys.
    """
    if not (segment and code and file_id):
        return df, "⚠️ Select text and file first"

    record = {"File ID": file_id, "Coded Segment": segment, "Code": code}
    record.update(zip(METADATA_FIELDS.keys(), metadata_values))
    appended = pd.concat([df, pd.DataFrame([record])], ignore_index=True)
    return appended, f"✅ Segment coded as '{code}'"
116
+
117
+ # ------------------------------
118
+ # EXPORT XLSX
119
+ # ------------------------------
120
def export_excel(df):
    """Write the coded segments to an .xlsx file and return its path.

    Args:
        df: Coded-segments table, or ``None``.

    Returns:
        ``(path, status message)``; ``path`` is ``None`` when there is
        nothing to export.
    """
    # Local import: the file does not import ``tempfile`` at module level.
    import tempfile

    if df is None or df.empty:
        return None, "Nothing to export"

    # Each export gets its own directory, so simultaneous sessions cannot
    # overwrite (or download) one another's workbook. The file name that
    # users see is unchanged.
    export_dir = tempfile.mkdtemp(prefix="coded_segments_")
    path = os.path.join(export_dir, "coded_segments.xlsx")
    df.to_excel(path, index=False)
    return path, "Excel ready"
126
 
127
+ # ------------------------------
128
+ # GRADIO UI
129
+ # ------------------------------
130
# Builds the Gradio app: metadata inputs on top, transcript beside the code
# buttons and coded-segments table, then export / upload / status widgets.
with gr.Blocks() as demo:

    # States
    full_text = gr.State("")
    file_id = gr.State("")
    coded_df_state = gr.State(get_empty_df())

    # ---------------- Metadata Top ----------------
    with gr.Row():
        # One textbox per metadata field, in METADATA_FIELDS order, so the
        # values line up with the keys inside apply_code.
        metadata_inputs = [
            gr.Textbox(label=lbl) for lbl in METADATA_FIELDS.values()
        ]

    # ---------------- Transcript + Coding ----------------
    with gr.Row():
        # Left: transcript
        with gr.Column(scale=3):
            transcript_html = gr.HTML()
            # Hidden textbox that the transcript's script fills with the
            # current text selection.
            # NOTE(review): the script looks this element up by id and sets
            # ``.value`` on it; confirm the id lands on the input element
            # itself and that a visible=False component exists in the DOM.
            selected_segment = gr.Textbox(
                label="Selected segment (hidden)",
                interactive=False,
                visible=False,
                elem_id="selected_segment_state",
            )

        # Right: code buttons + table
        with gr.Column(scale=2):
            gr.Markdown("## 🏷️ Code Categories")
            code_buttons = [gr.Button(c) for c in CODES]
            gr.Markdown("## 📊 Coded Segments")
            table = gr.Dataframe(interactive=False)

    export_btn = gr.Button("Export XLSX")
    export_file = gr.File(visible=False)

    file_input = gr.File(
        label="Upload transcript", file_types=[".docx", ".vtt", ".txt"]
    )
    status = gr.Textbox(label="Status", value="Ready")

    # ---------------- Callbacks ----------------
    # Load file
    file_input.change(
        fn=process_file,
        inputs=file_input,
        outputs=[full_text, file_id, coded_df_state],
    )

    # Update transcript HTML
    def update_transcript(text, df):
        """Re-render the transcript for the current text and coded rows."""
        return build_transcript_html(text, df)

    full_text.change(
        update_transcript,
        inputs=[full_text, coded_df_state],
        outputs=transcript_html,
    )
    coded_df_state.change(
        update_transcript,
        inputs=[full_text, coded_df_state],
        outputs=transcript_html,
    )

    # Apply code buttons
    for btn, code_name in zip(code_buttons, CODES):
        btn.click(
            apply_code,
            # Read the segment from the textbox the transcript script writes
            # to. The separate gr.State used before was never written by
            # anything, so apply_code always received an empty segment.
            inputs=[coded_df_state, selected_segment, gr.State(code_name), file_id]
            + metadata_inputs,
            outputs=[coded_df_state, status],
        )

    # Update table
    coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)

    # Export: write the workbook, then reveal the download widget only when
    # a file was actually produced.
    export_btn.click(
        export_excel, inputs=coded_df_state, outputs=[export_file, status]
    ).then(
        lambda f: gr.update(visible=f is not None),
        inputs=export_file,
        outputs=export_file,
    )

demo.launch()