clementBE committed on
Commit
b9fd23f
·
verified ·
1 Parent(s): c3119af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -60
app.py CHANGED
@@ -75,52 +75,72 @@ def process_file(file_obj):
75
  return text, name, get_empty_df()
76
 
77
  # ------------------------------
78
- # BUILD HIGHLIGHTED TEXT
79
- # ------------------------------
80
- def build_highlighted_text(text, coded_df):
81
- entities = []
 
 
 
 
82
  if coded_df is not None and not coded_df.empty:
 
83
  for _, row in coded_df.iterrows():
84
- segment = row["Coded Segment"]
85
- start = text.find(segment)
86
- if start != -1:
87
- end = start + len(segment)
88
- entities.append({"start": start, "end": end, "label": row["Code"]})
89
- return {"text": text, "entities": entities}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # ------------------------------
92
  # APPLY CODE
93
  # ------------------------------
94
- def apply_code(coded_df, file_id, full_text, selected_segment, code, *metadata_values):
95
- if not selected_segment:
96
- return coded_df, "Select a segment first."
97
  if not code:
98
- return coded_df, "Select a code first."
99
  if not file_id:
100
- return coded_df, "Upload a file first."
101
 
102
  meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
103
-
104
- # extract context
105
  context = "Context unavailable"
106
  try:
107
  n_full = " ".join(full_text.split())
108
- n_seg = " ".join(selected_segment.split())
109
  idx = n_full.index(n_seg)
110
- context = "..." + n_full[max(0, idx - 100):idx]
111
  except:
112
  pass
113
 
114
  new_row = {
115
  "File ID": file_id,
 
116
  "Code": code,
117
- "Coded Segment": selected_segment,
118
  "Context (100 chars)": context,
119
- **meta_dict,
120
  }
121
-
122
  new_df = pd.concat([coded_df, pd.DataFrame([new_row])], ignore_index=True)
123
- return new_df, f"Segment coded as '{code}'!"
124
 
125
  # ------------------------------
126
  # EXPORT XLSX
@@ -137,64 +157,56 @@ def export_excel(df):
137
  # ------------------------------
138
  with gr.Blocks(title="Interactive Qualitative Coding") as demo:
139
  gr.Markdown("# 📑 Interactive Qualitative Coding Tool")
140
- gr.Markdown("Click or highlight segments in the transcript to code them.")
141
 
142
  # states
143
  file_id = gr.State("")
144
  full_text = gr.State("")
145
  coded_df_state = gr.State(get_empty_df())
146
 
147
- # ---------------- FILE UPLOAD ----------------
148
  with gr.Row():
149
- file_input = gr.File(label="Upload transcript (.docx, .vtt, .txt)", file_types=[".docx", ".vtt", ".txt"])
150
- status = gr.Textbox(label="Status", value="Ready")
 
 
 
 
 
151
 
152
- file_input.change(
153
- fn=process_file,
154
- inputs=file_input,
155
- outputs=[full_text, file_id, coded_df_state]
156
- )
157
 
158
- # ---------------- METADATA ----------------
159
- metadata_inputs = []
160
- with gr.Row():
161
- for k, lbl in METADATA_FIELDS.items():
162
- metadata_inputs.append(gr.Textbox(label=lbl))
163
 
164
- # ---------------- HIGHLIGHTED TRANSCRIPT ----------------
165
- gr.Markdown("## 📖 Transcript (click or highlight segments)")
166
- highlighted_text = gr.HighlightedText(value={"text": "", "entities": []}, label="Transcript")
167
 
168
- # update highlighted text when full_text or coded_df_state changes
169
- def update_highlighted(text, df):
170
- return build_highlighted_text(text, df)
171
 
172
- full_text.change(update_highlighted, inputs=[full_text, coded_df_state], outputs=highlighted_text)
173
- coded_df_state.change(update_highlighted, inputs=[full_text, coded_df_state], outputs=highlighted_text)
174
 
175
- # clicking/highlighting text sets segment box
176
- segment_box = gr.Textbox(label="Selected Segment", placeholder="Selected segment appears here", lines=3)
177
- highlighted_text.select(lambda x: x, inputs=highlighted_text, outputs=segment_box)
178
 
179
- # ---------------- CODING ----------------
180
- code_dropdown = gr.Dropdown(label="Select Code", choices=DEFAULT_CODES)
181
- code_btn = gr.Button("Apply Code")
 
182
  code_btn.click(
183
  fn=apply_code,
184
  inputs=[coded_df_state, file_id, full_text, segment_box, code_dropdown] + metadata_inputs,
185
  outputs=[coded_df_state, status]
186
  )
187
-
188
- # ---------------- CODED DATA ----------------
189
- gr.Markdown("## 📊 Coded Segments")
190
- table = gr.Dataframe(interactive=False)
191
  coded_df_state.change(lambda x: x, inputs=coded_df_state, outputs=table)
192
 
193
- # ---------------- EXPORT ----------------
194
- with gr.Row():
195
- export_btn = gr.Button("Export XLSX")
196
- export_file = gr.File(visible=False)
197
-
198
  export_btn.click(
199
  export_excel,
200
  inputs=coded_df_state,
 
75
  return text, name, get_empty_df()
76
 
77
  # ------------------------------
78
+ # BUILD HTML TRANSCRIPT
79
+ # ------------------------------
80
+ def build_html_transcript(text, coded_df):
81
+ """
82
+ Return HTML with optional colored highlights for coded segments.
83
+ Also inject JS to auto-fill segment box when user highlights text.
84
+ """
85
+ display_text = text
86
  if coded_df is not None and not coded_df.empty:
87
+ # sort by start index to avoid overlapping HTML issues
88
  for _, row in coded_df.iterrows():
89
+ seg = row["Coded Segment"]
90
+ color = COLOR_MAP.get(row["Code"], "yellow")
91
+ display_text = display_text.replace(seg, f"<span style='background-color:{color}'>{seg}</span>", 1)
92
+
93
+ # HTML with JS
94
+ html = f"""
95
+ <div style='white-space: pre-wrap; font-size: 16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;' id='transcript'>
96
+ {display_text.replace('\n','<br>')}
97
+ </div>
98
+ <script>
99
+ const transcript = document.getElementById('transcript');
100
+ transcript.addEventListener('mouseup', function() {{
101
+ const selection = window.getSelection().toString();
102
+ if(selection.length>0){{
103
+ const tb = document.querySelector('textarea[placeholder="Selected segment"]');
104
+ if(tb) {{
105
+ tb.value = selection;
106
+ tb.dispatchEvent(new Event("input", {{bubbles:true}}));
107
+ }}
108
+ }}
109
+ }});
110
+ </script>
111
+ """
112
+ return html
113
 
114
  # ------------------------------
115
  # APPLY CODE
116
  # ------------------------------
117
+ def apply_code(coded_df, file_id, full_text, segment, code, *metadata_values):
118
+ if not segment:
119
+ return coded_df, "⚠️ Select a segment first."
120
  if not code:
121
+ return coded_df, "⚠️ Select a code first."
122
  if not file_id:
123
+ return coded_df, "⚠️ Upload a file first."
124
 
125
  meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
 
 
126
  context = "Context unavailable"
127
  try:
128
  n_full = " ".join(full_text.split())
129
+ n_seg = " ".join(segment.split())
130
  idx = n_full.index(n_seg)
131
+ context = "..." + n_full[max(0, idx-100):idx]
132
  except:
133
  pass
134
 
135
  new_row = {
136
  "File ID": file_id,
137
+ "Coded Segment": segment,
138
  "Code": code,
 
139
  "Context (100 chars)": context,
140
+ **meta_dict
141
  }
 
142
  new_df = pd.concat([coded_df, pd.DataFrame([new_row])], ignore_index=True)
143
+ return new_df, f"✅ Segment coded as '{code}'!"
144
 
145
  # ------------------------------
146
  # EXPORT XLSX
 
157
  # ------------------------------
158
  with gr.Blocks(title="Interactive Qualitative Coding") as demo:
159
  gr.Markdown("# 📑 Interactive Qualitative Coding Tool")
160
+ gr.Markdown("Select text in the transcript (left) to auto-fill coding tools (right).")
161
 
162
  # states
163
  file_id = gr.State("")
164
  full_text = gr.State("")
165
  coded_df_state = gr.State(get_empty_df())
166
 
167
+ # ---------------- ROW LAYOUT ----------------
168
  with gr.Row():
169
+ # LEFT: transcript
170
+ with gr.Column(scale=3):
171
+ transcript_html = gr.HTML()
172
+ # RIGHT: coding tools
173
+ with gr.Column(scale=2):
174
+ file_input = gr.File(label="Upload transcript (.docx, .vtt, .txt)", file_types=[".docx",".vtt",".txt"])
175
+ status = gr.Textbox(label="Status", value="Ready")
176
 
177
+ gr.Markdown("## 📝 Metadata")
178
+ metadata_inputs = []
179
+ for k, lbl in METADATA_FIELDS.items():
180
+ metadata_inputs.append(gr.Textbox(label=lbl))
 
181
 
182
+ segment_box = gr.Textbox(label="Selected Segment", placeholder="Selected segment", lines=3)
183
+ code_dropdown = gr.Dropdown(label="Select Code", choices=DEFAULT_CODES)
184
+ code_btn = gr.Button("Apply Code")
 
 
185
 
186
+ gr.Markdown("## 📊 Coded Segments")
187
+ table = gr.Dataframe(interactive=False)
 
188
 
189
+ export_btn = gr.Button("Export XLSX")
190
+ export_file = gr.File(visible=False)
 
191
 
192
+ # ---------------- CALLBACKS ----------------
193
+ file_input.change(fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state])
194
 
195
+ def update_transcript(text, df):
196
+ return build_html_transcript(text, df)
 
197
 
198
+ full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
199
+ coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
200
+
201
+ # Apply code
202
  code_btn.click(
203
  fn=apply_code,
204
  inputs=[coded_df_state, file_id, full_text, segment_box, code_dropdown] + metadata_inputs,
205
  outputs=[coded_df_state, status]
206
  )
 
 
 
 
207
  coded_df_state.change(lambda x: x, inputs=coded_df_state, outputs=table)
208
 
209
+ # Export
 
 
 
 
210
  export_btn.click(
211
  export_excel,
212
  inputs=coded_df_state,