clementBE committed on
Commit
97ab6fa
·
verified ·
1 Parent(s): 70f6e49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -52
app.py CHANGED
@@ -7,9 +7,6 @@ try:
7
  except ImportError:
8
  docx = None
9
 
10
- # ------------------------------
11
- # CONFIG
12
- # ------------------------------
13
  DEFAULT_CODES = [
14
  "Communication Barrier",
15
  "Emotional Support",
@@ -33,15 +30,14 @@ COLOR_MAP = {
33
  "Follow-up Needed": "orange",
34
  }
35
 
36
- # ------------------------------
37
- # FILE PROCESSING
38
- # ------------------------------
39
  def read_docx(path):
40
  if not docx:
41
  return "Error: python-docx not installed."
42
  d = docx.Document(path)
43
  return "\n".join([p.text for p in d.paragraphs])
44
 
 
45
  def read_vtt(path):
46
  with open(path, "r", encoding="utf-8") as f:
47
  lines = f.read().split("\n")
@@ -52,11 +48,13 @@ def read_vtt(path):
52
  ]
53
  return " ".join(cleaned)
54
 
 
55
  def get_empty_df():
56
  return pd.DataFrame(
57
  columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
58
  )
59
 
 
60
  def process_file(file_obj):
61
  if file_obj is None:
62
  return "", "", get_empty_df()
@@ -71,40 +69,40 @@ def process_file(file_obj):
71
  text = f.read()
72
  return text, name, get_empty_df()
73
 
74
- # ------------------------------
75
- # BUILD TRANSCRIPT HTML
76
- # ------------------------------
77
  def build_transcript_html(text, df):
78
  display_text = text
79
  if df is not None and not df.empty:
80
  for _, row in df.iterrows():
81
  seg = row["Coded Segment"]
82
  color = COLOR_MAP.get(row["Code"], "yellow")
83
- display_text = display_text.replace(seg, f"<span style='background-color:{color}'>{seg}</span>", 1)
 
 
84
  safe_text = display_text.replace("\n", "<br>")
85
  html = f"""
86
  <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
87
  {safe_text}
88
  </div>
89
  <script>
90
- const transcript = document.getElementById('transcript');
91
- transcript.addEventListener('mouseup', function() {{
92
- const sel = window.getSelection().toString();
93
- if(sel.length>0){{
94
- const state_input = document.querySelector('#selected_segment_state');
95
- if(state_input) {{
96
- state_input.value = sel;
97
- state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
 
 
98
  }}
99
- }}
100
  }});
101
  </script>
102
  """
103
  return html
104
 
105
- # ------------------------------
106
- # APPLY CODE
107
- # ------------------------------
108
  def apply_code(df, segment, code, file_id, *metadata_values):
109
  if not segment or not code or not file_id:
110
  return df, "⚠️ Select text and file first"
@@ -113,17 +111,13 @@ def apply_code(df, segment, code, file_id, *metadata_values):
113
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
114
  return df, f"✅ Segment coded as '{code}'"
115
 
116
- # ------------------------------
117
- # ADD NEW CODE TO DROPDOWN
118
- # ------------------------------
119
  def add_new_code(new_code, code_list):
120
  if new_code and new_code not in code_list:
121
  code_list.append(new_code)
122
  return code_list
123
 
124
- # ------------------------------
125
- # EXPORT XLSX
126
- # ------------------------------
127
  def export_excel(df):
128
  if df.empty:
129
  return None, "Nothing to export"
@@ -131,11 +125,8 @@ def export_excel(df):
131
  df.to_excel(path, index=False)
132
  return path, "Excel ready"
133
 
134
- # ------------------------------
135
- # GRADIO UI
136
- # ------------------------------
137
- with gr.Blocks() as demo:
138
 
 
139
  # States
140
  full_text = gr.State("")
141
  file_id = gr.State("")
@@ -143,20 +134,22 @@ with gr.Blocks() as demo:
143
  selected_segment_state = gr.State("")
144
  code_categories_state = gr.State(DEFAULT_CODES)
145
 
146
- # ---------------- Metadata ----------------
147
  with gr.Row():
148
  metadata_inputs = []
149
- for k,lbl in METADATA_FIELDS.items():
150
  metadata_inputs.append(gr.Textbox(label=lbl))
151
 
152
- # ---------------- Transcript + Coding ----------------
153
  with gr.Row():
154
- # Left
155
  with gr.Column(scale=3):
156
  transcript_html = gr.HTML()
157
- hidden_segment = gr.Textbox(interactive=False, visible=False, elem_id="selected_segment_state")
 
 
158
 
159
- # Right
160
  with gr.Column(scale=2):
161
  gr.Markdown("## 🏷️ Code Category")
162
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
@@ -171,33 +164,48 @@ with gr.Blocks() as demo:
171
  export_btn = gr.Button("Export XLSX")
172
  export_file = gr.File(visible=False)
173
 
174
- file_input = gr.File(label="Upload transcript", file_types=[".docx",".vtt",".txt"])
 
 
175
  status = gr.Textbox(label="Status", value="Ready")
176
 
177
- # ---------------- Callbacks ----------------
178
- file_input.change(fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state])
 
 
179
 
180
- # Update transcript HTML
181
  def update_transcript(text, df):
182
  return build_transcript_html(text, df)
183
- full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
184
- coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
185
 
186
- # Add new code
187
- add_code_btn.click(add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state])
188
- code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- # Apply code
191
- apply_btn.click(apply_code, inputs=[coded_df_state, selected_segment_state, code_dropdown, file_id] + metadata_inputs, outputs=[coded_df_state, status])
 
 
 
 
192
 
193
- # Update table
194
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
195
 
196
- # Export
197
  export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
198
  lambda f: gr.update(visible=f is not None),
199
  inputs=export_file,
200
- outputs=export_file
201
  )
202
 
203
  demo.launch()
 
7
  except ImportError:
8
  docx = None
9
 
 
 
 
10
  DEFAULT_CODES = [
11
  "Communication Barrier",
12
  "Emotional Support",
 
30
  "Follow-up Needed": "orange",
31
  }
32
 
33
+
 
 
34
def read_docx(path):
    """Return the text of a .docx file, one paragraph per line.

    ``docx`` is the optional python-docx module; the file sets it to ``None``
    when the import fails, in which case an error message string is returned
    in place of the document text.
    """
    if not docx:
        return "Error: python-docx not installed."
    document = docx.Document(path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
39
 
40
+
41
  def read_vtt(path):
42
  with open(path, "r", encoding="utf-8") as f:
43
  lines = f.read().split("\n")
 
48
  ]
49
  return " ".join(cleaned)
50
 
51
+
52
def get_empty_df():
    """Return an empty coding table with the fixed columns plus one per metadata field."""
    column_names = ["File ID", "Coded Segment", "Code"]
    # Metadata columns follow the fixed ones, in METADATA_FIELDS key order.
    column_names.extend(METADATA_FIELDS.keys())
    return pd.DataFrame(columns=column_names)
56
 
57
+
58
  def process_file(file_obj):
59
  if file_obj is None:
60
  return "", "", get_empty_df()
 
69
  text = f.read()
70
  return text, name, get_empty_df()
71
 
72
+
 
 
73
def build_transcript_html(text, df):
    """Render the transcript as an HTML fragment with coded segments highlighted.

    Args:
        text: Raw transcript text. It is HTML-escaped before rendering so that
            markup inside an uploaded transcript is displayed, not interpreted.
        df: Table of coded segments with "Coded Segment" and "Code" columns,
            or ``None``. The first occurrence of each segment is wrapped in a
            coloured ``<span>``; the colour comes from ``COLOR_MAP`` and
            falls back to "yellow" for unknown codes.

    Returns:
        An HTML string: a scrollable ``<div id='transcript'>`` holding the
        transcript, followed by a ``<script>`` that copies the current text
        selection into the element with id ``selected_segment_state``.
    """
    # Imported locally so this function does not rely on the file's import
    # section; only `escape` is bound, leaving the name `html` untouched.
    from html import escape

    # quote=False: the text lands in element content, never in an attribute.
    display_text = escape(text or "", quote=False)
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            seg = row["Coded Segment"]
            # An empty or non-string segment (e.g. NaN) cannot be located in
            # the text; an empty one would insert a stray span at position 0.
            if not isinstance(seg, str) or not seg:
                continue
            color = COLOR_MAP.get(row["Code"], "yellow")
            # Escape the segment the same way as the text so it still matches.
            safe_seg = escape(seg, quote=False)
            display_text = display_text.replace(
                safe_seg,
                f"<span style='background-color:{color}'>{safe_seg}</span>",
                1,
            )
    safe_text = display_text.replace("\n", "<br>")
    # NOTE(review): Gradio's HTML component is documented as rendering static
    # HTML only, and DOMContentLoaded has normally already fired by the time
    # this fragment is inserted, so this listener may never run. Confirm
    # against the Gradio version in use; the script is left unchanged here.
    markup = f"""
    <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
    {safe_text}
    </div>
    <script>
    document.addEventListener('DOMContentLoaded', function() {{
        const transcript = document.getElementById('transcript');
        transcript.addEventListener('mouseup', function() {{
            const sel = window.getSelection().toString();
            if(sel.length>0){{
                const state_input = document.getElementById('selected_segment_state');
                if(state_input) {{
                    state_input.value = sel;
                    state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
                }}
            }}
        }});
    }});
    </script>
    """
    return markup
104
 
105
+
 
 
106
  def apply_code(df, segment, code, file_id, *metadata_values):
107
  if not segment or not code or not file_id:
108
  return df, "⚠️ Select text and file first"
 
111
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
112
  return df, f"✅ Segment coded as '{code}'"
113
 
114
+
 
 
115
def add_new_code(new_code, code_list):
    """Return the code list with ``new_code`` appended if it is new.

    Args:
        new_code: Name typed by the user; surrounding whitespace is ignored.
            ``None``, empty and whitespace-only names are not added.
        code_list: Current list of code names. It is never modified.

    Returns:
        A new list: ``code_list`` plus the cleaned name when that name is
        non-empty and not already present, otherwise an unchanged copy.
    """
    # Copy first so the caller's list is never mutated in place.
    # NOTE(review): presumably a fresh object also helps Gradio notice the
    # state change -- confirm against the Gradio version in use.
    updated = list(code_list or [])
    cleaned = new_code.strip() if isinstance(new_code, str) else ""
    if cleaned and cleaned not in updated:
        updated.append(cleaned)
    return updated
119
 
120
+
 
 
121
  def export_excel(df):
122
  if df.empty:
123
  return None, "Nothing to export"
 
125
  df.to_excel(path, index=False)
126
  return path, "Excel ready"
127
 
 
 
 
 
128
 
129
+ with gr.Blocks() as demo:
130
  # States
131
  full_text = gr.State("")
132
  file_id = gr.State("")
 
134
  selected_segment_state = gr.State("")
135
  code_categories_state = gr.State(DEFAULT_CODES)
136
 
137
+ # Metadata
138
  with gr.Row():
139
  metadata_inputs = []
140
+ for k, lbl in METADATA_FIELDS.items():
141
  metadata_inputs.append(gr.Textbox(label=lbl))
142
 
143
+ # Transcript + coding
144
  with gr.Row():
145
+ # Left: transcript
146
  with gr.Column(scale=3):
147
  transcript_html = gr.HTML()
148
+ hidden_segment = gr.Textbox(
149
+ interactive=False, visible=False, elem_id="selected_segment_state"
150
+ )
151
 
152
+ # Right: controls
153
  with gr.Column(scale=2):
154
  gr.Markdown("## 🏷️ Code Category")
155
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
 
164
  export_btn = gr.Button("Export XLSX")
165
  export_file = gr.File(visible=False)
166
 
167
+ file_input = gr.File(
168
+ label="Upload transcript", file_types=[".docx", ".vtt", ".txt"]
169
+ )
170
  status = gr.Textbox(label="Status", value="Ready")
171
 
172
+ # Callbacks
173
+ file_input.change(
174
+ fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state]
175
+ )
176
 
 
177
  def update_transcript(text, df):
178
  return build_transcript_html(text, df)
 
 
179
 
180
+ full_text.change(
181
+ update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html
182
+ )
183
+ coded_df_state.change(
184
+ update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html
185
+ )
186
+
187
+ add_code_btn.click(
188
+ add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state]
189
+ )
190
+ code_categories_state.change(
191
+ lambda codes: gr.update(choices=codes),
192
+ inputs=code_categories_state,
193
+ outputs=code_dropdown,
194
+ )
195
 
196
+ apply_btn.click(
197
+ apply_code,
198
+ inputs=[coded_df_state, selected_segment_state, code_dropdown, file_id]
199
+ + metadata_inputs,
200
+ outputs=[coded_df_state, status],
201
+ )
202
 
 
203
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
204
 
 
205
  export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
206
  lambda f: gr.update(visible=f is not None),
207
  inputs=export_file,
208
+ outputs=export_file,
209
  )
210
 
211
  demo.launch()