clementBE committed on
Commit
111da6d
·
verified ·
1 Parent(s): 1359c1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -13
app.py CHANGED
@@ -7,6 +7,7 @@ try:
7
  except ImportError:
8
  docx = None
9
 
 
10
  DEFAULT_CODES = [
11
  "Communication Barrier",
12
  "Emotional Support",
@@ -30,6 +31,7 @@ COLOR_MAP = {
30
  "Follow-up Needed": "orange",
31
  }
32
 
 
33
  def read_docx(path):
34
  if not docx:
35
  return "Error: python-docx not installed."
@@ -51,6 +53,7 @@ def get_empty_df():
51
  columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
52
  )
53
 
 
54
  def process_file(file_obj):
55
  if file_obj is None:
56
  return "", "", get_empty_df()
@@ -65,6 +68,40 @@ def process_file(file_obj):
65
  text = f.read()
66
  return text, name, get_empty_df()
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def apply_code(df, segment, code, file_id, *metadata_values):
69
  if not file_id:
70
  return df, "⚠️ Upload a file first"
@@ -77,11 +114,13 @@ def apply_code(df, segment, code, file_id, *metadata_values):
77
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
78
  return df, f"✅ Segment coded as '{code}'"
79
 
 
80
  def add_new_code(new_code, code_list):
81
  if new_code and new_code not in code_list:
82
  code_list.append(new_code)
83
  return code_list
84
 
 
85
  def export_excel(df):
86
  if df.empty:
87
  return None, "Nothing to export"
@@ -99,24 +138,20 @@ with gr.Blocks() as demo:
99
  coded_df_state = gr.State(get_empty_df())
100
  code_categories_state = gr.State(DEFAULT_CODES)
101
 
102
- # Metadata
103
  with gr.Row():
104
  metadata_inputs = []
105
  for k, lbl in METADATA_FIELDS.items():
106
  metadata_inputs.append(gr.Textbox(label=lbl))
107
 
108
- # Main interface
109
  with gr.Row():
110
- # Left: transcript selection
111
  with gr.Column(scale=3):
112
- transcript_text = gr.Textbox(
113
- label="Transcript (Select text below to code)",
114
- lines=25,
115
- interactive=True,
116
- placeholder="Select a segment of this text..."
117
- )
118
 
119
- # Right: coding
120
  with gr.Column(scale=2):
121
  gr.Markdown("## 🏷️ Code Category")
122
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
@@ -133,19 +168,29 @@ with gr.Blocks() as demo:
133
  file_input = gr.File(label="Upload transcript", file_types=[".docx", ".vtt", ".txt"])
134
  status = gr.Textbox(label="Status", value="Ready")
135
 
136
- # Callbacks
137
- file_input.change(fn=process_file, inputs=file_input, outputs=[transcript_text, file_id, coded_df_state])
 
 
 
 
 
 
 
 
138
 
139
  add_code_btn.click(add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state])
140
  code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
141
 
 
142
  apply_btn.click(
143
  apply_code,
144
- inputs=[coded_df_state, transcript_text, code_dropdown, file_id] + metadata_inputs,
145
  outputs=[coded_df_state, status],
146
  )
147
 
148
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
 
149
  export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
150
  lambda f: gr.update(visible=f is not None),
151
  inputs=export_file,
 
7
  except ImportError:
8
  docx = None
9
 
10
+ # Default codes
11
  DEFAULT_CODES = [
12
  "Communication Barrier",
13
  "Emotional Support",
 
31
  "Follow-up Needed": "orange",
32
  }
33
 
34
+ # --- File reading functions
35
  def read_docx(path):
36
  if not docx:
37
  return "Error: python-docx not installed."
 
53
  columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
54
  )
55
 
56
+ # --- Process uploaded file
57
  def process_file(file_obj):
58
  if file_obj is None:
59
  return "", "", get_empty_df()
 
68
  text = f.read()
69
  return text, name, get_empty_df()
70
 
71
# --- Build transcript HTML with highlight
def _find_free_occurrence(text, segment, taken):
    """Return the start of the first occurrence of *segment* in *text* that
    does not overlap any ``(start, end, color)`` range in *taken*, or -1."""
    start = text.find(segment)
    while start != -1:
        end = start + len(segment)
        if all(end <= lo or start >= hi for lo, hi, _ in taken):
            return start
        start = text.find(segment, start + 1)
    return -1


def build_transcript_html(text, df):
    """Render the transcript as an HTML fragment with coded segments highlighted.

    Args:
        text: Raw transcript text. ``None`` is treated as an empty transcript.
        df: DataFrame with "Coded Segment" and "Code" columns, or ``None`` /
            an empty frame when nothing has been coded yet.

    Returns:
        An HTML string: a scrollable ``<div id='transcript'>`` holding the
        HTML-escaped transcript (newlines rendered as ``<br>``), followed by a
        ``<script>`` that copies the current text selection into the element
        with id ``selected_segment_state``.

    Each coded segment highlights one occurrence in the transcript, using the
    colour from ``COLOR_MAP`` (``"yellow"`` when the code has no entry).
    Segments that are empty, not strings, absent from the text, or that would
    overlap an already highlighted range are left unhighlighted.
    """
    from html import escape  # local import: keeps this block self-contained

    raw = text or ""

    # Locate segments in the *raw* text first. Searching the progressively
    # marked-up string would let a segment such as "span" match inside the
    # markup inserted for an earlier segment.
    highlights = []
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            seg = row["Coded Segment"]
            if not isinstance(seg, str) or not seg:
                continue  # NaN / empty cells cannot be located in the text
            start = _find_free_occurrence(raw, seg, highlights)
            if start == -1:
                continue
            color = COLOR_MAP.get(row["Code"], "yellow")
            highlights.append((start, start + len(seg), color))

    def render(piece):
        # Escape so transcript characters like "<", ">" and "&" (common in
        # WebVTT cues) are shown literally instead of being parsed as markup.
        return escape(piece, quote=False).replace("\n", "<br>")

    parts = []
    cursor = 0
    for lo, hi, color in sorted(highlights):
        parts.append(render(raw[cursor:lo]))
        parts.append(
            f"<span style='background-color:{color}'>{render(raw[lo:hi])}</span>"
        )
        cursor = hi
    parts.append(render(raw[cursor:]))
    safe_text = "".join(parts)

    # NOTE(review): browsers do not execute <script> elements inserted through
    # innerHTML, and DOMContentLoaded has normally already fired by the time a
    # fragment is injected -- confirm this handler actually runs in the Gradio
    # version in use.
    markup = f"""
<div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
{safe_text}
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {{
const transcript = document.getElementById('transcript');
transcript.addEventListener('mouseup', function() {{
const sel = window.getSelection().toString();
if(sel.length>0){{
const state_input = document.getElementById('selected_segment_state');
if(state_input) {{
state_input.value = sel;
state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
}}
}}
}});
}});
</script>
"""
    return markup
103
+
104
+ # --- Apply code to selected segment
105
  def apply_code(df, segment, code, file_id, *metadata_values):
106
  if not file_id:
107
  return df, "⚠️ Upload a file first"
 
114
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
115
  return df, f"✅ Segment coded as '{code}'"
116
 
117
# --- Add new code to dropdown
def add_new_code(new_code, code_list):
    """Return the code list with *new_code* added when it is non-empty and new.

    Args:
        new_code: Label to add; falsy values (``""``, ``None``) are ignored.
        code_list: Current list of code labels; ``None`` is treated as empty.

    Returns:
        ``code_list`` itself when nothing is added, otherwise a new list with
        *new_code* appended. The list passed in is never mutated, so a list
        shared with other callers is left untouched.
    """
    if code_list is None:
        code_list = []
    if not new_code or new_code in code_list:
        return code_list
    return [*code_list, new_code]
122
 
123
+ # --- Export coded data
124
  def export_excel(df):
125
  if df.empty:
126
  return None, "Nothing to export"
 
138
  coded_df_state = gr.State(get_empty_df())
139
  code_categories_state = gr.State(DEFAULT_CODES)
140
 
141
+ # --- Metadata
142
  with gr.Row():
143
  metadata_inputs = []
144
  for k, lbl in METADATA_FIELDS.items():
145
  metadata_inputs.append(gr.Textbox(label=lbl))
146
 
147
+ # --- Main interface
148
  with gr.Row():
149
+ # Left: transcript
150
  with gr.Column(scale=3):
151
+ transcript_html = gr.HTML()
152
+ hidden_segment = gr.Textbox(interactive=False, visible=False, elem_id="selected_segment_state")
 
 
 
 
153
 
154
+ # Right: coding controls
155
  with gr.Column(scale=2):
156
  gr.Markdown("## 🏷️ Code Category")
157
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
 
168
  file_input = gr.File(label="Upload transcript", file_types=[".docx", ".vtt", ".txt"])
169
  status = gr.Textbox(label="Status", value="Ready")
170
 
171
+ # --- Callbacks
172
+ file_input.change(
173
+ fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state]
174
+ )
175
+
176
def update_transcript(text, df):
    """Rebuild the transcript HTML from the given text and coded-segment table."""
    rendered = build_transcript_html(text, df)
    return rendered
178
+
179
+ full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
180
+ coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
181
 
182
  add_code_btn.click(add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state])
183
  code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
184
 
185
+ # Apply button
186
  apply_btn.click(
187
  apply_code,
188
+ inputs=[coded_df_state, hidden_segment, code_dropdown, file_id] + metadata_inputs,
189
  outputs=[coded_df_state, status],
190
  )
191
 
192
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
193
+
194
  export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
195
  lambda f: gr.update(visible=f is not None),
196
  inputs=export_file,