clementBE committed on
Commit
2415814
·
verified ·
1 Parent(s): 111da6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -60
app.py CHANGED
@@ -7,7 +7,7 @@ try:
7
  except ImportError:
8
  docx = None
9
 
10
- # Default codes
11
  DEFAULT_CODES = [
12
  "Communication Barrier",
13
  "Emotional Support",
@@ -31,7 +31,7 @@ COLOR_MAP = {
31
  "Follow-up Needed": "orange",
32
  }
33
 
34
- # --- File reading functions
35
  def read_docx(path):
36
  if not docx:
37
  return "Error: python-docx not installed."
@@ -53,7 +53,6 @@ def get_empty_df():
53
  columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
54
  )
55
 
56
- # --- Process uploaded file
57
  def process_file(file_obj):
58
  if file_obj is None:
59
  return "", "", get_empty_df()
@@ -68,45 +67,12 @@ def process_file(file_obj):
68
  text = f.read()
69
  return text, name, get_empty_df()
70
 
71
- # --- Build transcript HTML with highlight
72
- def build_transcript_html(text, df):
73
- display_text = text
74
- if df is not None and not df.empty:
75
- for _, row in df.iterrows():
76
- seg = row["Coded Segment"]
77
- color = COLOR_MAP.get(row["Code"], "yellow")
78
- display_text = display_text.replace(
79
- seg, f"<span style='background-color:{color}'>{seg}</span>", 1
80
- )
81
- safe_text = display_text.replace("\n", "<br>")
82
- html = f"""
83
- <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
84
- {safe_text}
85
- </div>
86
- <script>
87
- document.addEventListener('DOMContentLoaded', function() {{
88
- const transcript = document.getElementById('transcript');
89
- transcript.addEventListener('mouseup', function() {{
90
- const sel = window.getSelection().toString();
91
- if(sel.length>0){{
92
- const state_input = document.getElementById('selected_segment_state');
93
- if(state_input) {{
94
- state_input.value = sel;
95
- state_input.dispatchEvent(new Event("input", {{bubbles:true}}));
96
- }}
97
- }}
98
- }});
99
- }});
100
- </script>
101
- """
102
- return html
103
-
104
- # --- Apply code to selected segment
105
  def apply_code(df, segment, code, file_id, *metadata_values):
106
  if not file_id:
107
  return df, "⚠️ Upload a file first"
108
  if not segment:
109
- return df, "⚠️ Select text in transcript"
110
  if not code:
111
  return df, "⚠️ Select a code"
112
  meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
@@ -114,13 +80,13 @@ def apply_code(df, segment, code, file_id, *metadata_values):
114
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
115
  return df, f"✅ Segment coded as '{code}'"
116
 
117
- # --- Add new code to dropdown
118
  def add_new_code(new_code, code_list):
119
  if new_code and new_code not in code_list:
120
  code_list.append(new_code)
121
  return code_list
122
 
123
- # --- Export coded data
124
  def export_excel(df):
125
  if df.empty:
126
  return None, "Nothing to export"
@@ -133,31 +99,40 @@ def export_excel(df):
133
  # ----------------------------
134
  with gr.Blocks() as demo:
135
 
 
136
  full_text = gr.State("")
137
  file_id = gr.State("")
138
  coded_df_state = gr.State(get_empty_df())
139
  code_categories_state = gr.State(DEFAULT_CODES)
140
 
141
- # --- Metadata
142
  with gr.Row():
143
  metadata_inputs = []
144
  for k, lbl in METADATA_FIELDS.items():
145
  metadata_inputs.append(gr.Textbox(label=lbl))
146
 
147
- # --- Main interface
148
  with gr.Row():
149
  # Left: transcript
150
  with gr.Column(scale=3):
151
- transcript_html = gr.HTML()
152
- hidden_segment = gr.Textbox(interactive=False, visible=False, elem_id="selected_segment_state")
 
 
 
 
153
 
154
- # Right: coding controls
155
  with gr.Column(scale=2):
156
- gr.Markdown("## 🏷️ Code Category")
 
 
 
 
157
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
158
  code_input = gr.Textbox(label="Or type new code")
159
  add_code_btn = gr.Button("Add new code")
160
- apply_btn = gr.Button("Apply code to selected text")
161
 
162
  gr.Markdown("## 📊 Coded Segments")
163
  table = gr.Dataframe(interactive=False)
@@ -168,30 +143,38 @@ with gr.Blocks() as demo:
168
  file_input = gr.File(label="Upload transcript", file_types=[".docx", ".vtt", ".txt"])
169
  status = gr.Textbox(label="Status", value="Ready")
170
 
171
- # --- Callbacks
172
  file_input.change(
173
- fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state]
 
 
174
  )
175
 
176
- def update_transcript(text, df):
177
- return build_transcript_html(text, df)
178
-
179
- full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
180
- coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
181
 
182
- add_code_btn.click(add_new_code, inputs=[code_input, code_categories_state], outputs=[code_categories_state])
183
- code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
 
 
 
184
 
185
- # Apply button
186
  apply_btn.click(
187
  apply_code,
188
- inputs=[coded_df_state, hidden_segment, code_dropdown, file_id] + metadata_inputs,
189
- outputs=[coded_df_state, status],
190
  )
191
 
192
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
193
 
194
- export_btn.click(export_excel, inputs=coded_df_state, outputs=[export_file, status]).then(
 
 
 
 
195
  lambda f: gr.update(visible=f is not None),
196
  inputs=export_file,
197
  outputs=export_file
 
7
  except ImportError:
8
  docx = None
9
 
10
+ # --- Default codes and metadata ---
11
  DEFAULT_CODES = [
12
  "Communication Barrier",
13
  "Emotional Support",
 
31
  "Follow-up Needed": "orange",
32
  }
33
 
34
+ # --- File processing ---
35
  def read_docx(path):
36
  if not docx:
37
  return "Error: python-docx not installed."
 
53
  columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
54
  )
55
 
 
56
  def process_file(file_obj):
57
  if file_obj is None:
58
  return "", "", get_empty_df()
 
67
  text = f.read()
68
  return text, name, get_empty_df()
69
 
70
+ # --- Apply coding ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def apply_code(df, segment, code, file_id, *metadata_values):
72
  if not file_id:
73
  return df, "⚠️ Upload a file first"
74
  if not segment:
75
+ return df, "⚠️ Paste a segment first"
76
  if not code:
77
  return df, "⚠️ Select a code"
78
  meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
 
80
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
81
  return df, f"✅ Segment coded as '{code}'"
82
 
83
+ # --- Add new code ---
84
  def add_new_code(new_code, code_list):
85
  if new_code and new_code not in code_list:
86
  code_list.append(new_code)
87
  return code_list
88
 
89
+ # --- Export to Excel ---
90
  def export_excel(df):
91
  if df.empty:
92
  return None, "Nothing to export"
 
99
  # ----------------------------
100
  with gr.Blocks() as demo:
101
 
102
+ # --- States ---
103
  full_text = gr.State("")
104
  file_id = gr.State("")
105
  coded_df_state = gr.State(get_empty_df())
106
  code_categories_state = gr.State(DEFAULT_CODES)
107
 
108
+ # --- Metadata on top ---
109
  with gr.Row():
110
  metadata_inputs = []
111
  for k, lbl in METADATA_FIELDS.items():
112
  metadata_inputs.append(gr.Textbox(label=lbl))
113
 
114
+ # --- Main interface ---
115
  with gr.Row():
116
  # Left: transcript
117
  with gr.Column(scale=3):
118
+ transcript_box = gr.Textbox(
119
+ label="Transcript (copy the text you want to code)",
120
+ lines=25,
121
+ interactive=True,
122
+ placeholder="Upload a file to see transcript..."
123
+ )
124
 
125
+ # Right: coding tools
126
  with gr.Column(scale=2):
127
+ gr.Markdown("## 🏷️ Code Segment")
128
+ segment_box = gr.Textbox(
129
+ label="Segment to code (paste here)",
130
+ lines=4,
131
+ )
132
  code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
133
  code_input = gr.Textbox(label="Or type new code")
134
  add_code_btn = gr.Button("Add new code")
135
+ apply_btn = gr.Button("Apply code")
136
 
137
  gr.Markdown("## 📊 Coded Segments")
138
  table = gr.Dataframe(interactive=False)
 
143
  file_input = gr.File(label="Upload transcript", file_types=[".docx", ".vtt", ".txt"])
144
  status = gr.Textbox(label="Status", value="Ready")
145
 
146
+ # --- Callbacks ---
147
  file_input.change(
148
+ fn=process_file,
149
+ inputs=file_input,
150
+ outputs=[transcript_box, file_id, coded_df_state]
151
  )
152
 
153
+ add_code_btn.click(
154
+ add_new_code,
155
+ inputs=[code_input, code_categories_state],
156
+ outputs=[code_categories_state]
157
+ )
158
 
159
+ code_categories_state.change(
160
+ lambda codes: gr.update(choices=codes),
161
+ inputs=code_categories_state,
162
+ outputs=code_dropdown
163
+ )
164
 
 
165
  apply_btn.click(
166
  apply_code,
167
+ inputs=[coded_df_state, segment_box, code_dropdown, file_id] + metadata_inputs,
168
+ outputs=[coded_df_state, status]
169
  )
170
 
171
  coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)
172
 
173
+ export_btn.click(
174
+ export_excel,
175
+ inputs=coded_df_state,
176
+ outputs=[export_file, status]
177
+ ).then(
178
  lambda f: gr.update(visible=f is not None),
179
  inputs=export_file,
180
  outputs=export_file