clementBE committed on
Commit
4741434
·
verified ·
1 Parent(s): aebd496

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -214
app.py CHANGED
@@ -1,235 +1,49 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import os
4
 
5
- try:
6
- import docx
7
- except ImportError:
8
- docx = None
9
 
10
- # ------------------------------
11
- # CONFIGURATION
12
- # ------------------------------
13
- DEFAULT_CODES = [
14
- "Theme: Communication Barrier",
15
- "Theme: Emotional Support",
16
- "Theme: Future Aspirations",
17
- "Theme: Financial Stress",
18
- "Other: Follow-up Needed",
19
- ]
20
 
21
- METADATA_FIELDS = {
22
- "interview_id": "Interview ID (e.g., I-001)",
23
- "interview_date": "Date of Interview (YYYY-MM-DD)",
24
- "occupation": "Participant Occupation",
25
- "age": "Participant Age",
26
- }
27
-
28
- COLOR_MAP = {
29
- "Theme: Communication Barrier": "lightblue",
30
- "Theme: Emotional Support": "lightgreen",
31
- "Theme: Future Aspirations": "khaki",
32
- "Theme: Financial Stress": "lightpink",
33
- "Other: Follow-up Needed": "orange",
34
- }
35
-
36
- # ------------------------------
37
- # FILE READERS
38
- # ------------------------------
39
- def read_docx(path):
40
- if not docx:
41
- return "Error: python-docx not installed."
42
- d = docx.Document(path)
43
- return "\n".join([p.text for p in d.paragraphs])
44
-
45
- def read_vtt(path):
46
- with open(path, "r", encoding="utf-8") as f:
47
- lines = f.read().split("\n")
48
- cleaned = [
49
- l.strip()
50
- for l in lines
51
- if l and "WEBVTT" not in l and "-->" not in l and not l.strip().isdigit()
52
- ]
53
- return " ".join(cleaned)
54
-
55
- def get_empty_df():
56
- return pd.DataFrame(
57
- columns=["File ID", "Coded Segment", "Code", "Context (100 chars)"] + list(METADATA_FIELDS.keys())
58
- )
59
-
60
- # ------------------------------
61
- # PROCESS FILE
62
- # ------------------------------
63
- def process_file(file_obj):
64
- if file_obj is None:
65
- return "", "", get_empty_df()
66
- path = file_obj.name
67
- name = os.path.basename(path)
68
- if name.lower().endswith(".docx"):
69
- text = read_docx(path)
70
- elif name.lower().endswith(".vtt"):
71
- text = read_vtt(path)
72
- else:
73
- with open(path, "r", encoding="utf-8") as f:
74
- text = f.read()
75
- return text, name, get_empty_df()
76
-
77
- # ------------------------------
78
- # BUILD HTML TRANSCRIPT
79
- # ------------------------------
80
- def build_transcript_html(text, coded_df):
81
- display_text = text
82
- if coded_df is not None and not coded_df.empty:
83
- for _, row in coded_df.iterrows():
84
- seg = row["Coded Segment"]
85
- color = COLOR_MAP.get(row["Code"], "yellow")
86
- display_text = display_text.replace(seg, f"<span style='background-color:{color}'>{seg}</span>", 1)
87
- safe_text = display_text.replace("\n", "<br>")
88
  html = f"""
89
- <div id='transcript' style='white-space: pre-wrap; font-size:16px; line-height:1.5; max-height:600px; overflow:auto; border:1px solid #ccc; padding:5px;'>
90
- {safe_text}
91
- </div>
92
  <script>
93
- const transcript = document.getElementById('transcript');
94
- transcript.addEventListener('mouseup', function() {{
95
- const selection = window.getSelection().toString();
96
- if(selection.length>0){{
97
- const tb = document.querySelector('textarea[placeholder="Selected segment"]');
98
- if(tb) {{
99
- tb.value = selection;
100
- tb.dispatchEvent(new Event("input", {{bubbles:true}}));
101
- }}
102
  }}
103
  }});
104
  </script>
105
  """
106
  return html
107
 
108
- # ------------------------------
109
- # APPLY CODE
110
- # ------------------------------
111
- def apply_code(coded_df, file_id, full_text, segment, code, code_list, *metadata_values):
112
- if not segment:
113
- return coded_df, code_list, "⚠️ Select a segment first."
114
- if not code:
115
- return coded_df, code_list, "⚠️ Enter or select a code first."
116
- if not file_id:
117
- return coded_df, code_list, "⚠️ Upload a file first."
118
-
119
- # Add new code to code list if not exists
120
- if code not in code_list:
121
- code_list.append(code)
122
-
123
- meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
124
- context = "Context unavailable"
125
- try:
126
- n_full = " ".join(full_text.split())
127
- n_seg = " ".join(segment.split())
128
- idx = n_full.index(n_seg)
129
- context = "..." + n_full[max(0, idx-100):idx]
130
- except:
131
- pass
132
-
133
- new_row = {
134
- "File ID": file_id,
135
- "Coded Segment": segment,
136
- "Code": code,
137
- "Context (100 chars)": context,
138
- **meta_dict
139
- }
140
- new_df = pd.concat([coded_df, pd.DataFrame([new_row])], ignore_index=True)
141
- return new_df, code_list, f"✅ Segment coded as '{code}'!"
142
-
143
- # ------------------------------
144
- # EXPORT XLSX
145
- # ------------------------------
146
- def export_excel(df):
147
- if df.empty:
148
- return None, "Nothing to export."
149
- path = "qualitative_codes.xlsx"
150
- df.to_excel(path, index=False)
151
- return path, "Excel ready."
152
-
153
- # ------------------------------
154
- # GRADIO INTERFACE
155
- # ------------------------------
156
- with gr.Blocks(title="Direct Selection Coding") as demo:
157
- gr.Markdown("# 📑 Direct Selection Coding Tool")
158
-
159
- # States
160
- file_id = gr.State("")
161
- full_text = gr.State("")
162
- coded_df_state = gr.State(get_empty_df())
163
- code_categories_state = gr.State(DEFAULT_CODES)
164
 
165
- # ---------------- METADATA TOP ----------------
166
- with gr.Row():
167
- metadata_inputs = []
168
- for k, lbl in METADATA_FIELDS.items():
169
- metadata_inputs.append(gr.Textbox(label=lbl))
170
 
171
- # ---------------- TRANSCRIPT + CODING ----------------
172
  with gr.Row():
173
- # LEFT: transcript
174
  with gr.Column(scale=3):
175
- transcript_html = gr.HTML()
176
-
177
- # RIGHT: coding panel
178
  with gr.Column(scale=2):
179
- gr.Markdown("## 🏷️ Code Categories")
180
- code_input = gr.Textbox(label="Enter code or select from dropdown", value="", interactive=True)
181
- code_dropdown = gr.Dropdown(label="Select existing code", choices=DEFAULT_CODES)
182
- segment_box = gr.Textbox(label="Selected Segment", placeholder="Selected segment", lines=3, interactive=False)
183
  code_btn = gr.Button("Apply Code")
 
184
 
185
- gr.Markdown("## 📊 Coded Segments")
186
- table = gr.Dataframe(interactive=False)
187
-
188
- export_btn = gr.Button("Export XLSX")
189
- export_file = gr.File(visible=False)
190
-
191
- file_input = gr.File(label="Upload transcript (.docx, .vtt, .txt)", file_types=[".docx",".vtt",".txt"])
192
- status = gr.Textbox(label="Status", value="Ready")
193
-
194
- # ---------------- CALLBACKS ----------------
195
- file_input.change(fn=process_file, inputs=file_input, outputs=[full_text, file_id, coded_df_state])
196
-
197
- # Update transcript when text or coded_df changes
198
- def update_transcript(text, df):
199
- return build_transcript_html(text, df)
200
-
201
- full_text.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
202
- coded_df_state.change(update_transcript, inputs=[full_text, coded_df_state], outputs=transcript_html)
203
-
204
- # Fill code input when selecting dropdown
205
- code_dropdown.change(lambda x: x, inputs=code_dropdown, outputs=code_input)
206
-
207
- # Apply code
208
- code_btn.click(
209
- fn=apply_code,
210
- inputs=[coded_df_state, file_id, full_text, segment_box, code_input, code_categories_state] + metadata_inputs,
211
- outputs=[coded_df_state, code_categories_state, status]
212
- )
213
-
214
- # Update dropdown when code list changes
215
- code_categories_state.change(lambda codes: gr.update(choices=codes), inputs=code_categories_state, outputs=code_dropdown)
216
-
217
- # Update table
218
- coded_df_state.change(lambda x: x, inputs=coded_df_state, outputs=table)
219
-
220
- # Export
221
- export_btn.click(
222
- export_excel,
223
- inputs=coded_df_state,
224
- outputs=[export_file, status]
225
- ).then(
226
- lambda f: gr.update(visible=f is not None),
227
- inputs=export_file,
228
- outputs=export_file
229
- )
230
 
231
- # ------------------------------
232
- # LAUNCH
233
- # ------------------------------
234
- if __name__ == "__main__":
235
- demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
+ # Default codes
5
+ CODES = ["Theme: Communication Barrier","Theme: Emotional Support"]
 
 
6
 
7
+ # State
8
+ coded_df_state = pd.DataFrame(columns=["Segment","Code"])
 
 
 
 
 
 
 
 
9
 
10
+ # Build transcript HTML with JS to store selection
11
+ def build_transcript_html(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  html = f"""
13
+ <div id="transcript" style='white-space: pre-wrap; border:1px solid #ccc; padding:5px; max-height:400px; overflow:auto;'>{text}</div>
 
 
14
  <script>
15
+ const transcript = document.getElementById("transcript");
16
+ transcript.addEventListener("mouseup", function() {{
17
+ const sel = window.getSelection().toString();
18
+ if(sel.length>0){{
19
+ document.querySelector("#selected_segment").value = sel;
 
 
 
 
20
  }}
21
  }});
22
  </script>
23
  """
24
  return html
25
 
26
+ # Apply code to selected segment
27
+ def apply_code(selected_segment, code, df):
28
+ if not selected_segment or not code:
29
+ return df, "⚠️ Select segment and code first"
30
+ new_row = {"Segment": selected_segment, "Code": code}
31
+ df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
32
+ return df, f"✅ Segment coded as {code}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ # Gradio interface
35
+ with gr.Blocks() as demo:
36
+ transcript_text = "This is a sample transcript. You can select any part of this text to code it."
 
 
37
 
 
38
  with gr.Row():
 
39
  with gr.Column(scale=3):
40
+ transcript_html = gr.HTML(build_transcript_html(transcript_text))
 
 
41
  with gr.Column(scale=2):
42
+ selected_segment = gr.Textbox(label="Selected Segment", interactive=False, elem_id="selected_segment")
43
+ code_dropdown = gr.Dropdown(label="Select Code", choices=CODES)
 
 
44
  code_btn = gr.Button("Apply Code")
45
+ coded_table = gr.Dataframe(headers=["Segment","Code"])
46
 
47
+ code_btn.click(apply_code, inputs=[selected_segment, code_dropdown, coded_table], outputs=[coded_table, gr.Textbox(label="Status")])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ demo.launch()