dhammo2 committed on
Commit
e69d505
·
verified ·
1 Parent(s): 985b78f

Added MultiSelect Meeting Types

Browse files
Files changed (1) hide show
  1. app.py +466 -466
app.py CHANGED
@@ -1,466 +1,466 @@
1
- import gradio as gr
2
- import psycopg2
3
- import numpy as np
4
- import pandas as pd
5
- from datetime import datetime, timedelta, date
6
- from urllib.parse import quote
7
- import re
8
- import os
9
-
10
- # Connect to the database
11
- def connect_db():
12
- return psycopg2.connect(
13
- dbname=os.getenv("credDB_NAME"),
14
- user=os.getenv("credDB_USER"),
15
- password=os.getenv("credDB_PASS"),
16
- host=os.getenv("credDB_HOST"),
17
- port=os.getenv("credDB_PORT")
18
- )
19
-
20
- # Fetch meeting types
21
- def get_meeting_types():
22
- conn = connect_db()
23
- cur = conn.cursor()
24
- cur.execute("SELECT DISTINCT meeting_type FROM mopacdb_webcasts.webcasts;")
25
- meeting_types = [row[0] for row in cur.fetchall()]
26
- cur.close()
27
- conn.close()
28
- return ["No Filter"] + meeting_types
29
-
30
- # Format seconds as HH:MM:SS
31
- def format_seconds(seconds):
32
- return str(timedelta(seconds=seconds)).split('.')[0] if seconds else None
33
-
34
- def add_duration_to_datetime(datetime_str, duration_str):
35
- # Parse the input datetime string into a datetime object
36
- dt = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")
37
-
38
- # Split the duration string into hours, minutes, and seconds
39
- hours, minutes, seconds = map(int, duration_str.split(':'))
40
-
41
- # Create a timedelta object from the duration
42
- duration = timedelta(hours=hours, minutes=minutes, seconds=seconds)
43
-
44
- # Add the duration to the original datetime
45
- new_datetime = dt + duration
46
-
47
- # Return the new datetime as a string in the original format
48
- return new_datetime.strftime("%Y-%m-%d %H:%M:%S")
49
-
50
- def convert_to_url_string(date_str, loc):
51
- # Parse the input string into a datetime object
52
- dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
53
-
54
- # Format the datetime object to ISO 8601 format
55
- iso_format = dt.strftime("%Y-%m-%dT%H:%M:%S")
56
-
57
- # Add timezone offset (assuming UTC+00:00)
58
- iso_format_with_tz = iso_format + "+00:00"
59
-
60
- # URL encode the string
61
- url_encoded = quote(iso_format_with_tz)
62
-
63
- # Return the final URL query string
64
- if loc.lower() == "start":
65
- return f"?in={url_encoded}"
66
- if loc.lower() == "end":
67
- return f"&out={url_encoded}"
68
-
69
-
70
- # Search function
71
- def search_transcripts(query, meeting_type, start_date, end_date):
72
- conn = connect_db()
73
- cur = conn.cursor()
74
-
75
- sql = """
76
- SELECT w.meeting_type, w.meeting_date, t.start_time, t.end_time,
77
- REPLACE(t.speaker, 'speaker ', ''), t.transcript, w.url, w.time_origin, t.id
78
- FROM mopacdb_webcasts.transcripts t
79
- JOIN mopacdb_webcasts.webcasts w ON t.webcast_id = w.id
80
- WHERE t.transcript ILIKE %s
81
- """
82
- params = [f"%{query}%"]
83
-
84
- if meeting_type != "No Filter":
85
- sql += " AND w.meeting_type = %s"
86
- params.append(meeting_type)
87
-
88
- if start_date and end_date:
89
- sql += " AND w.meeting_date BETWEEN %s AND %s"
90
- params.extend([start_date, end_date])
91
- elif start_date:
92
- sql += " AND w.meeting_date >= %s"
93
- params.append(start_date)
94
- elif end_date:
95
- sql += " AND w.meeting_date <= %s"
96
- params.append(end_date)
97
-
98
- # Add ORDER BY clause after all the filters are applied
99
- sql += " ORDER BY w.meeting_date DESC, t.start_time ASC"
100
-
101
- cur.execute(sql, params)
102
- rows = cur.fetchall()
103
- cur.close()
104
- conn.close()
105
-
106
- # Return both the full data (including identifier) and the displayed data
107
- full_data = [
108
- [row[0], row[1], format_seconds(row[2]), format_seconds(row[3]), row[4], row[5], row[6], row[7], row[8]]
109
- for row in rows
110
- ]
111
- displayed_data = [row[:-3] for row in full_data] # Remove identifier column for display
112
-
113
- return displayed_data, full_data # Return both datasets
114
-
115
-
116
- # Function to highlight query in the transcript
117
- def highlight_query(query, transcript):
118
- if query and transcript:
119
- # Simulate highlight by making words uppercase or surrounding them with markers
120
- highlighted_text = re.sub(f"({re.escape(query)})", r'🔶\1🔶', transcript, flags=re.IGNORECASE)
121
- return highlighted_text
122
- return transcript
123
-
124
-
125
- # Function to identify the hidden table row index
126
- def get_matching_index(full_data: list, selected_row_values: list):
127
- """
128
- Returns the index of the row in `full_data` that matches the `selected_row_values`.
129
- Converts the selected meeting_date to a date object for comparison.
130
- """
131
- # Convert the selected meeting_date (which is in string format) to a date object
132
- selected_row_values[1] = datetime.strptime(selected_row_values[1], "%Y-%m-%d").date()
133
-
134
- # Find and return the matching row index
135
- return next(
136
- (i for i, row in enumerate(full_data)
137
- if [row[0], row[1], row[2], row[3], row[4], row[5]] == selected_row_values),
138
- None
139
- )
140
-
141
- # Callback for transcript selection
142
- def df_select_callback(full_data: list, evt: gr.SelectData, query = ""):
143
- selected_row_values = evt.row_value if evt.row_value else None
144
-
145
- # Use the helper function to find the matching row index
146
- matching_index = get_matching_index(full_data, selected_row_values)
147
-
148
- if matching_index is not None:
149
- transcript = str(full_data[matching_index][5]) # Extract transcript
150
- return highlight_query(query, transcript)
151
-
152
- return ""
153
-
154
-
155
-
156
- # Callback for video iframe
157
- def df_video_callback(full_data: list, evt: gr.SelectData):
158
-
159
- selected_row_values = evt.row_value if evt.row_value else None
160
-
161
- # Use the helper function to find the matching row index
162
- matching_index = get_matching_index(full_data, selected_row_values)
163
-
164
- if matching_index is not None:
165
-
166
- identifier = str(full_data[matching_index][6]) # Access identifier from stored data
167
-
168
- if identifier.lower() == "replace":
169
- return "<center><span style='color: red;'>Video currently not availabile for this transcript due to GLA archiving policies.</span></center>"
170
-
171
- #Get Origin Timestamp
172
- timestamp = str(full_data[matching_index][7])
173
- #Add Start Time
174
- ts_start = convert_to_url_string(add_duration_to_datetime(timestamp, str(full_data[matching_index][2])), "start")
175
- ts_end = convert_to_url_string(add_duration_to_datetime(timestamp, str(full_data[matching_index][3])), "end")
176
-
177
- return f"<iframe src='https://player.london.gov.uk/Player/Index/{identifier}{ts_start}{ts_end}' width='100%' height='360' frameborder='0' scrolling='no' allowfullscreen allow='encrypted-media; autoplay; fullscreen'></iframe>"
178
- return ""
179
-
180
-
181
- def df_video_button_callback(full_data: list, evt: gr.SelectData):
182
-
183
- selected_row_values = evt.row_value if evt.row_value else None
184
-
185
- # Use the helper function to find the matching row index
186
- matching_index = get_matching_index(full_data, selected_row_values)
187
-
188
- if matching_index is not None:
189
-
190
- identifier = str(full_data[matching_index][6]) # Access identifier from stored data
191
-
192
- if identifier.lower() == "replace":
193
- return ""
194
-
195
- #Get Origin Timestamp
196
- timestamp = str(full_data[matching_index][7])
197
- #Add Start Time
198
- ts_start = convert_to_url_string(add_duration_to_datetime(timestamp, str(full_data[matching_index][2])), "start")
199
- ts_end = convert_to_url_string(add_duration_to_datetime(timestamp, str(full_data[matching_index][3])), "end")
200
-
201
- #Set URLs
202
- url_1 = f"https://player.london.gov.uk/Player/Index/{identifier}"
203
- url_2 = f"https://player.london.gov.uk/Player/Index/{identifier}{ts_start}{ts_end}"
204
-
205
- return f"""
206
- <div style="display: flex; justify-content: space-between; margin: 20px;">
207
- <a href="{url_1}" target="_blank" style="text-decoration: none; width: 48%;">
208
- <button class="lg secondary svelte-1ixn6qd"
209
- style="width: 100%; text-align: center; padding: 10px; font-size: 16px;">
210
- Access the Full Video
211
- </button>
212
- </a>
213
- <a href="{url_2}" target="_blank" style="text-decoration: none; width: 48%;">
214
- <button class="lg secondary svelte-1ixn6qd"
215
- style="width: 100%; text-align: center; padding: 10px; font-size: 16px;">
216
- Share this Clip
217
- </button>
218
- </a>
219
- </div>
220
- """
221
-
222
- return ""
223
-
224
-
225
-
226
- # Callback for transcript selection
227
- def df_prior_callback(full_data: list, evt: gr.SelectData):
228
-
229
- selected_row_values = evt.row_value if evt.row_value else None
230
-
231
- # Use the helper function to find the matching row index
232
- matching_index = get_matching_index(full_data, selected_row_values)
233
-
234
- if matching_index is not None:
235
- transcript_id = int(full_data[matching_index][-1]) # Access identifier from stored data
236
-
237
- conn = connect_db()
238
- cur = conn.cursor()
239
-
240
- sql = """
241
- WITH target_transcript AS (
242
- SELECT webcast_id, start_time
243
- FROM mopacdb_webcasts.transcripts
244
- WHERE id = %s
245
- )
246
- SELECT transcript
247
- FROM mopacdb_webcasts.transcripts
248
- WHERE webcast_id = (SELECT webcast_id FROM target_transcript)
249
- AND start_time < (SELECT start_time FROM target_transcript)
250
- ORDER BY start_time DESC
251
- LIMIT 1;
252
- """
253
-
254
- cur.execute(sql, (transcript_id,))
255
- prior_transcript = cur.fetchone()
256
- cur.close()
257
- conn.close()
258
-
259
- # Check if rows are fetched
260
- if not prior_transcript:
261
- return "" # No data, return None to keep button hidden
262
-
263
- return prior_transcript[0]
264
-
265
- # Callback for transcript selection
266
- def df_posterior_callback(full_data: list, evt: gr.SelectData):
267
-
268
- selected_row_values = evt.row_value if evt.row_value else None
269
-
270
- # Use the helper function to find the matching row index
271
- matching_index = get_matching_index(full_data, selected_row_values)
272
-
273
- if matching_index is not None:
274
-
275
- transcript_id = int(full_data[matching_index][-1]) # Access identifier from stored data
276
-
277
- conn = connect_db()
278
- cur = conn.cursor()
279
-
280
- sql = """
281
- WITH target_transcript AS (
282
- SELECT webcast_id, start_time
283
- FROM mopacdb_webcasts.transcripts
284
- WHERE id = %s
285
- )
286
- SELECT transcript
287
- FROM mopacdb_webcasts.transcripts
288
- WHERE webcast_id = (SELECT webcast_id FROM target_transcript)
289
- AND start_time > (SELECT start_time FROM target_transcript)
290
- ORDER BY start_time ASC
291
- LIMIT 1;
292
- """
293
-
294
- cur.execute(sql, (transcript_id,))
295
- prior_transcript = cur.fetchone()
296
- cur.close()
297
- conn.close()
298
-
299
- # Check if rows are fetched
300
- if not prior_transcript:
301
- return "" # No data, return None to keep button hidden
302
-
303
- return prior_transcript[0]
304
-
305
-
306
- def df_transcript_callback(full_data: list, evt: gr.SelectData):
307
- selected_row_values = evt.row_value if evt.row_value else None
308
-
309
- # Use the helper function to find the matching row index
310
- matching_index = get_matching_index(full_data, selected_row_values)
311
-
312
- if matching_index is not None:
313
- transcript_id = int(full_data[matching_index][-1]) # Access identifier from stored data
314
-
315
- conn = connect_db()
316
- cur = conn.cursor()
317
-
318
- sql = """
319
- SELECT start_time, end_time, speaker, transcript
320
- FROM mopacdb_webcasts.transcripts
321
- WHERE webcast_id = (
322
- SELECT webcast_id
323
- FROM mopacdb_webcasts.transcripts
324
- WHERE id = %s
325
- )
326
- ORDER BY start_time ASC;
327
- """
328
-
329
- cur.execute(sql, (transcript_id,))
330
- rows = cur.fetchall()
331
- cur.close()
332
- conn.close()
333
-
334
- # Check if rows are fetched
335
- if not rows:
336
- return None # No data, return None to keep button hidden
337
-
338
- # Create a DataFrame and save it as a CSV
339
- df = pd.DataFrame(rows, columns=["start_time", "end_time", "speaker", "transcript"])
340
-
341
- #Create Filename
342
- conn = connect_db()
343
- cur = conn.cursor()
344
-
345
- sql = """
346
- SELECT CONCAT(meeting_type, ' - ', meeting_date) AS meeting_details
347
- FROM mopacdb_webcasts.webcasts
348
- WHERE id = (
349
- SELECT webcast_id
350
- FROM mopacdb_webcasts.transcripts
351
- WHERE id = %s
352
- );
353
- """
354
-
355
- cur.execute(sql, (transcript_id,))
356
- filename = cur.fetchone()[0]
357
- cur.close()
358
- conn.close()
359
-
360
- csv_file_path = str(filename)+".csv" #Set File path
361
- df.to_csv(csv_file_path, index=False) # Save to CSV
362
-
363
- return csv_file_path # This should show the button
364
-
365
-
366
- # Gradio interface function
367
- def gradio_interface(query, meeting_type, start_date, end_date):
368
- if start_date:
369
- start_date = datetime.fromtimestamp(start_date) if isinstance(start_date, float) else datetime.strptime(start_date, "%Y-%m-%d")
370
- if end_date:
371
- end_date = datetime.fromtimestamp(end_date) if isinstance(end_date, float) else datetime.strptime(end_date, "%Y-%m-%d")
372
-
373
- displayed_results, full_results = search_transcripts(query, meeting_type, start_date, end_date)
374
-
375
- no_results_message = "<span style='color: red;'>No results found. Please try a different search.</span>" if not displayed_results else ""
376
-
377
- return displayed_results, full_results, no_results_message
378
-
379
-
380
- #Set custom CSS
381
- custom_css = """
382
- <style>
383
- #purple-textbox textarea {
384
- background-color: #F3E5F5 !important;
385
- }
386
- #yellow-textbox textarea {
387
- background-color: #FFF2D1 !important;
388
- }
389
- textarea {
390
- font-family: Arial, sans-serif;
391
- font-size: 14px;
392
- white-space: pre-wrap;
393
- color: black;
394
- }
395
-
396
- </style>
397
- """
398
-
399
- # Build the Gradio app
400
- with gr.Blocks() as app:
401
-
402
- gr.Markdown(
403
- """
404
- <div style="background-color: #4B23C0; color: white; padding: 20px; text-align: left; font-size: 24px; font-weight: bold; margin: 0;">
405
- MOPAC | DS &nbsp;-&nbsp;🔍 Webcast Transcript Search Tool
406
- </div>
407
- """,
408
- sanitize_html=False
409
- )
410
-
411
- #Load TextBox CSS
412
- gr.HTML(custom_css)
413
-
414
- meeting_types = get_meeting_types()
415
-
416
- with gr.Row():
417
- query = gr.Textbox(label="Search Query (optional)")
418
- meeting_type = gr.Dropdown(label="Meeting Type (optional)", choices=meeting_types)
419
- start_date = gr.DateTime(label="Start Date (optional)", include_time=False, info="Enter Date in YYYY-MM-DD Format")
420
- end_date = gr.DateTime(label="End Date (optional)", include_time=False, info="Enter Date in YYYY-MM-DD Format")
421
-
422
- search_btn = gr.Button("Search")
423
-
424
- # Table for results (without "Identifier" column)
425
- results_table = gr.DataFrame(
426
- headers=["Meeting Type", "Date", "Start Time", "End Time", "Speaker", "Transcript"],
427
- datatype=["str", "date", "str", "str", "str", "str"],
428
- interactive=False
429
- )
430
-
431
- no_results_text = gr.Markdown()
432
-
433
- # Store full data (including "Identifier") separately
434
- full_results_state = gr.State([])
435
-
436
- # Search button updates the table and stores full results
437
- search_btn.click(fn=gradio_interface, inputs=[query, meeting_type, start_date, end_date], outputs=[results_table, full_results_state, no_results_text])
438
-
439
- with gr.Row():
440
- with gr.Column(scale=2):
441
- transcript_prior = gr.Textbox(label="Prior Transcript", interactive=False, elem_id="purple-textbox")
442
- selected_transcript = gr.Textbox(label="Selected Transcript",
443
- info ="Search Queries will appear demarcated using orange markers (🔶).",
444
- interactive=False, elem_id="yellow-textbox")
445
- transcript_posterior = gr.Textbox(label="Posterior Transcript", interactive=False, elem_id="purple-textbox")
446
-
447
- gr.Markdown("**The full transcript of any selected speech segment will appear available for download here...**")
448
- transcript_bttn = gr.File(label="Download Full Transcript")
449
-
450
- with gr.Column(scale=3):
451
-
452
- video_iframe = gr.HTML("")
453
- video_bttn_full = gr.HTML("")
454
-
455
-
456
- # Update transcript and video using the stored full data
457
- results_table.select(fn=df_select_callback, inputs=[full_results_state, query], outputs=[selected_transcript])
458
- results_table.select(fn=df_prior_callback, inputs=[full_results_state], outputs=[transcript_prior])
459
- results_table.select(fn=df_posterior_callback, inputs=[full_results_state], outputs=[transcript_posterior])
460
- results_table.select(fn=df_video_callback, inputs=[full_results_state], outputs=[video_iframe])
461
- results_table.select(fn=df_video_button_callback, inputs=[full_results_state], outputs=[video_bttn_full])
462
- results_table.select(fn=df_transcript_callback, inputs=[full_results_state], outputs=[transcript_bttn])
463
-
464
-
465
- app.launch()
466
-
 
1
+ import gradio as gr
2
+ import psycopg2
3
+ import numpy as np
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta, date
6
+ from urllib.parse import quote
7
+ import re
8
+ import os
9
+
10
# Open a PostgreSQL connection using credentials taken from the environment
def connect_db():
    """Return a new psycopg2 connection configured from ``credDB_*`` variables.

    Every setting is read with ``os.getenv``, so a variable that is not set
    is handed to psycopg2 as ``None``. The caller owns the returned
    connection and is responsible for closing it.
    """
    env_names = {
        "dbname": "credDB_NAME",
        "user": "credDB_USER",
        "password": "credDB_PASS",
        "host": "credDB_HOST",
        "port": "credDB_PORT",
    }
    settings = {option: os.getenv(variable) for option, variable in env_names.items()}
    return psycopg2.connect(**settings)
19
+
20
# Fetch meeting types
def get_meeting_types():
    """Return the dropdown choices: ``"No Filter"`` followed by every distinct
    ``meeting_type`` stored in ``mopacdb_webcasts.webcasts``.

    The cursor and connection are released in ``finally`` blocks so that a
    failing query does not leak a database connection.
    """
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT DISTINCT meeting_type FROM mopacdb_webcasts.webcasts;")
            meeting_types = [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()

    return ["No Filter"] + meeting_types
29
+
30
# Format seconds as H:MM:SS
def format_seconds(seconds):
    """Render a number of seconds as ``H:MM:SS`` text, dropping any fraction.

    Args:
        seconds: Offset in seconds (any value ``float()`` accepts), or ``None``.

    Returns:
        ``str(timedelta)`` without the fractional part, or ``None`` when
        ``seconds`` is ``None``.

    A value of ``0`` is a valid offset (the very start of a webcast) and is
    rendered as ``"0:00:00"``; only a missing value maps to ``None``.
    """
    if seconds is None:
        return None
    # float() lets numeric types that timedelta rejects (e.g. Decimal) through.
    return str(timedelta(seconds=float(seconds))).split('.')[0]
33
+
34
def add_duration_to_datetime(datetime_str, duration_str):
    """Add an ``H:MM:SS`` duration to a ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Args:
        datetime_str: Origin timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        duration_str: Duration as ``H:MM:SS``. The ``"N day(s), H:MM:SS"``
            form that ``str(timedelta)`` produces for durations of 24 hours
            or more is accepted as well.

    Returns:
        The shifted timestamp, formatted like ``datetime_str``.

    Raises:
        ValueError: If either argument does not match the expected format.
    """
    origin = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")

    # Peel off an optional "N day(s)," prefix before reading the clock part.
    days = 0
    clock = duration_str.strip()
    if "," in clock:
        day_part, clock = clock.split(",", 1)
        days = int(day_part.split()[0])

    hours, minutes, seconds = map(int, clock.strip().split(':'))
    offset = timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)

    return (origin + offset).strftime("%Y-%m-%d %H:%M:%S")
49
+
50
def convert_to_url_string(date_str, loc):
    """Build the player's clip query fragment for a timestamp.

    Args:
        date_str: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        loc: ``"start"`` or ``"end"`` (case-insensitive).

    Returns:
        ``"?in=<ts>"`` for ``"start"`` or ``"&out=<ts>"`` for ``"end"``, where
        ``<ts>`` is the URL-encoded ISO 8601 timestamp with a ``+00:00``
        offset appended.

    Raises:
        ValueError: If ``date_str`` does not match the format, or ``loc`` is
            neither ``"start"`` nor ``"end"``. An unknown ``loc`` used to
            return ``None``, which callers then interpolated into URLs as
            the text "None".
    """
    moment = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")

    # The source timestamp is naive; a UTC offset is assumed, as before.
    encoded = quote(moment.strftime("%Y-%m-%dT%H:%M:%S") + "+00:00")

    position = loc.lower()
    if position == "start":
        return f"?in={encoded}"
    if position == "end":
        return f"&out={encoded}"
    raise ValueError(f"loc must be 'start' or 'end', got {loc!r}")
68
+
69
+
70
# Search function
def search_transcripts(query, meeting_type, start_date, end_date):
    """Search transcript segments by text, meeting type(s) and date range.

    Args:
        query: Text matched case-insensitively (``ILIKE``) anywhere in the
            transcript. ``None`` or ``""`` matches every segment.
        meeting_type: A list of meeting types (multiselect dropdown), a single
            meeting type string, or an empty value. No type filter is applied
            when it is empty or contains ``"No Filter"``.
        start_date: Inclusive lower bound on ``meeting_date``, or a falsy value.
        end_date: Inclusive upper bound on ``meeting_date``, or a falsy value.

    Returns:
        ``(displayed_data, full_data)``. Each ``full_data`` row is
        ``[meeting_type, meeting_date, start, end, speaker, transcript, url,
        time_origin, transcript_id]`` with start/end formatted by
        ``format_seconds``; ``displayed_data`` holds the first six columns.
    """
    # Accept both the multiselect list and a plain string so older callers
    # that pass a single meeting type keep working.
    if isinstance(meeting_type, str):
        selected_types = [meeting_type] if meeting_type else []
    else:
        selected_types = list(meeting_type or [])

    sql = """
        SELECT w.meeting_type, w.meeting_date, t.start_time, t.end_time,
               REPLACE(t.speaker, 'speaker ', ''), t.transcript, w.url, w.time_origin, t.id
        FROM mopacdb_webcasts.transcripts t
        JOIN mopacdb_webcasts.webcasts w ON t.webcast_id = w.id
        WHERE t.transcript ILIKE %s
    """
    # A missing query must not turn into the literal text "None".
    params = [f"%{query or ''}%"]

    if selected_types and "No Filter" not in selected_types:
        # psycopg2 adapts a Python list to a PostgreSQL array for ANY().
        sql += " AND w.meeting_type = ANY(%s)"
        params.append(selected_types)

    if start_date and end_date:
        sql += " AND w.meeting_date BETWEEN %s AND %s"
        params.extend([start_date, end_date])
    elif start_date:
        sql += " AND w.meeting_date >= %s"
        params.append(start_date)
    elif end_date:
        sql += " AND w.meeting_date <= %s"
        params.append(end_date)

    # ORDER BY has to come after every filter has been appended.
    sql += " ORDER BY w.meeting_date DESC, t.start_time ASC"

    # Release the cursor and connection even when the query fails.
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    full_data = [
        [row[0], row[1], format_seconds(row[2]), format_seconds(row[3]),
         row[4], row[5], row[6], row[7], row[8]]
        for row in rows
    ]
    # Hide the trailing url, time_origin and id columns from the table.
    displayed_data = [row[:-3] for row in full_data]

    return displayed_data, full_data
114
+
115
+
116
# Surround every occurrence of the query in the transcript with 🔶 markers
def highlight_query(query, transcript):
    """Return ``transcript`` with each case-insensitive match of ``query``
    wrapped in orange-diamond markers.

    The query is matched literally (it is regex-escaped). When either
    argument is empty or ``None`` the transcript is returned unchanged.
    """
    if not (query and transcript):
        return transcript

    matcher = re.compile(f"({re.escape(query)})", flags=re.IGNORECASE)
    return matcher.sub(r'🔶\1🔶', transcript)
123
+
124
+
125
# Function to identify the hidden table row index
def get_matching_index(full_data: list, selected_row_values: list):
    """Return the index of the ``full_data`` row matching the selected table row.

    Args:
        full_data: Stored result rows; the first six columns of each row are
            compared and column 1 is expected to hold a ``date``.
        selected_row_values: The six displayed cell values of the selected
            row, with the date typically given as a ``YYYY-MM-DD`` string.

    Returns:
        The index of the first matching row, or ``None`` when nothing is
        selected, the date cannot be read, or no row matches.

    The caller's list is never modified: the date conversion is applied to a
    copy so that other callbacks can reuse the same selection.
    """
    if not full_data or not selected_row_values or len(selected_row_values) < 6:
        return None

    wanted = list(selected_row_values[:6])

    # Normalise the selected date to a ``date`` object for comparison.
    raw_date = wanted[1]
    if isinstance(raw_date, datetime):
        wanted[1] = raw_date.date()
    elif isinstance(raw_date, str):
        try:
            # Only the leading YYYY-MM-DD part matters; any time suffix is ignored.
            wanted[1] = datetime.strptime(raw_date.strip()[:10], "%Y-%m-%d").date()
        except ValueError:
            return None

    return next(
        (index for index, row in enumerate(full_data) if list(row[:6]) == wanted),
        None
    )
140
+
141
# Callback for transcript selection
def df_select_callback(full_data: list, evt: gr.SelectData, query = ""):
    """Return the selected row's transcript with the search query highlighted.

    Args:
        full_data: Stored result rows; column 5 holds the transcript text.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.
        query: Current search text to highlight.

    Returns:
        The highlighted transcript, or ``""`` when nothing is selected or the
        selected row cannot be found in ``full_data``.
    """
    selected_row_values = evt.row_value if evt.row_value else None
    # An empty selection cannot be matched; bail out before the lookup.
    if selected_row_values is None:
        return ""

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return ""

    transcript = str(full_data[matching_index][5])
    return highlight_query(query, transcript)
153
+
154
+
155
+
156
# Callback for video iframe
def df_video_callback(full_data: list, evt: gr.SelectData):
    """Return an ``<iframe>`` that plays the selected transcript segment.

    Args:
        full_data: Stored result rows; columns 2 and 3 hold the formatted
            start/end offsets, column 6 the player identifier and column 7
            the webcast's origin timestamp.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.

    Returns:
        The iframe HTML, a red notice when the identifier is the placeholder
        ``"replace"``, or ``""`` when nothing is selected or matched.
    """
    selected_row_values = evt.row_value if evt.row_value else None
    if selected_row_values is None:
        return ""

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return ""

    row = full_data[matching_index]
    identifier = str(row[6])

    if identifier.lower() == "replace":
        return "<center><span style='color: red;'>Video currently not available for this transcript due to GLA archiving policies.</span></center>"

    # Origin timestamp of the webcast; clip bounds are offsets from it.
    timestamp = str(row[7])

    # A missing start offset means the clip begins at the origin itself.
    start_clock = row[2] if row[2] is not None else "0:00:00"
    ts_start = convert_to_url_string(add_duration_to_datetime(timestamp, str(start_clock)), "start")

    # Without an end offset the clip is left open-ended.
    ts_end = ""
    if row[3] is not None:
        ts_end = convert_to_url_string(add_duration_to_datetime(timestamp, str(row[3])), "end")

    return f"<iframe src='https://player.london.gov.uk/Player/Index/{identifier}{ts_start}{ts_end}' width='100%' height='360' frameborder='0' scrolling='no' allowfullscreen allow='encrypted-media; autoplay; fullscreen'></iframe>"
179
+
180
+
181
def df_video_button_callback(full_data: list, evt: gr.SelectData):
    """Return HTML with "full video" and "share this clip" link buttons.

    Args:
        full_data: Stored result rows; columns 2 and 3 hold the formatted
            start/end offsets, column 6 the player identifier and column 7
            the webcast's origin timestamp.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.

    Returns:
        The button markup, or ``""`` when nothing is selected or matched or
        the identifier is the placeholder ``"replace"``.
    """
    selected_row_values = evt.row_value if evt.row_value else None
    if selected_row_values is None:
        return ""

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return ""

    row = full_data[matching_index]
    identifier = str(row[6])

    if identifier.lower() == "replace":
        return ""

    # Origin timestamp of the webcast; clip bounds are offsets from it.
    timestamp = str(row[7])

    # A missing start offset means the clip begins at the origin itself.
    start_clock = row[2] if row[2] is not None else "0:00:00"
    ts_start = convert_to_url_string(add_duration_to_datetime(timestamp, str(start_clock)), "start")

    # Without an end offset the clip is left open-ended.
    ts_end = ""
    if row[3] is not None:
        ts_end = convert_to_url_string(add_duration_to_datetime(timestamp, str(row[3])), "end")

    # url_1 opens the whole webcast, url_2 only the selected clip.
    url_1 = f"https://player.london.gov.uk/Player/Index/{identifier}"
    url_2 = f"https://player.london.gov.uk/Player/Index/{identifier}{ts_start}{ts_end}"

    return f"""
    <div style="display: flex; justify-content: space-between; margin: 20px;">
        <a href="{url_1}" target="_blank" style="text-decoration: none; width: 48%;">
            <button class="lg secondary svelte-1ixn6qd"
                    style="width: 100%; text-align: center; padding: 10px; font-size: 16px;">
                Access the Full Video
            </button>
        </a>
        <a href="{url_2}" target="_blank" style="text-decoration: none; width: 48%;">
            <button class="lg secondary svelte-1ixn6qd"
                    style="width: 100%; text-align: center; padding: 10px; font-size: 16px;">
                Share this Clip
            </button>
        </a>
    </div>
    """
223
+
224
+
225
+
226
# Callback for the segment spoken just before the selected one
def df_prior_callback(full_data: list, evt: gr.SelectData):
    """Return the transcript of the segment immediately preceding the selection.

    Args:
        full_data: Stored result rows; the last column holds the transcript id.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.

    Returns:
        The text of the segment from the same webcast with the greatest
        ``start_time`` below the selected one, or ``""`` when nothing is
        selected or matched or no earlier segment exists.
    """
    selected_row_values = evt.row_value if evt.row_value else None
    if selected_row_values is None:
        return ""

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return ""

    transcript_id = int(full_data[matching_index][-1])

    sql = """
        WITH target_transcript AS (
            SELECT webcast_id, start_time
            FROM mopacdb_webcasts.transcripts
            WHERE id = %s
        )
        SELECT transcript
        FROM mopacdb_webcasts.transcripts
        WHERE webcast_id = (SELECT webcast_id FROM target_transcript)
        AND start_time < (SELECT start_time FROM target_transcript)
        ORDER BY start_time DESC
        LIMIT 1;
    """

    # Release the cursor and connection even when the query fails.
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (transcript_id,))
            prior_transcript = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()

    # No earlier segment: leave the textbox empty.
    if not prior_transcript:
        return ""

    return prior_transcript[0]
264
+
265
# Callback for the segment spoken just after the selected one
def df_posterior_callback(full_data: list, evt: gr.SelectData):
    """Return the transcript of the segment immediately following the selection.

    Args:
        full_data: Stored result rows; the last column holds the transcript id.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.

    Returns:
        The text of the segment from the same webcast with the smallest
        ``start_time`` above the selected one, or ``""`` when nothing is
        selected or matched or no later segment exists.
    """
    selected_row_values = evt.row_value if evt.row_value else None
    if selected_row_values is None:
        return ""

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return ""

    transcript_id = int(full_data[matching_index][-1])

    sql = """
        WITH target_transcript AS (
            SELECT webcast_id, start_time
            FROM mopacdb_webcasts.transcripts
            WHERE id = %s
        )
        SELECT transcript
        FROM mopacdb_webcasts.transcripts
        WHERE webcast_id = (SELECT webcast_id FROM target_transcript)
        AND start_time > (SELECT start_time FROM target_transcript)
        ORDER BY start_time ASC
        LIMIT 1;
    """

    # Release the cursor and connection even when the query fails.
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (transcript_id,))
            next_transcript = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()

    # No later segment: leave the textbox empty.
    if not next_transcript:
        return ""

    return next_transcript[0]
304
+
305
+
306
def df_transcript_callback(full_data: list, evt: gr.SelectData):
    """Export the full transcript of the selected row's webcast to a CSV file.

    Args:
        full_data: Stored result rows; the last column holds the transcript id.
        evt: Gradio selection event; ``evt.row_value`` carries the displayed
            cell values of the selected row.

    Returns:
        Path of the written CSV (named ``"<meeting_type> - <meeting_date>.csv"``),
        or ``None`` when nothing is selected or matched or the webcast has no
        transcript rows.
    """
    import tempfile  # only needed here; keeps the file-level imports untouched

    selected_row_values = evt.row_value if evt.row_value else None
    if selected_row_values is None:
        return None

    matching_index = get_matching_index(full_data, selected_row_values)
    if matching_index is None:
        return None

    transcript_id = int(full_data[matching_index][-1])

    segments_sql = """
        SELECT start_time, end_time, speaker, transcript
        FROM mopacdb_webcasts.transcripts
        WHERE webcast_id = (
            SELECT webcast_id
            FROM mopacdb_webcasts.transcripts
            WHERE id = %s
        )
        ORDER BY start_time ASC;
    """
    details_sql = """
        SELECT CONCAT(meeting_type, ' - ', meeting_date) AS meeting_details
        FROM mopacdb_webcasts.webcasts
        WHERE id = (
            SELECT webcast_id
            FROM mopacdb_webcasts.transcripts
            WHERE id = %s
        );
    """

    # Run both queries on one connection and always release it.
    conn = connect_db()
    try:
        cur = conn.cursor()
        try:
            cur.execute(segments_sql, (transcript_id,))
            rows = cur.fetchall()
            if not rows:
                return None  # nothing to export; the file component stays empty

            cur.execute(details_sql, (transcript_id,))
            details = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()

    df = pd.DataFrame(rows, columns=["start_time", "end_time", "speaker", "transcript"])

    # The name comes from database text, so strip characters that are not
    # valid in file names (e.g. "/" in a meeting type) before using it.
    meeting_details = str(details[0]) if details and details[0] else f"transcript_{transcript_id}"
    safe_name = re.sub(r'[\\/:*?"<>|]+', "_", meeting_details).strip() or f"transcript_{transcript_id}"

    # A fresh temporary directory per export stops concurrent users from
    # overwriting each other's file and keeps the app directory clean.
    csv_file_path = os.path.join(tempfile.mkdtemp(), safe_name + ".csv")
    df.to_csv(csv_file_path, index=False)

    return csv_file_path
364
+
365
+
366
# Gradio interface function
def gradio_interface(query, meeting_type, start_date, end_date):
    """Run a search from the raw UI inputs and shape the outputs for Gradio.

    Args:
        query: Search text from the textbox.
        meeting_type: Selected meeting type(s) from the dropdown.
        start_date: Start of the date range as a Unix timestamp (int or
            float), a ``YYYY-MM-DD`` string, a ``datetime``, or a falsy value.
        end_date: End of the date range, in the same forms as ``start_date``.

    Returns:
        ``(displayed_results, full_results, no_results_message)`` where the
        message is a red HTML notice when the search returned no rows and
        ``""`` otherwise.

    Raises:
        ValueError: If a date string is not in ``YYYY-MM-DD`` format.
    """
    def _to_datetime(value):
        # Timestamps may arrive as int as well as float.
        if isinstance(value, datetime):
            return value
        if isinstance(value, (int, float)):
            return datetime.fromtimestamp(value)
        return datetime.strptime(value, "%Y-%m-%d")

    if start_date:
        start_date = _to_datetime(start_date)
    if end_date:
        end_date = _to_datetime(end_date)

    displayed_results, full_results = search_transcripts(query, meeting_type, start_date, end_date)

    no_results_message = "<span style='color: red;'>No results found. Please try a different search.</span>" if not displayed_results else ""

    return displayed_results, full_results, no_results_message
378
+
379
+
380
+ #Set custom CSS
381
+ custom_css = """
382
+ <style>
383
+ #purple-textbox textarea {
384
+ background-color: #F3E5F5 !important;
385
+ }
386
+ #yellow-textbox textarea {
387
+ background-color: #FFF2D1 !important;
388
+ }
389
+ textarea {
390
+ font-family: Arial, sans-serif;
391
+ font-size: 14px;
392
+ white-space: pre-wrap;
393
+ color: black;
394
+ }
395
+
396
+ </style>
397
+ """
398
+
399
+ # Build the Gradio app
400
+ with gr.Blocks() as app:
401
+
402
+ gr.Markdown(
403
+ """
404
+ <div style="background-color: #4B23C0; color: white; padding: 20px; text-align: left; font-size: 24px; font-weight: bold; margin: 0;">
405
+ MOPAC | DS &nbsp;-&nbsp;🔍 Webcast Transcript Search Tool
406
+ </div>
407
+ """,
408
+ sanitize_html=False
409
+ )
410
+
411
+ #Load TextBox CSS
412
+ gr.HTML(custom_css)
413
+
414
+ meeting_types = get_meeting_types()
415
+
416
+ with gr.Row():
417
+ query = gr.Textbox(label="Search Query (optional)")
418
+ meeting_type = gr.Dropdown(label="Meeting Type (optional)", choices=meeting_types, multiselect=True)
419
+ start_date = gr.DateTime(label="Start Date (optional)", include_time=False, info="Enter Date in YYYY-MM-DD Format")
420
+ end_date = gr.DateTime(label="End Date (optional)", include_time=False, info="Enter Date in YYYY-MM-DD Format")
421
+
422
+ search_btn = gr.Button("Search")
423
+
424
+ # Table for results (without "Identifier" column)
425
+ results_table = gr.DataFrame(
426
+ headers=["Meeting Type", "Date", "Start Time", "End Time", "Speaker", "Transcript"],
427
+ datatype=["str", "date", "str", "str", "str", "str"],
428
+ interactive=False
429
+ )
430
+
431
+ no_results_text = gr.Markdown()
432
+
433
+ # Store full data (including "Identifier") separately
434
+ full_results_state = gr.State([])
435
+
436
+ # Search button updates the table and stores full results
437
+ search_btn.click(fn=gradio_interface, inputs=[query, meeting_type, start_date, end_date], outputs=[results_table, full_results_state, no_results_text])
438
+
439
+ with gr.Row():
440
+ with gr.Column(scale=2):
441
+ transcript_prior = gr.Textbox(label="Prior Transcript", interactive=False, elem_id="purple-textbox")
442
+ selected_transcript = gr.Textbox(label="Selected Transcript",
443
+ info ="Search Queries will appear demarcated using orange markers (🔶).",
444
+ interactive=False, elem_id="yellow-textbox")
445
+ transcript_posterior = gr.Textbox(label="Posterior Transcript", interactive=False, elem_id="purple-textbox")
446
+
447
+ gr.Markdown("**The full transcript of any selected speech segment will appear available for download here...**")
448
+ transcript_bttn = gr.File(label="Download Full Transcript")
449
+
450
+ with gr.Column(scale=3):
451
+
452
+ video_iframe = gr.HTML("")
453
+ video_bttn_full = gr.HTML("")
454
+
455
+
456
+ # Update transcript and video using the stored full data
457
+ results_table.select(fn=df_select_callback, inputs=[full_results_state, query], outputs=[selected_transcript])
458
+ results_table.select(fn=df_prior_callback, inputs=[full_results_state], outputs=[transcript_prior])
459
+ results_table.select(fn=df_posterior_callback, inputs=[full_results_state], outputs=[transcript_posterior])
460
+ results_table.select(fn=df_video_callback, inputs=[full_results_state], outputs=[video_iframe])
461
+ results_table.select(fn=df_video_button_callback, inputs=[full_results_state], outputs=[video_bttn_full])
462
+ results_table.select(fn=df_transcript_callback, inputs=[full_results_state], outputs=[transcript_bttn])
463
+
464
+
465
+ app.launch()
466
+