Spaces:
Running
Running
import html
import json
| ########################### Chapter rendering functions ########################### | |
def get_chapters(paragraphs, table_of_content):
    """Build one chapter dict per table-of-content entry.

    Args:
        paragraphs: Sequence of dicts; each is read for its
            ``'paragraph_text'`` and ``'start_time'`` keys.
        table_of_content: Sequence of dicts; each is read for its
            ``'title'`` and ``'start_paragraph_number'`` keys.

    Returns:
        A list of dicts with the keys ``num_chapter``, ``title``,
        ``start_paragraph_number``, ``end_paragraph_number`` (exclusive),
        ``start_time``, ``end_time``, ``paragraphs`` and
        ``paragraph_timestamps``. An empty table of content yields ``[]``.
    """
    chapters = []
    last_index = len(table_of_content) - 1
    for num_chapter, entry in enumerate(table_of_content):
        start_number = entry['start_paragraph_number']

        if num_chapter < last_index:
            # A chapter ends where the next one begins.
            end_number = table_of_content[num_chapter + 1]['start_paragraph_number']
            end_paragraph = paragraphs[end_number]
        else:
            # The last chapter runs to the end of the transcript; only
            # paragraph start times are available, so its end time is the
            # start time of the final paragraph.
            end_number = len(paragraphs)
            end_paragraph = paragraphs[-1]

        chapter = {
            'num_chapter': num_chapter,
            'title': entry['title'],
            'start_paragraph_number': start_number,
            'end_paragraph_number': end_number,
            'start_time': paragraphs[start_number]['start_time'],
            'end_time': end_paragraph['start_time'],
        }

        # Index explicitly (instead of slicing) so an out-of-range paragraph
        # number raises IndexError rather than being silently truncated.
        span = range(start_number, end_number)
        chapter['paragraphs'] = [paragraphs[j]['paragraph_text'] for j in span]
        chapter['paragraph_timestamps'] = [paragraphs[j]['start_time'] for j in span]
        chapters.append(chapter)
    return chapters
def convert_seconds_to_hms(seconds):
    """Format a duration in seconds as ``HH:MM:SS``.

    Args:
        seconds: Duration in seconds. Any value accepted by ``int()`` works;
            fractional seconds are truncated so that float input still
            produces zero-padded integer fields.

    Returns:
        The duration as a string with each field padded to at least two
        digits. Hours are not wrapped, so 360000 seconds gives
        ``"100:00:00"``.
    """
    whole_seconds = int(seconds)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, remaining_seconds = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{remaining_seconds:02}"
def toc_to_html(chapters):
    """Render the chapter list as an HTML table of contents.

    Args:
        chapters: Iterable of chapter dicts; each is read for its
            ``'num_chapter'``, ``'title'`` and ``'start_time'`` keys.

    Returns:
        An HTML fragment: a heading followed by one line per chapter made of
        the chapter start time and a link to the ``#<num_chapter>`` anchor,
        labelled with the 1-based chapter number and the title.
    """
    entries = []
    for chapter in chapters:
        num_chapter = chapter['num_chapter']
        # Escape the title so characters such as "<" and "&" are displayed
        # literally instead of being parsed as markup.
        title = html.escape(str(chapter['title']))
        start = convert_seconds_to_hms(int(chapter['start_time']))
        entries.append(
            f"""{start} - <a href = "#{num_chapter}" >{num_chapter+1} - {title}</a><br>\n"""
        )
    return "<h1>Video chapters</h1><p>\n" + "".join(entries)
def section_to_html(section_json_data):
    """Render one chapter as an HTML section with timestamped paragraphs.

    Args:
        section_json_data: Chapter dict; read for its ``'paragraphs'``,
            ``'paragraph_timestamps'``, ``'num_chapter'``, ``'title'``,
            ``'start_time'`` and ``'end_time'`` keys.

    Returns:
        An HTML fragment containing the anchored chapter heading, a
        "From ... to ..." time range, and one row per paragraph.

    Raises:
        IndexError: If there are fewer timestamps than paragraphs.
    """
    paragraphs = section_json_data['paragraphs']
    timestamps = section_json_data['paragraph_timestamps']

    rows = []
    for index, paragraph in enumerate(paragraphs):
        # Index the timestamps (rather than zip) so a missing timestamp
        # raises instead of silently dropping paragraphs.
        timestamp_hms = convert_seconds_to_hms(int(timestamps[index]))
        # Escape the text so "<" and "&" are shown literally rather than
        # being interpreted as markup.
        paragraph_text = html.escape(str(paragraph))
        rows.append(f"""
    <div class="row mb-4">
        <div class="col-md-1">
            {timestamp_hms}
        </div>
        <div class="col-md-11">
            <p>{paragraph_text}</p>
        </div>
    </div>""")
    formatted_section = "".join(rows)

    num_section = section_json_data['num_chapter']
    from_to = (
        "From " + convert_seconds_to_hms(int(section_json_data['start_time']))
        + " to " + convert_seconds_to_hms(int(section_json_data['end_time']))
    )
    title = html.escape(str(section_json_data['title']))

    # The id is emitted once, on the outermost element: an id must be unique
    # within a document, and the first element carrying it is the one a
    # "#<num_section>" link jumps to.
    title_link = f"""<div class="transcript-title-icon">{num_section+1} - {title}</div>"""
    summary_section = f"""
    <a id="{num_section}"><h2>{title_link}</h2></a>
    {from_to}
    <p>
    <div class="summary-section">
        <div class="summary-text" >
            {formatted_section}
        </div>
    </div>
    """
    return summary_section
def get_result_as_html(chapters, video_id):
    """Assemble the full result page: video embed, chapter list, transcript.

    Args:
        chapters: Sequence of chapter dicts, passed to ``toc_to_html`` and,
            one by one, to ``section_to_html``.
        video_id: Identifier appended to the YouTube embed URL.

    Returns:
        An HTML string that loads Bootstrap 5.0.2 from jsDelivr and contains
        the embedded player, the table of contents and the structured
        transcript.
    """
    # Escape the id so a value containing quotes or angle brackets cannot
    # break out of the iframe's src attribute.
    safe_video_id = html.escape(str(video_id), quote=True)
    video_embed = f"""
    <iframe width="100%" height="400" src="https://www.youtube.com/embed/{safe_video_id}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
    """

    toc = toc_to_html(chapters)

    transcript_header = """
    <h1>Structured transcript</h1>
    <p>
    """
    edited_transcript = transcript_header + "".join(
        section_to_html(chapter) for chapter in chapters
    )

    result_as_html = f"""
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet">
    <div class="container mt-4">
        <div class="content">
            {video_embed}
        </div>
        <p>
        <div class="content">
            {toc}
        </div>
        <p>
        <div class="content">
            {edited_transcript}
        </div>
    </div>
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"></script>
    """
    return result_as_html
def get_transcript_as_text(transcript):
    """Flatten transcript segments into newline-separated text.

    Each segment is read for its ``'start'`` and ``'text'`` keys and becomes
    one line: the start time as ``HH:MM:SS``, a space, then the text.
    """
    lines = []
    for segment in transcript:
        stamp = convert_seconds_to_hms(int(segment['start']))
        lines.append(stamp + ' ' + segment['text'])
    return '\n'.join(lines)
def load_transcript(video_id):
    """Load a stored example transcript and return it as timestamped text.

    Reads ``examples/<video_id>_transcript.json`` relative to the current
    working directory and formats it with ``get_transcript_as_text``.

    Args:
        video_id: Identifier used to build the file name.

    Returns:
        The transcript as newline-separated text.

    Raises:
        FileNotFoundError: If the transcript file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    file_name = f"examples/{video_id}_transcript.json"
    # Decode explicitly as UTF-8 so non-ASCII transcript text does not depend
    # on the platform's default locale encoding.
    with open(file_name, 'r', encoding='utf-8') as handle:
        transcript = json.load(handle)
    return get_transcript_as_text(transcript)
def load_json_chapters(video_id):
    """Load the stored example chapters for a video.

    Reads ``examples/<video_id>.json`` relative to the current working
    directory.

    Args:
        video_id: Identifier used to build the file name.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If the chapters file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    file_name = f"examples/{video_id}.json"
    # Decode explicitly as UTF-8 so non-ASCII titles and text do not depend
    # on the platform's default locale encoding.
    with open(file_name, 'r', encoding='utf-8') as handle:
        return json.load(handle)