Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
def _extract_video_id(raw: str) -> str:
    """Return the bare video ID from *raw*, which may be an ID or a YouTube URL.

    Recognised URL shapes (scheme optional):
      * ``youtube.com/watch?v=<id>`` (any subdomain, e.g. ``www.`` or ``m.``)
      * ``youtu.be/<id>``
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Anything that is not recognised is returned stripped but otherwise
    unchanged, so a plain video ID behaves exactly as before.
    """
    # Local import keeps this block self-contained; urllib.parse is stdlib.
    from urllib.parse import parse_qs, urlparse

    candidate = raw.strip()
    # Video IDs never contain '/', so its absence means "already an ID".
    if "/" not in candidate:
        return candidate

    # urlparse only fills in the host when a scheme is present.
    parsed = urlparse(candidate if "://" in candidate else f"https://{candidate}")
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be":
        return path_parts[0] if path_parts else candidate

    is_youtube_host = host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    )
    if is_youtube_host:
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            return query_ids[0]
        if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
            return path_parts[1]

    return candidate


def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches and formats the transcript for a given YouTube video.

    Args:
        video_id (str): The YouTube video ID (e.g., 'dQw4w9WgXcQ'). Surrounding
            whitespace is ignored, and a full YouTube URL is also accepted; the
            ID is extracted from it.

    Returns:
        str: A formatted string containing the full transcript and detailed segments,
        a prompt if the input is empty or whitespace-only, or an error message
        if the transcript cannot be retrieved.
    """
    # Treat None, "" and whitespace-only input the same way: nothing to look up.
    if not video_id or not video_id.strip():
        return "Please enter a YouTube video ID."

    try:
        resolved_id = _extract_video_id(video_id)

        # Older youtube-transcript-api releases expose a static get_transcript();
        # newer ones replace it with an instance-level fetch() whose result
        # converts to the same list of {'text', 'start', 'duration'} dicts.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript_list = YouTubeTranscriptApi.get_transcript(resolved_id)
        else:
            transcript_list = YouTubeTranscriptApi().fetch(resolved_id).to_raw_data()

        # Whole transcript as one space-separated paragraph.
        full_transcript_text = " ".join(item["text"] for item in transcript_list)

        # One line per caption segment with its timing information.
        detailed_segments_str = "\n".join(
            f"Start: {segment['start']:.2f}s, Duration: {segment['duration']:.2f}s, Text: {segment['text']}"
            for segment in transcript_list
        )

        return (
            "Full Transcript:\n"
            f"{full_transcript_text}\n\n"
            "Detailed Transcript Segments:\n"
            f"{detailed_segments_str}"
        )
    except Exception as e:
        # This function is the UI boundary: any failure (missing captions, bad
        # ID, network error, malformed URL) is reported as text, not raised.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )
# Input widget: a single-line text field for the video ID.
video_id_box = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)

# Output widget: a tall, read-only text area so long transcripts stay readable.
transcript_box = gr.Textbox(
    label="Transcript Output",
    lines=20,
    interactive=False,
)

# Wire the transcript function to the two widgets.
iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=video_id_box,
    outputs=transcript_box,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()