Spaces:
Build error
Build error
| import sys | |
| from google import genai | |
| import subprocess | |
| import os | |
| import shutil | |
| import gradio as gr | |
| import uuid | |
| import subprocess | |
def download_subtitles(video_url):
    """Download English subtitles for a video and return the path to the .srt file.

    yt-dlp is invoked twice, once for auto-generated subtitles and once for
    regular (uploaded) subtitles, writing into a fresh UUID-named directory
    relative to the current working directory. No video data is downloaded.

    Args:
        video_url: URL (or video ID) handed to yt-dlp.

    Returns:
        Path to the single ``.srt`` file inside the new directory. The caller
        is responsible for removing that directory once done with the file.

    Raises:
        subprocess.CalledProcessError: yt-dlp exited with a non-zero status.
        FileNotFoundError: the output directory was never created, or it
            contains no ``.srt`` file.
        RuntimeError: more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())

    try:
        # Auto-generated subtitles first, then regular ones.
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run(
                [
                    "yt-dlp",
                    subs_flag,
                    "--sub-lang", "en",
                    "--convert-subs", "srt",
                    "--skip-download",
                    "-P", f"home:{directory}",
                    # End of options: the URL comes from user input and must
                    # never be parsed as a yt-dlp flag if it starts with "-".
                    "--",
                    video_url,
                ],
                check=True,
            )

        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")
    except Exception:
        # Nothing usable was produced; do not leave the partial download
        # directory behind for every failed request.
        shutil.rmtree(directory, ignore_errors=True)
        raise

    return os.path.join(directory, srt_files[0])
def cleanup_directory(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: if ``folder_path`` does not exist.
    """
    if os.path.exists(folder_path):
        # Path is present: remove the whole tree in one go.
        shutil.rmtree(folder_path)
        return

    raise FileNotFoundError(f"The directory {folder_path} does not exist")
def srt_to_text(input_file):
    """Extract the spoken text from an SRT subtitle file.

    Each cue is expected to be an index line, a timestamp line, and one or
    more text lines; cues are separated by blank lines. The first two lines of
    every cue are dropped, and a text line is skipped when it is identical to
    the previously emitted line (auto-generated subtitles repeat lines across
    consecutive cues).

    Args:
        input_file: Path to a UTF-8 encoded ``.srt`` file.

    Returns:
        The subtitle text, one line per entry, joined with ``"\\n"``.

    Note:
        If ``input_file`` does not exist, an error is printed and the process
        exits with status 1 (``SystemExit``).
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
        sys.exit(1)

    output_lines = []
    cue = []
    # A separator is any line that is empty after stripping, so a blank line
    # holding stray spaces still ends the cue instead of merging it with the
    # next one. The trailing "" sentinel flushes the final cue.
    for raw_line in content.split("\n") + [""]:
        line = raw_line.strip()
        if line:
            cue.append(line)
            continue
        # cue[0] is the index and cue[1] the timestamp; the rest is text.
        for text in cue[2:]:
            if not output_lines or text != output_lines[-1]:
                output_lines.append(text)
        cue = []

    return "\n".join(output_lines)
| # url = "https://www.youtube.com/watch?v=B1dWbiXnz_s" | |
| # subtitlesfile = download_subtitles(url) | |
| # video_text = srt_to_text(subtitlesfile) | |
| # cleanup_directory(os.path.dirname(subtitlesfile)) | |
| # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") | |
| # client = genai.Client(api_key=GEMINI_API_KEY) | |
| # response = client.models.generate_content( | |
| # model='gemini-2.0-flash', | |
| # contents=f"Summarize following text chronologically, make it long, use markdown: \n{video_text}", | |
| # ) | |
| # print(response.text) | |
def get_transcript_text(url):
    """Return the plain-text transcript of the video at ``url``.

    Downloads the subtitles, converts the ``.srt`` file to text, and removes
    the directory the subtitles were downloaded into.

    Args:
        url: Video URL to fetch subtitles for.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; errors are not translated here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        # Always remove the download directory, even when text extraction
        # fails, so failed requests do not pile up directories on disk.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))
def summarize_video(url, prompt):
    """Summarize a video's transcript with Gemini.

    Args:
        url: Video URL whose subtitles are used as the source text.
        prompt: Instruction text placed before the transcript, separated from
            it by a newline.

    Returns:
        The ``text`` attribute of the model response.
    """
    transcript = get_transcript_text(url)

    # The API key is read from the environment on every call.
    gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    request_text = prompt + "\n" + transcript

    print("Generating summary...")
    result = gemini.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
    )
    return result.text
# Default instruction shown in the (editable) prompt box.
default_prompt = "Summarize the following text chronologically, make it long, use markdown:"

# UI: title, a row holding the URL box (wide column) and the Summarize button
# (narrow column), then the prompt box and the Markdown output area.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")

    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="Enter YouTube URL here...",
            )
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")

    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()

    # Clicking the button sends the URL and prompt to summarize_video and
    # renders the returned text in the Markdown output.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output,
    )

# Start the web server only when run as a script.
if __name__ == "__main__":
    app.launch()