# Hugging Face Space app (Space status at capture time: Paused)
| import gradio as gr | |
| import openai | |
| import json | |
| from google.oauth2 import service_account | |
| from baseline_utils import (detect_text_in_image, | |
| analyze_writer_image, | |
| generate_video, | |
| break_diary_to_scenes, | |
| scenes_caption, | |
| summarizer_for_audio, | |
| narration_generate) | |
| import os | |
# Load secrets from the Hugging Face Spaces environment.
# NOTE(review): os.getenv returns None when a variable is unset, so the
# json.loads call below raises TypeError if GOOGLE_SERVICE_ACCOUNT is not
# configured — confirm all four secrets are set in the Space settings.
openai_api_key = os.getenv("OPENAI_API_KEY")
# Parsed service-account JSON dict, consumed by get_google_credentials().
google_service_account_info = json.loads(os.getenv("GOOGLE_SERVICE_ACCOUNT"))
gemini_api_key = os.getenv("GEMINI_API_KEY")
eleven_api_key = os.getenv("ELEVEN_API_KEY")

# Initialize OpenAI via the module-level attribute (pre-1.0 client style).
openai.api_key = openai_api_key
def get_google_credentials():
    """Build Google Cloud credentials from the parsed service-account info.

    Reads the module-level ``google_service_account_info`` dict (loaded
    from the GOOGLE_SERVICE_ACCOUNT environment variable at import time).
    """
    info = google_service_account_info
    return service_account.Credentials.from_service_account_info(info)
def _parse_scenes(scenes_text):
    """Split an LLM scene breakdown into a list of scene descriptions.

    Expects text of the form ``Scene 1: ... Scene 2: ...``.  Segments that
    lack the ``": "`` separator are kept whole instead of raising
    IndexError (the previous parsing crashed on such output).
    """
    descriptions = []
    for chunk in scenes_text.split("Scene")[1:]:
        chunk = chunk.strip()
        if not chunk:
            continue
        # Drop the leading "<number>: " label when present.
        parts = chunk.split(": ", 1)
        descriptions.append(parts[1] if len(parts) == 2 else parts[0])
    return descriptions


def process_images(diary_image, writer_image, audio_option):
    """Turn a handwritten-diary photo and a writer photo into a video.

    Args:
        diary_image: PIL image of the handwritten diary page.
        writer_image: PIL image of the writer.
        audio_option: "Narration" or "Meow" — forwarded to generate_video.

    Returns:
        Tuple ``(video_path, caption)``: path to the generated video file
        and a caption describing the scenes.
    """
    # Persist the uploaded PIL images so the downstream APIs can read files.
    os.makedirs("temp_upload_images", exist_ok=True)
    diary_image_path = os.path.join("temp_upload_images", "temp_diary_image.png")
    writer_image_path = os.path.join("temp_upload_images", "temp_writer_image.png")
    diary_image.save(diary_image_path)
    writer_image.save(writer_image_path)

    # OCR the diary text via Google Cloud Vision.
    google_credentials = get_google_credentials()
    detected_text = detect_text_in_image(diary_image_path, google_credentials)

    # Summarize the writer's appearance with the Gemini API.
    writer_summary = analyze_writer_image(writer_image_path, gemini_api_key)

    # Break the diary into scene prompts; parsing is tolerant of segments
    # missing the "Scene N: " label (previously an IndexError).
    scenes = break_diary_to_scenes(detected_text, openai_api_key)
    scene_list = _parse_scenes(scenes)

    # Generate the narration audio (written under the main directory).
    audio_summaries = summarizer_for_audio(detected_text)
    narration_generate(audio_summaries, eleven_api_key)

    # Render the video and produce its caption.
    video_path = generate_video(scene_list, writer_summary, audio_option, fps=24)
    caption = scenes_caption(scene_list, openai_api_key)
    return video_path, caption
def gradio_interface(diary_image, writer_image, audio_option):
    """Gradio callback: delegate to the processing pipeline.

    Returns the ``(video_path, caption)`` pair from process_images, which
    Gradio maps onto the video and markdown outputs in order.
    """
    return process_images(diary_image, writer_image, audio_option)
# ---- Gradio UI -----------------------------------------------------------
with gr.Blocks() as interface:
    gr.Markdown("# Handwritten Diary to Video")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            diary_input = gr.Image(label="Upload your handwritten diary image", type="pil")
            writer_input = gr.Image(label="Upload a photo of the writer", type="pil")
            # Audio track selection for the generated video.
            audio_choice = gr.Radio(
                ["Narration", "Meow"],
                label="Choose Audio Option",
                value="Narration",  # default selection
            )
            generate_btn = gr.Button("Generate Video")

        # Right column: generated artifacts.
        with gr.Column():
            video_out = gr.Video(label="Generated Video")
            caption_out = gr.Markdown(label="Scene Caption")

    # Wire the button to the processing pipeline.
    generate_btn.click(
        fn=gradio_interface,
        inputs=[diary_input, writer_input, audio_choice],
        outputs=[video_out, caption_out],
    )

# Launch the app (debug=True surfaces tracebacks in the Space logs).
interface.launch(debug=True)