# Hugging Face Space: real-time image narration (BLIP image captioning + VITS text-to-speech).
import torch
import gradio as gr
from PIL import Image
import scipy.io.wavfile as wavfile
import numpy as np
from transformers import pipeline

# Image-captioning model: produces a text description of an uploaded image.
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Text-to-speech model: narrates the generated caption as audio.
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
def generate_audio(text):
    """Synthesize speech for *text* with the module-level VITS narrator.

    Writes the waveform to 'output.wav' in the working directory and
    returns that path (the format Gradio's Audio output expects).
    """
    speech = narrator(text)
    # NOTE(review): assumes the TTS pipeline returns a dict with keys
    # 'audio' (array whose first row is the waveform) and 'sampling_rate'
    # — confirm against the transformers text-to-speech pipeline docs.
    wavfile.write(
        "output.wav",
        rate=speech['sampling_rate'],
        data=speech['audio'][0],
    )
    return 'output.wav'
def caption_my_image(imagee):
    """Caption the uploaded image and return a WAV narration of that caption.

    Accepts a NumPy array (as delivered by gr.Image) or a PIL image;
    raises TypeError for anything else.
    """
    # Normalize whatever Gradio hands us into a PIL image.
    if isinstance(imagee, np.ndarray):
        pil_image = Image.fromarray(imagee)
    elif isinstance(imagee, Image.Image):
        pil_image = imagee
    else:
        raise TypeError("Unsupported image format. Please upload a valid image.")

    rgb_image = pil_image.convert('RGB')
    description = pipe(rgb_image)[0]['generated_text']
    return generate_audio(description)
# Wire the captioning + narration pipeline into a simple Gradio UI.
demo = gr.Interface(
    fn=caption_my_image,
    inputs=[gr.Image(label='Upload an image to know the story behind it')],
    outputs=[gr.Audio(label='Play the narration of an image')],
    title="Here Image narration in real time",
    description='This will narrate the description of the image',
)

# Fix: `share` expects a boolean — the original passed the string 'True',
# which only happened to work via truthiness.
demo.launch(share=True, debug=True)