"""Gradio demo that wires several hosted Spaces together via gradio_client.

Three ``gr.Blocks`` apps are built:

* ``demo`` - one button per step (speech-to-text, text-to-image, ControlNet,
  image-to-paragraph).
* ``demo_chain_events`` - the same four steps triggered by a single button
  and chained with ``.success(...)``.
* ``demo_joint`` - renders the two apps above side by side and is the one
  that gets launched.
"""
import os
import random

import gradio as gr
from bs4 import BeautifulSoup
from gradio_client import Client
from PIL import Image


def whisper_stt(audio):
    """Transcribe a recorded audio file with the ``abidlabs/whisper`` Space.

    Args:
        audio: Path to the recorded audio file (the ``gr.Audio`` inputs in
            this file use ``type="filepath"``).

    Returns:
        Whatever the Space's ``/predict`` endpoint returns; it is written
        into the prompt textbox.

    Raises:
        gr.Error: If no audio was provided.
    """
    print("inside whisper_stt")
    print(f'audio is - {audio}')
    print(f'audio is - {type(audio)}')
    if not audio:
        # Nothing was recorded; fail with a readable message instead of
        # sending an empty value to the remote Space.
        raise gr.Error("Please record some audio before transcribing.")
    # Connect to the Space. The access token is read from the environment
    # so that no credential lives in the source; when HF_TOKEN is unset the
    # client connects anonymously. Any token previously committed to this
    # file must be treated as leaked and revoked.
    client = Client("abidlabs/whisper", hf_token=os.environ.get("HF_TOKEN"))
    print ("now here")
    text = client.predict(audio, api_name = '/predict')
    print(f"text is - {text}")
    return text


def ttimg(prompt):
    """Generate an image for ``prompt`` with the ``stabilityai/stable-diffusion`` Space.

    Args:
        prompt: Text prompt forwarded to the Space.

    Returns:
        Path of one ``.jpg`` file found in the directory returned by the Space.

    Raises:
        gr.Error: If the returned directory contains no ``.jpg`` file.
    """
    print("inside ttimg")
    # Connect to the Space
    client = Client("stabilityai/stable-diffusion")
    # The extra positional values are passed through unchanged; their
    # meaning is defined by the remote Space (fn_index 1) - TODO confirm.
    img_dir = client.predict(prompt,"blur",7, fn_index = 1)
    print(f'image dir - {img_dir}')
    # os.listdir order is arbitrary, so sort to pick the same file every run.
    jpg_files = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))
    if not jpg_files:
        raise gr.Error("Stable Diffusion returned no .jpg images.")
    file_path = os.path.join(img_dir, jpg_files[0])
    print(f'file_path - {file_path}')
    return file_path


def controlnet_img2img(img, cnet_prompt):
    """Run the ``/canny`` endpoint of the ``ysharma/ControlNetwithSlider`` Space.

    Args:
        img: Path of the input image.
        cnet_prompt: Text prompt forwarded to the Space.

    Returns:
        A 2-tuple with the paths of the first two ``.png`` files (in sorted
        order) found in the directory returned by the Space.

    Raises:
        gr.Error: If fewer than two ``.png`` files were produced.
    """
    print("inside controlnet_img2img")
    # Connect to the Space
    client = Client("ysharma/ControlNetwithSlider")
    # The remaining positional values are passed through unchanged; their
    # meaning is defined by the remote /canny endpoint - TODO confirm.
    img_dir = client.predict(img, cnet_prompt, "", "blur",1,512,20,9,123,0,100,200,api_name = '/canny')
    # Sorted so the two outputs are assigned deterministically.
    png_files = sorted(
        os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.png')
    )
    if len(png_files) < 2:
        raise gr.Error("ControlNet returned fewer than two .png images.")
    return png_files[0], png_files[1]


def img2para(cnet_img2):
    """Describe an image with the ``Awiny/Image2Paragraph`` Space.

    Args:
        cnet_img2: Path of the image to describe.

    Returns:
        Text of the first ``<p>`` element inside the second ``<div>`` whose
        style is ``display: flex; flex-wrap: wrap;`` in the HTML returned by
        the Space.

    Raises:
        gr.Error: If the returned HTML does not contain that element.
    """
    print("inside img2para")
    # Connect to the Space
    client = Client("Awiny/Image2Paragraph")
    txt = client.predict(cnet_img2, [], api_name="/predict")
    print(f'img2para text is - {txt}')
    soup = BeautifulSoup(txt, 'html.parser')
    sections = soup.find_all('div', {'style': 'display: flex; flex-wrap: wrap;'})
    if len(sections) < 2:
        raise gr.Error("Image2Paragraph response has an unexpected layout.")
    paragraphs = sections[1].find_all('p')
    if not paragraphs:
        raise gr.Error("Image2Paragraph response contains no paragraph text.")
    return paragraphs[0].text


# NOTE(review): the HTML strings below contain only text and newlines in the
# reviewed source; any tags they once held appear to have been lost - restore
# them from the original markup if available. The nesting of the layout
# containers was reconstructed from statement order - confirm against the
# intended layout.

# demo with different Gradio Clients
demo = gr.Blocks()

with demo:
    gr.HTML("\n\nUsing Multiple Gradio Clients to create an app\n\n\n")
    gr.HTML("\n\n")
    with gr.Row():
        audio_mic = gr.Audio(source="microphone", type="filepath")
        with gr.Column():
            btn_whisper = gr.Button("Transcribe👇").style(full_width=True)
            prompt = gr.Textbox(label="prompt for SD image generation", interactive=False)
    gr.HTML("\n")
    with gr.Row():
        btn_sd = gr.Button("Generate Image👉").style(full_width=True)
        sd_img = gr.Image(type='filepath')
    gr.HTML("\n")
    with gr.Row():
        with gr.Column():
            cnet_prompt = gr.Textbox(label="prompt for SD image generation")
            btn_cnet = gr.Button("ControlNet Output👉").style(full_width=True)
        cnet_img1 = gr.Image(type='filepath')
        cnet_img2 = gr.Image(type='filepath')
    gr.HTML("\n")
    with gr.Row():
        btn_Img2para = gr.Button("Lets understand Image with Text👉").style(full_width=True)
        paragraph = gr.Textbox(label="Image to Paragraph generation")

    # Each button drives exactly one step; the user advances manually.
    btn_whisper.click(whisper_stt, audio_mic, prompt)
    btn_sd.click(ttimg, prompt, sd_img)
    btn_cnet.click(controlnet_img2img, [sd_img, cnet_prompt], [cnet_img1, cnet_img2])
    btn_Img2para.click(img2para, cnet_img1, paragraph)


# demo with different Gradio Clients Chained together
demo_chain_events = gr.Blocks()

with demo_chain_events:
    gr.HTML("\n\nChaining Multiple Gradio Clients using Events!\n\n\n")
    gr.HTML("\n\n")
    audio_mic = gr.Audio(source="microphone", type="filepath")
    btn_whisper = gr.Button("Trigger the Chained Events🚀🚀").style(full_width=True)
    prompt = gr.Textbox(label="Getting the Prompt for Stable Diffusion using Whisper Gradio Client", interactive=False,)
    gr.HTML("\n")
    with gr.Row():
        with gr.Column():
            html_sd = gr.HTML("\n\nStable Diffusion Generated Image👇\n\n")
            sd_img = gr.Image(type='filepath')
        with gr.Column():
            with gr.Row():
                html_cnet = gr.HTML("\n\nControlNet Output👇\n\n")
                cnet_prompt = gr.Textbox(label="Prompt for ControlNet", value='a beautiful scenery')
            with gr.Row():
                cnet_img1 = gr.Image(type='filepath')
                cnet_img2 = gr.Image(type='filepath')
    gr.HTML("\n")
    with gr.Row():
        with gr.Column():
            html_Img2para = gr.HTML("\n\nImage to Paragraph output👇\n\n")
            paragraph = gr.Textbox(label="Image to Paragraph generation")

    # One click runs all four steps; each step starts only if the previous
    # one finished without raising.
    btn_whisper.click(whisper_stt, audio_mic, prompt)\
        .success(ttimg, prompt, sd_img)\
        .success(controlnet_img2img, [sd_img, cnet_prompt], [cnet_img1, cnet_img2])\
        .success(img2para, cnet_img1, paragraph)


# Combined page that renders both apps next to each other.
demo_joint = gr.Blocks(theme=gr.themes.Monochrome())

with demo_joint:
    gr.HTML("\n\n🚀Revolutionize Your App Development with Gradio Client🏗️\n\n")
    gr.HTML("\n\nUsing Multiple Clients at Once : Whisper Client -> Stable Diffusion Client -> ControlNet Client -> Image2Paragraph Client\n\n\n")
    with gr.Row():
        with gr.Box():
            demo.render()
        with gr.Box():
            demo_chain_events.render()

demo_joint.launch(debug=True)