# AI Building Blocks — Gradio demo app (Hugging Face Space, ZeroGPU hardware).
from dotenv import load_dotenv
from functools import partial
import gradio as gr
from huggingface_hub import InferenceClient
from automatic_speech_recognition import automatic_speech_recognition
from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image
class App:
    """Tabbed Gradio demo gathering several Hugging Face inference tasks
    (text-to-image, captioning, classification, TTS, ASR) into one UI."""

    def __init__(self, client: InferenceClient) -> None:
        # Shared inference client; bound into some task callbacks below
        # via functools.partial so Gradio only passes the UI inputs.
        self.client = client

    def run(self) -> None:
        """Build the tabbed Blocks UI and launch the (blocking) Gradio server."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                # --- Text -> image generation ---
                with gr.Tab("Text-to-image Generation"):
                    gr.Markdown("Generate an image from a text prompt.")
                    text_to_image_prompt = gr.Textbox(label="Prompt")
                    text_to_image_generate_button = gr.Button("Generate")
                    text_to_image_output = gr.Image(label="Image", type="pil")
                    text_to_image_generate_button.click(
                        fn=partial(text_to_image, self.client),
                        inputs=text_to_image_prompt,
                        outputs=text_to_image_output
                    )
                # --- Image -> text (captioning) ---
                with gr.Tab("Image-to-text or Image Captioning"):
                    gr.Markdown("Generate a text description of an image.")
                    image_to_text_url_input = gr.Textbox(label="Image URL")
                    image_to_text_image_request_button = gr.Button("Get Image")
                    image_to_text_image_input = gr.Image(label="Image", type="pil")
                    # Step 1: fetch the image from the URL into the Image component.
                    image_to_text_image_request_button.click(
                        fn=request_image,
                        inputs=image_to_text_url_input,
                        outputs=image_to_text_image_input
                    )
                    image_to_text_output = gr.List(label="Captions", headers=["Caption"])
                    image_to_text_button = gr.Button("Caption")
                    # Step 2: caption the fetched image.
                    # NOTE(review): unlike the other tabs, image_to_text is NOT
                    # bound to self.client here — confirm it builds its own
                    # client or genuinely needs none.
                    image_to_text_button.click(
                        fn=image_to_text,
                        inputs=image_to_text_image_input,
                        outputs=image_to_text_output
                    )
                # --- Image classification (Trash-Net) ---
                with gr.Tab("Image Classification"):
                    gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
                    image_classification_url_input = gr.Textbox(label="Image URL")
                    image_classification_image_request_button = gr.Button("Get Image")
                    image_classification_image_input = gr.Image(label="Image",type="pil")
                    # Same URL -> Image fetch pattern as the captioning tab.
                    image_classification_image_request_button.click(
                        fn=request_image,
                        inputs=image_classification_url_input,
                        outputs=image_classification_image_input
                    )
                    image_classification_button = gr.Button("Classify")
                    image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
                    image_classification_button.click(
                        fn=partial(image_classification, self.client),
                        inputs=image_classification_image_input,
                        outputs=image_classification_output
                    )
                # --- Text -> speech ---
                with gr.Tab("Text-to-speech (TTS)"):
                    gr.Markdown("Generate speech from text.")
                    text_to_speech_text = gr.Textbox(label="Text")
                    text_to_speech_generate_button = gr.Button("Generate")
                    text_to_speech_output = gr.Audio(label="Speech")
                    # NOTE(review): text_to_speech is also called without
                    # self.client — verify against its definition.
                    text_to_speech_generate_button.click(
                        fn=text_to_speech,
                        inputs=text_to_speech_text,
                        outputs=text_to_speech_output
                    )
                # --- Speech -> text (ASR) ---
                with gr.Tab("Audio Transcription or Automatic Speech Recognition (ASR)"):
                    gr.Markdown("Transcribe audio to text.")
                    audio_transcription_audio_input = gr.Audio(label="Audio")
                    audio_transcription_generate_button = gr.Button("Transcribe")
                    audio_transcription_output = gr.Textbox(label="Text")
                    audio_transcription_generate_button.click(
                        fn=partial(automatic_speech_recognition, self.client),
                        inputs=audio_transcription_audio_input,
                        outputs=audio_transcription_output
                    )
        # Blocks until the server is stopped.
        demo.launch()
if __name__ == "__main__":
    # Pull credentials (e.g. HF_TOKEN) from .env before the client is built,
    # then construct the app and hand control to the Gradio server.
    load_dotenv()
    App(InferenceClient()).run()