File size: 4,934 Bytes
979bbdf
dc382c8
b4d819b
979bbdf
02c9b64
dc382c8
d56b9d9
dc382c8
caf2559
d56b9d9
dc382c8
 
 
 
 
 
 
 
 
 
 
 
 
 
d56b9d9
dc382c8
 
 
 
 
 
 
d56b9d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc382c8
 
d56b9d9
 
 
 
 
 
 
dc382c8
d56b9d9
 
dc382c8
 
d56b9d9
 
dc382c8
caf2559
02c9b64
caf2559
 
 
 
 
 
 
 
02c9b64
 
 
 
 
 
0fea237
02c9b64
 
 
979bbdf
dc382c8
979bbdf
 
dc382c8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from dotenv import load_dotenv
from functools import partial
import gradio as gr
from huggingface_hub import InferenceClient
from automatic_speech_recognition import automatic_speech_recognition
from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image


class App:
    """Gradio demo exposing a gallery of AI building blocks.

    Each tab wires UI components to one inference function. Functions that
    need a hosted model receive the shared ``InferenceClient`` via
    ``functools.partial``; the others (``image_to_text``, ``text_to_speech``)
    are called as imported — presumably they manage their own backend, TODO
    confirm against those modules.
    """

    def __init__(self, client: InferenceClient):
        # Hugging Face inference client shared by the tabs that call hosted models.
        self.client = client

    def _build_image_fetch_controls(self) -> gr.Image:
        """Add URL textbox + "Get Image" button wired to an image component.

        Returns the image component so the caller can use it as an input.
        Must be called inside an active Gradio layout context (e.g. a Tab),
        since components attach to the context open at creation time.
        """
        url_input = gr.Textbox(label="Image URL")
        fetch_button = gr.Button("Get Image")
        image_input = gr.Image(label="Image", type="pil")
        fetch_button.click(
            fn=request_image,
            inputs=url_input,
            outputs=image_input,
        )
        return image_input

    def _build_text_to_image_tab(self) -> None:
        """Text prompt -> generated PIL image (uses the shared client)."""
        gr.Markdown("Generate an image from a text prompt.")
        prompt = gr.Textbox(label="Prompt")
        generate_button = gr.Button("Generate")
        output = gr.Image(label="Image", type="pil")
        generate_button.click(
            fn=partial(text_to_image, self.client),
            inputs=prompt,
            outputs=output,
        )

    def _build_image_to_text_tab(self) -> None:
        """Image (fetched from a URL) -> list of captions."""
        gr.Markdown("Generate a text description of an image.")
        image_input = self._build_image_fetch_controls()
        # Output list is created before the button, matching the original layout.
        output = gr.List(label="Captions", headers=["Caption"])
        caption_button = gr.Button("Caption")
        caption_button.click(
            fn=image_to_text,
            inputs=image_input,
            outputs=output,
        )

    def _build_image_classification_tab(self) -> None:
        """Image (fetched from a URL) -> label/probability table (uses the shared client)."""
        gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
        image_input = self._build_image_fetch_controls()
        classify_button = gr.Button("Classify")
        output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
        classify_button.click(
            fn=partial(image_classification, self.client),
            inputs=image_input,
            outputs=output,
        )

    def _build_text_to_speech_tab(self) -> None:
        """Text -> synthesized speech audio."""
        gr.Markdown("Generate speech from text.")
        text_input = gr.Textbox(label="Text")
        generate_button = gr.Button("Generate")
        output = gr.Audio(label="Speech")
        generate_button.click(
            fn=text_to_speech,
            inputs=text_input,
            outputs=output,
        )

    def _build_asr_tab(self) -> None:
        """Audio -> transcribed text (uses the shared client)."""
        gr.Markdown("Transcribe audio to text.")
        audio_input = gr.Audio(label="Audio")
        transcribe_button = gr.Button("Transcribe")
        output = gr.Textbox(label="Text")
        transcribe_button.click(
            fn=partial(automatic_speech_recognition, self.client),
            inputs=audio_input,
            outputs=output,
        )

    def run(self) -> None:
        """Build the tabbed UI and launch the Gradio server (blocks until exit)."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                with gr.Tab("Text-to-image Generation"):
                    self._build_text_to_image_tab()
                with gr.Tab("Image-to-text or Image Captioning"):
                    self._build_image_to_text_tab()
                with gr.Tab("Image Classification"):
                    self._build_image_classification_tab()
                with gr.Tab("Text-to-speech (TTS)"):
                    self._build_text_to_speech_tab()
                with gr.Tab("Audio Transcription or Automatic Speech Recognition (ASR)"):
                    self._build_asr_tab()

            demo.launch()


if __name__ == "__main__":
    # Populate os.environ from a local .env file before the client is built.
    load_dotenv()
    App(InferenceClient()).run()