Devnik21 commited on
Commit
4dea00e
·
verified ·
1 Parent(s): ff9fd9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -151
app.py CHANGED
@@ -1,154 +1,118 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
- import torch
8
-
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
-
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
-
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
-
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
-
51
- return image, seed
52
-
53
-
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
-
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
66
-
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
  )
152
 
153
- if __name__ == "__main__":
154
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
+ # !pip install -q gTTS
8
+ # !pip install -qU "google-genai==1.9.0"
9
+
10
+
11
+ # In[3]:
12
+
13
+
14
  import numpy as np
15
+ import pandas as pd
16
+ import os
17
+
18
+ from google import genai
19
+ from google.genai import types
20
+ from IPython.display import display, Image, Markdown, Audio
21
+ from IPython.display import display, Image as IPImage
22
+ from gtts import gTTS
23
+ import IPython.display as ipd
24
+ from PIL import Image as PILImage
25
+ import io
26
+
27
+
28
# In[4]:


# SECURITY: never commit a real API key to a repository — the previous
# revision leaked one here. Read the key from the environment instead and
# fail fast with a clear message when it is missing.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set. "
        "Export your Gemini API key before running this script."
    )

# Single Gemini client instance, reused by every cell below.
client = genai.Client(api_key=GOOGLE_API_KEY)
33
+
34
+
35
# In[ ]:


# !pip install google.api_core


# In[8]:


from google.api_core import retry


def is_retriable(exc) -> bool:
    """Return True for transient Gemini API errors that are worth retrying.

    429 = rate limited / quota exhausted, 503 = service temporarily
    unavailable — both typically succeed on a later attempt.
    """
    return isinstance(exc, genai.errors.APIError) and exc.code in {429, 503}


# Monkey-patch generate_content so every call made through the client in
# this script automatically retries (with backoff) on transient errors.
# NOTE(review): this rebinds a library method globally for the process.
genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable
)(genai.models.Models.generate_content)
50
+
51
+
52
# In[10]:


# Collect the drawing request from the user.
user_prompt = input("Enter your prompt: ")

# Ask the image-capable Gemini model for a mixed text + image response.
generation_response = client.models.generate_content(
    model="gemini-2.0-flash-exp-image-generation",
    contents=user_prompt,
    config=types.GenerateContentConfig(
        response_modalities=['text', 'image']
    ),
)

# Walk the returned parts: echo any text, render the image inline and keep
# its raw bytes for the follow-up vision cell.
image_bytes = None
response_parts = generation_response.candidates[0].content.parts
for part in response_parts:
    if part.text:
        print(part.text)
    elif part.inline_data:
        image_bytes = part.inline_data.data
        display(Image(image_bytes))
75
+
76
+
77
# In[11]:


if image_bytes:
    # Re-open the generated bytes as a PIL image so it can be passed back
    # to the model as a multimodal input.
    pil_image = PILImage.open(io.BytesIO(image_bytes))

    # Multimodal prompt: an instruction string plus the image itself.
    vision_prompt = [
        "What is in this image? Describe it in detail.",
        pil_image,
    ]

    vision_response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=vision_prompt,
    )

    # Keeping the display calls inside the guard avoids a NameError on
    # vision_response when the previous cell produced no image.
    display(Markdown("### 🖼️ Image Description:"))
    display(Markdown(vision_response.text))
95
+
96
+
97
# In[12]:


language = 'en'  # ← change here if you want different language

# The description produced by the vision cell above.
image_description_text = vision_response.text

# Synthesize the description to speech and write it out as an MP3 file.
speech = gTTS(text=image_description_text, lang=language)
speech.save("description.mp3")

# Show the description as text...
display(Markdown("### 📝 Image Description (Text):"))
display(Markdown(image_description_text))

# ...and play the spoken version inline.
display(Markdown("### 🔊 Image Description (Audio):"))
ipd.display(ipd.Audio("description.mp3"))
112
+
113
+
114
+ # In[ ]:
115
+
116
+
117
+
118
+