txh17 committed on
Commit
1bcac4b
·
verified ·
1 Parent(s): 15194dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -1,10 +1,13 @@
1
  import gradio as gr
2
  from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
3
  import torch
4
- import stable_diffusion_webnn # 假设stable-diffusion-v1.5-webnn的库名为 stable_diffusion_webnn
 
 
 
5
 
6
  # 使用T5模型生成文本描述
7
- model_name = "t5-large" # 可以根据需求选择不同版本的T5
8
  tokenizer = T5Tokenizer.from_pretrained(model_name)
9
  t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
10
 
@@ -16,15 +19,33 @@ def generate_prompt(description: str) -> str:
16
  prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
17
  return prompt
18
 
19
- # 使用 stable-diffusion-v1.5-webnn 库加载 Stable Diffusion 模型
20
- # 这里假设 stable_diffusion_webnn 可以直接加载模型并生成图像
21
- text2image_pipeline = stable_diffusion_webnn.StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base")
22
- text2image_pipeline.to("cpu") # 使用CPU
23
 
24
- def generate_image(prompt: str):
25
- # 根据提示生成图像
26
- image = text2image_pipeline(prompt).images[0]
27
- return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # 使用Whisper模型进行语音转文本
30
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -43,20 +64,21 @@ def process_input(description: str, creativity: float, include_background: bool)
43
  prompt = generate_prompt(description)
44
  if include_background:
45
  prompt += " 添加详细的生动背景。"
46
- image = generate_image(prompt)
47
  return prompt, image
48
 
49
  # 处理音频输入和生成图像
50
  def process_audio_input(audio):
51
  description = transcribe_audio(audio)
52
  prompt = generate_prompt(description)
53
- image = generate_image(prompt)
54
  return prompt, image
55
 
56
  # Gradio界面部分
57
  text_input = gr.Textbox(label="请输入描述", placeholder="例如:天空中的魔法树屋")
58
  creativity_slider = gr.Slider(minimum=0, maximum=1, step=0.1, label="创意程度 (0 到 1)", value=0.7)
59
  background_checkbox = gr.Checkbox(label="是否添加背景", value=True)
 
60
  audio_input = gr.Audio(type="numpy", label="用语音描述图像")
61
 
62
  # 创建文本输入的界面
 
1
  import gradio as gr
2
  from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
3
  import torch
4
+ from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
5
+ from diffusers import StableDiffusionPipeline
6
+ import cv2
7
+ import numpy as np
8
 
9
  # 使用T5模型生成文本描述
10
+ model_name = "t5-large" # 你可以根据需求选择不同版本的T5
11
  tokenizer = T5Tokenizer.from_pretrained(model_name)
12
  t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
13
 
 
19
  prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
  return prompt
21
 
22
+ # 加载 ControlNet Canny 边缘检测模型
23
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/controlnet-canny-sdxl-1.0")
24
+ stable_diffusion = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-1.0")
 
25
 
26
+ # 使用 ControlNet 管道
27
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
28
+ "stabilityai/stable-diffusion-xl-1.0",
29
+ controlnet=controlnet
30
+ )
31
+ pipe.to("cpu") # 使用CPU
32
+
33
+ def generate_image_with_controlnet(prompt: str):
34
+ # 生成 Canny 边缘图像并传入 ControlNet
35
+ # 使用模型生成图像并提取边缘
36
+ image = pipe(prompt).images[0]
37
+
38
+ # 转换为灰度图像
39
+ image = np.array(image.convert('L')) # 转为灰度图
40
+
41
+ # 使用 Canny 边缘检测
42
+ canny_edge_image = cv2.Canny(image, 100, 200) # 进行 Canny 边缘检测
43
+
44
+ # 将 Canny 边缘图像转换为适用于 ControlNet 的格式
45
+ canny_edge_image = torch.from_numpy(canny_edge_image).unsqueeze(0).unsqueeze(0).float() / 255.0 # 规范化
46
+ generated_image = pipe(prompt=prompt, control_image=canny_edge_image).images[0]
47
+
48
+ return generated_image
49
 
50
  # 使用Whisper模型进行语音转文本
51
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
64
  prompt = generate_prompt(description)
65
  if include_background:
66
  prompt += " 添加详细的生动背景。"
67
+ image = generate_image_with_controlnet(prompt)
68
  return prompt, image
69
 
70
  # 处理音频输入和生成图像
71
  def process_audio_input(audio):
72
  description = transcribe_audio(audio)
73
  prompt = generate_prompt(description)
74
+ image = generate_image_with_controlnet(prompt)
75
  return prompt, image
76
 
77
  # Gradio界面部分
78
  text_input = gr.Textbox(label="请输入描述", placeholder="例如:天空中的魔法树屋")
79
  creativity_slider = gr.Slider(minimum=0, maximum=1, step=0.1, label="创意程度 (0 到 1)", value=0.7)
80
  background_checkbox = gr.Checkbox(label="是否添加背景", value=True)
81
+
82
  audio_input = gr.Audio(type="numpy", label="用语音描述图像")
83
 
84
  # 创建文本输入的界面