txh17 committed on
Commit
cb80203
·
verified ·
1 Parent(s): 53710db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -44
app.py CHANGED
@@ -1,54 +1,26 @@
1
  import gradio as gr
2
- from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
3
- import torch
4
- from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
5
  from diffusers import StableDiffusionPipeline
6
- import cv2
7
- import numpy as np
8
 
9
- # 使用T5模型生成文本描述
10
- model_name = "t5-large" # 你可以根据需求选择不同版本的T5
11
- tokenizer = T5Tokenizer.from_pretrained(model_name)
12
- t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
13
 
14
  def generate_prompt(description: str) -> str:
15
- # 使用T5模型生成详细的图像生成提示
16
- input_text = f"将这个描述扩展为一个详细的图像生成提示:{description}"
17
- inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
18
- outputs = t5_model.generate(inputs["input_ids"], max_length=150, num_beams=5, early_stopping=True)
19
- prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
  return prompt
21
 
22
- # 加载 ControlNet Canny 边缘检测模型
23
- controlnet = ControlNetModel.from_pretrained("lllyasviel/controlnet-canny-sdxl-1.0")
24
- stable_diffusion = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-1.0")
25
 
26
- # 使用 ControlNet 管道
27
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
28
- "stabilityai/stable-diffusion-xl-1.0",
29
- controlnet=controlnet
30
- )
31
- pipe.to("cpu") # 使用CPU
32
-
33
- def generate_image_with_controlnet(prompt: str):
34
- # 生成 Canny 边缘图像并传入 ControlNet
35
- # 使用模型生成图像并提取边缘
36
- image = pipe(prompt).images[0]
37
-
38
- # 转换为灰度图像
39
- image = np.array(image.convert('L')) # 转为灰度图
40
-
41
- # 使用 Canny 边缘检测
42
- canny_edge_image = cv2.Canny(image, 100, 200) # 进行 Canny 边缘检测
43
-
44
- # 将 Canny 边缘图像转换为适用于 ControlNet 的格式
45
- canny_edge_image = torch.from_numpy(canny_edge_image).unsqueeze(0).unsqueeze(0).float() / 255.0 # 规范化
46
- generated_image = pipe(prompt=prompt, control_image=canny_edge_image).images[0]
47
-
48
- return generated_image
49
 
50
  # 使用Whisper模型进行语音转文本
51
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
52
  processor = WhisperProcessor.from_pretrained("openai/whisper-large")
53
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
54
 
@@ -64,21 +36,20 @@ def process_input(description: str, creativity: float, include_background: bool)
64
  prompt = generate_prompt(description)
65
  if include_background:
66
  prompt += " 添加详细的生动背景。"
67
- image = generate_image_with_controlnet(prompt)
68
  return prompt, image
69
 
70
  # 处理音频输入和生成图像
71
  def process_audio_input(audio):
72
  description = transcribe_audio(audio)
73
  prompt = generate_prompt(description)
74
- image = generate_image_with_controlnet(prompt)
75
  return prompt, image
76
 
77
  # Gradio界面部分
78
  text_input = gr.Textbox(label="请输入描述", placeholder="例如:天空中的魔法树屋")
79
  creativity_slider = gr.Slider(minimum=0, maximum=1, step=0.1, label="创意程度 (0 到 1)", value=0.7)
80
  background_checkbox = gr.Checkbox(label="是否添加背景", value=True)
81
-
82
  audio_input = gr.Audio(type="numpy", label="用语音描述图像")
83
 
84
  # 创建文本输入的界面
 
1
  import gradio as gr
2
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 
 
3
  from diffusers import StableDiffusionPipeline
4
+ import torch
 
5
 
6
+ # 使用BART模型生成文本描述
7
+ prompt_generator = pipeline("text2text-generation", model="facebook/bart-large-cnn")
 
 
8
 
9
  def generate_prompt(description: str) -> str:
10
+ # 根据简短描述生成详细的图像生成提示
11
+ prompt = prompt_generator(f"将这个描述扩展为一个详细的图像生成提示:{description}", max_length=150)[0]['generated_text']
 
 
 
12
  return prompt
13
 
14
+ # 加载 ByteDance/SDXL-Lightning 模型
15
+ sdxl_pipeline = StableDiffusionPipeline.from_pretrained("ByteDance/SDXL-Lightning")
16
+ sdxl_pipeline.to("cpu") # 使用 CPU
17
 
18
+ def generate_image(prompt: str):
19
+ # 根据提示生成图像
20
+ image = sdxl_pipeline(prompt).images[0]
21
+ return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # 使用Whisper模型进行语音转文本
 
24
  processor = WhisperProcessor.from_pretrained("openai/whisper-large")
25
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
26
 
 
36
  prompt = generate_prompt(description)
37
  if include_background:
38
  prompt += " 添加详细的生动背景。"
39
+ image = generate_image(prompt)
40
  return prompt, image
41
 
42
  # 处理音频输入和生成图像
43
  def process_audio_input(audio):
44
  description = transcribe_audio(audio)
45
  prompt = generate_prompt(description)
46
+ image = generate_image(prompt)
47
  return prompt, image
48
 
49
  # Gradio界面部分
50
  text_input = gr.Textbox(label="请输入描述", placeholder="例如:天空中的魔法树屋")
51
  creativity_slider = gr.Slider(minimum=0, maximum=1, step=0.1, label="创意程度 (0 到 1)", value=0.7)
52
  background_checkbox = gr.Checkbox(label="是否添加背景", value=True)
 
53
  audio_input = gr.Audio(type="numpy", label="用语音描述图像")
54
 
55
  # 创建文本输入的界面