Spaces:

wjt6
/

llm-1

Sleeping

App Files Files Community

wjt6 committed on Apr 25, 2025

Commit

3c41586

verified ·

1 Parent(s): 65acb4a

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -95

app.py CHANGED Viewed

@@ -1,110 +1,185 @@
-import os
-import torch
 import gradio as gr
-from transformers import pipeline
 from diffusers import StableDiffusionPipeline
-# 如果需要使用 Hugging Face 访问令牌，取消下面一行的注释并设置环境变量 HUGGINGFACE_TOKEN
-# from huggingface_hub import login
-# login(token=os.getenv("HUGGINGFACE_TOKEN"))
-# Step 1: Prompt-to-Prompt 模块，使用 Flan-T5 生成结构化提示词
-llm = pipeline(
-    "text2text-generation",
-    model="google/flan-t5-large",
-    device=0 if torch.cuda.is_available() else -1
-)
-# Step 2: 加载 Stable Diffusion 模型
-# 移除无效的 revision 参数，仅使用 torch_dtype 加速加载
-sd_v15 = StableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    torch_dtype=torch.float16
-)
-sd_v15 = sd_v15.to("cuda" if torch.cuda.is_available() else "cpu")
-sd_xl = StableDiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0"
-)
-sd_xl = sd_xl.to("cuda" if torch.cuda.is_available() else "cpu")
-# 可选：语音输入模块，使用 Whisper
-asr = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-base",
-    device=0 if torch.cuda.is_available() else -1
-)
-def transcribe(audio_path):
-    text = asr(audio_path)["text"]
-    return text
-def generate(description, model_choice, guidance_scale, negative_prompt, style):
-    # 构造给 LLM 的指令
-    instruction = (
-        f"请将以下简短描述扩展为 Stable Diffusion 友好的提示词，包含细节和风格：\n"
-        f"描述: '{description}'\n"
-        f"风格: '{style}'"
-    )
-    result = llm(instruction, max_length=128)[0]["generated_text"].strip()
-    prompt = result
-    # 根据模型选择生成图像
-    pipeline_model = sd_xl if model_choice == "SDXL" else sd_v15
-    image = pipeline_model(
-        prompt,
-        guidance_scale=guidance_scale,
-        negative_prompt=negative_prompt
-    ).images[0]
-    return prompt, image
-# Step 3: 构建 Gradio 界面
-with gr.Blocks(title="Prompt-to-Image Generator") as demo:
-    gr.Markdown("## 基于 LLM 的提示词生成与 Stable Diffusion 图像生成")
-    with gr.Row():
-        with gr.Column():
-            desc_input = gr.Textbox(label="文本描述", placeholder="例如：空中的魔法树屋")
-            style_dropdown = gr.Dropdown(
-                choices=["幻想风格", "赛博朋克", "写实主义"],
-                label="选择风格"
-            )
-            model_radio = gr.Radio(
-                choices=["SD v1.5", "SDXL"],
-                value="SD v1.5",
-                label="选择模型"
-            )
-            guidance_slider = gr.Slider(
-                minimum=0, maximum=20, step=0.5, value=7.5,
-                label="Guidance Scale"
-            )
-            neg_text = gr.Textbox(
-                label="反向提示词",
-                placeholder="排除内容（如：低分辨率、水印）"
-            )
-            use_voice = gr.Checkbox(label="启用语音输入（加分项）")
-            # 移除 'source' 参数以兼容 Gradio 版本
-            audio_input = gr.Audio(type="filepath", label="语音输入")
-            generate_btn = gr.Button("生成图像")
-        with gr.Column():
-            prompt_output = gr.Textbox(label="生成的提示词")
-            image_output = gr.Image(label="生成的图像")
-    # 绑定语音转文字（仅当启用时）
-    def conditional_transcribe(audio_path, use_voice_flag):
-        return transcribe(audio_path) if use_voice_flag else None
-    audio_input.change(
-        fn=conditional_transcribe,
-        inputs=[audio_input, use_voice],
-        outputs=desc_input
     )
-    # 点击按钮生成提示词并绘图
-    generate_btn.click(
-        fn=generate,
-        inputs=[desc_input, model_radio, guidance_slider, neg_text, style_dropdown],
-        outputs=[prompt_output, image_output]
     )
-# Step 4: 启动应用
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import torch
+import os
+import logging
 from diffusers import StableDiffusionPipeline
+from PIL import Image
+from openai import OpenAI
+from typing import Optional
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+# Initialise the API client; it stays None when OPENAI_API_KEY is not set
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
+# 初始化Stable Diffusion
+def init_sd_pipeline():
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        torch_dtype = torch.float16 if device == "cuda" else torch.float32
+        logging.info(f"正在加载模型到 {device}，精度：{torch_dtype}")
+        pipe = StableDiffusionPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5",
+            torch_dtype=torch_dtype,
+            safety_checker=None,
+            use_safetensors=True
+        )
+        return pipe.to(device)
+    except Exception as e:
+        logging.error(f"模型初始化失败: {str(e)}")
+        return None
+# VRAM optimisation: clear the CUDA cache before the model is loaded
+torch.cuda.empty_cache()
+# Load the pipeline once at import time; init_sd_pipeline() returns None on failure
+image_pipe = init_sd_pipeline()
+# 文本生成函数
+def generate_prompt(user_input, temperature, top_p, max_tokens, repetition_penalty):
+    if not openai_client:
+        raise gr.Error("OpenAI客户端未初始化，请检查API密钥")
+    system_prompt = """你是一个专业的提示词工程师，请将用户的想法转化为详细的Stable Diffusion提示词。
+遵循以下格式：
+[主体描述]，[环境细节]，[艺术风格]，[画质参数]
+示例：
+魔法屋在空中漂浮，被五彩云环绕，赛博朋克风格，8k分辨率，超精细细节"""
+    try:
+        response = openai_client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_input}
+            ],
+            temperature=temperature,
+            top_p=top_p,
+            max_tokens=max_tokens,
+            presence_penalty=repetition_penalty
+        )
+        return response.choices[0].message.content.strip()
+    except Exception as e:
+        logging.error(f"提示词生成失败: {str(e)}")
+        return f"生成失败: {str(e)}"
+# 图像生成函数
+def generate_image(prompt, negative_prompt="", guidance=7.5, steps=25):
+    if not image_pipe:
+        raise gr.Error("图像模型未初始化")
+    if not prompt:
+        raise gr.Error("请输入提示词")
+    try:
+        logging.info(f"开始生成图像，参数：guidance={guidance}, steps={steps}")
+        generator = torch.Generator()
+        if torch.cuda.is_available():
+            generator = generator.cuda()
+        result = image_pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance,
+            num_inference_steps=int(steps),
+            generator=generator.manual_seed(int(torch.rand(1).item()*1e7)),
+            num_images_per_prompt=1
+        )
+        return result.images[0]
+    except torch.cuda.OutOfMemoryError:
+        torch.cuda.empty_cache()
+        raise gr.Error("显存不足，请尝试：1. 简化提示词 2. 减小迭代次数 3. 降低引导强度")
+    except Exception as e:
+        logging.error(f"图像生成失败: {str(e)}")
+        raise gr.Error(f"生成失败: {str(e)}")
+# 界面布局
+# UI layout: a two-tab Blocks app (prompt design, image generation) followed
+# by the event bindings that connect the buttons to the functions above.
+with gr.Blocks(theme=gr.themes.Soft(), title="AI创作平台") as app:
+    gr.Markdown("## 🎨 智能创作平台 - 文本到图像生成工作流")
+    with gr.Tabs():
+        # Prompt-optimisation tab
+        with gr.Tab("🖋 提示词设计"):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    input_box = gr.Textbox(
+                        label="原始想法",
+                        placeholder="例：空中的魔法屋",
+                        lines=3,
+                        max_lines=6
+                    )
+                    with gr.Accordion("advanced parameters", open=False):
+                        temp_slider = gr.Slider(0.1, 1.5, 0.7,
+                                             label="creative temperature")
+                        top_p_slider = gr.Slider(0.1, 1.0, 0.9,
+                                               label="core sampling ratio")
+                        max_len = gr.Slider(64, 2048, 512, step=64,
+                                          label="maxlength")
+                        # NOTE(review): label looks like a typo for "repetition penalty";
+                        # the value is passed to generate_prompt as repetition_penalty.
+                        rep_penalty = gr.Slider(1.0, 2.0, 1.2,
+                                              label="reapted punishment")
+                    gen_btn = gr.Button("生成专业提示词", variant="primary")
+                output_prompt = gr.Textbox(
+                    label="优化后的提示词",
+                    lines=4,
+                    interactive=True,
+                    elem_classes=["prompt-box"]
+                )
+        # Image-generation tab
+        with gr.Tab("🖼 图像生成"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    prompt_transfer = gr.Textbox(
+                        label="当前提示词",
+                        lines=3,
+                        interactive=True
+                    )
+                    neg_prompt = gr.Textbox(
+                        label="排除内容",
+                        placeholder="例：模糊、低质量、水印",
+                        lines=2
+                    )
+                    with gr.Row():
+                        guidance_slider = gr.Slider(1, 20, 7.5,
+                                                   label="guiding strength", step=0.5)
+                        # NOTE(review): labelled "epochs", but the value is passed as
+                        # the `steps` argument of generate_image.
+                        steps_slider = gr.Slider(10, 50, 25,
+                                               label="epochs", step=5)
+                    image_btn = gr.Button("生成图像", variant="primary")
+                gallery = gr.Gallery(
+                    label="生成结果",
+                    columns=2,
+                    height=600,
+                    object_fit="contain"
+                )
+    # Event bindings
+    # Generate the prompt, then copy it into the image tab's prompt box.
+    gen_btn.click(
+        generate_prompt,
+        [input_box, temp_slider, top_p_slider, max_len, rep_penalty],
+        output_prompt
+    ).then(
+        lambda x: x,
+        output_prompt,
+        prompt_transfer
     )
+    # NOTE(review): generate_image returns a single image while the output
+    # component is a Gallery — confirm Gradio accepts a non-list value here.
+    image_btn.click(
+        generate_image,
+        [prompt_transfer, neg_prompt, guidance_slider, steps_slider],
+        gallery,
+        api_name='generate'
     )
+# Run the app when executed as a script
 if __name__ == "__main__":
+    # Bind to all interfaces on port 7860, show errors in the UI, and do not
+    # create a public share link.
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        share=False
+    )