File size: 5,516 Bytes
2c4a365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPProcessor, CLIPModel
from torchvision import transforms

# Geräteeinstellung
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BLIP-Modelle
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# CLIP-Modelle
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Platzhalter für DeepDanbooru

def danbooru_tagging(image):
    return "1girl, bodysuit, sitting, wooden floor, solo"

def generate_blip_caption(image):
    raw_image = Image.open(image).convert("RGB")
    inputs = blip_processor(raw_image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    caption = blip_processor.decode(out[0], skip_special_tokens=True)
    return caption

def generate_clip_prompt(image, detail_level):
    raw_image = Image.open(image).convert("RGB")
    inputs = clip_processor(images=raw_image, return_tensors="pt").to(device)
    outputs = clip_model.get_image_features(**inputs)
    base_prompt = "a woman in a bodysuit on wooden floor"
    if detail_level >= 4:
        return base_prompt + ", cinematic lighting, ultra detailed, HDR"
    elif detail_level == 3:
        return base_prompt + ", moody atmosphere"
    elif detail_level == 2:
        return base_prompt + ", minimal shadows"
    else:
        return base_prompt

def get_output(image, output_type, style, detail_level, tags, model_choice):
    if model_choice == "BLIP":
        if output_type == "Detailed Description":
            return generate_blip_caption(image)
        elif output_type == "Short Caption":
            return generate_blip_caption(image).split(",")[0]
        elif output_type == "Model Training Data":
            return generate_blip_caption(image).lower().replace(" ", "_")
        elif output_type == "AI Prompt":
            return generate_clip_prompt(image, detail_level)
    elif model_choice == "CLIP":
        return generate_clip_prompt(image, detail_level)
    elif model_choice == "DeepDanbooru":
        return danbooru_tagging(image)
    elif model_choice == "NSFW Detector":
        return "(Simulierter NSFW-Klassifikator: Ergebnis nicht implementiert)"
    return "[Keine gültige Auswahl getroffen]"

with gr.Blocks(css="style.css") as app:
    gr.Markdown("# NSFW Image to Text Generator ✨")

    with gr.Row():
        with gr.Column():
            img = gr.Image(type="filepath", label="Upload Image")
            output_type = gr.Dropdown([
                "Detailed Description", "Short Caption", "Tag List", "AI Prompt", "Model Training Data"], 
                label="Output Type", value="Detailed Description")
            style = gr.Dropdown([
                "Neutral", "Erotic", "Artistic", "Technical", "Literary", "BDSM", "Fetish"], 
                label="Style", value="Neutral")
            detail = gr.Slider(1, 5, step=1, value=3, label="Detail Level")
            tags = gr.Textbox(label="Custom Tags (comma separated)")
            model_choice = gr.Radio([
                "CLIP", "BLIP", "DeepDanbooru", "NSFW Detector"], 
                label="AI Model", value="BLIP")
            btn_generate = gr.Button("Generate Text")

        with gr.Column():
            output = gr.Textbox(label="Generated Output", lines=8)
            gr.Button("Enhance")
            gr.Button("Shorten")
            gr.Button("Rewrite")

    btn_generate.click(get_output, 
        inputs=[img, output_type, style, detail, tags, model_choice], 
        outputs=output)

    with gr.Tab("Prompt Tools"):
        prompt_input = gr.Textbox(label="Prompt Builder")
        btn_optimize = gr.Button("Optimize Prompt")
        btn_random = gr.Button("Randomize")
        optimized_output = gr.Textbox(label="Optimized Prompt")
        btn_optimize.click(lambda p: p + ", ultra detailed", inputs=prompt_input, outputs=optimized_output)
        btn_random.click(lambda: "a cyberpunk alley at night", outputs=optimized_output)

    with gr.Tab("Training Data"):
        btn_tags = gr.Button("Generate Tags")
        tags_out = gr.Textbox(label="Training Tags")
        btn_tags.click(lambda: "1girl, solo, black bodysuit, sitting", outputs=tags_out)

        caption_mode = gr.Dropdown([
            "Basic Caption", "Detailed Description", "Booru Style", "Natural Language"], 
            label="Caption Generation")
        btn_caption = gr.Button("Generate Caption")
        caption_out = gr.Textbox(label="Training Caption")
        btn_caption.click(lambda mode: {
            "Basic Caption": "A woman posing for a photo",
            "Detailed Description": "A woman in a futuristic city wearing a sleek bodysuit.",
            "Booru Style": "1girl, bodysuit, city, night",
            "Natural Language": "She stands still beneath neon lights, calm yet focused."
        }.get(mode, ""), inputs=caption_mode, outputs=caption_out)

        trigger_word = gr.Textbox(label="Trigger Word")
        trigger_class = gr.Textbox(label="Class")
        btn_lora = gr.Button("Prepare LoRA Training Data")
        lora_out = gr.Textbox(label="LoRA Output")
        btn_lora.click(lambda t, c: f"LoRA: {t}, class: {c}", inputs=[trigger_word, trigger_class], outputs=lora_out)

app.launch()