File size: 1,677 Bytes
40e72bf
98ec970
30a1716
98ec970
30a1716
98ec970
30a1716
98ec970
 
 
 
 
 
 
 
 
 
30a1716
98ec970
 
 
 
 
 
 
 
 
 
 
 
 
30a1716
98ec970
 
 
30a1716
98ec970
 
30a1716
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import subprocess
import sys

# Best-effort runtime install of the pinned dependencies.
# - `sys.executable -m pip` targets the interpreter that is running this
#   script, which a bare `pip` on PATH does not guarantee.
# - An argument list is used instead of a shell string, so no shell is involved.
# - A failed install is reported but does not abort: the packages may already
#   be present, and the imports below will fail loudly if they are not.
_PINNED_PACKAGES = [
    "gradio==4.0.0",
    "transformers==4.36.2",
    "torch==2.0.1",
    "pillow==9.4.0",
    "accelerate==0.30.0",
    "bitsandbytes==0.43.0",
]
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", *_PINNED_PACKAGES],
    check=False,
)
if _pip_result.returncode != 0:
    print(
        f"warning: pip install exited with status {_pip_result.returncode}; "
        "continuing with the packages already installed",
        file=sys.stderr,
    )
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVisionAndLanguageGeneration

# Load the DAM-3B checkpoint and its processor.
# NOTE(review): `AutoModelForVisionAndLanguageGeneration` (imported at the top
# of this file) is not among the documented transformers Auto classes; confirm
# that the import resolves for the pinned transformers version and that this
# checkpoint can actually be loaded through the Auto API.
from transformers import BitsAndBytesConfig

model_name = "nvidia/DAM-3B"
processor = AutoProcessor.from_pretrained(model_name)

# 4-bit quantization to lower memory usage (intended for the free Space tier).
# The settings are passed through an explicit BitsAndBytesConfig rather than
# the bare `load_in_4bit` kwarg, and the compute dtype is set to float16 so it
# matches `torch_dtype` instead of falling back to the float32 default.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForVisionAndLanguageGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
    device_map="auto",
)

def generate_detailed_caption(image):
    """Generate a detailed text description of an uploaded image.

    Args:
        image: The image handed over by the Gradio ``Image`` input (a PIL
            image, given ``type="pil"``), or ``None`` when nothing has been
            uploaded.

    Returns:
        The generated caption prefixed with "图像细节描述:", or a short
        message asking for an upload when ``image`` is ``None``.
    """
    # Gradio passes None when the button is clicked with no image loaded;
    # answer in the output box instead of failing inside the processor.
    if image is None:
        return "请先上传图片后再生成描述。"

    # Uploads may be RGBA, palette or grayscale; normalise to 3-channel RGB.
    # Inputs without a `mode` attribute (non-PIL) are passed through untouched.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    # Whole-image description only; no region prompt is supplied here.
    inputs = processor(images=image, return_tensors="pt").to(model.device, torch.float16)

    # Generation settings tuned for long, detailed output.
    outputs = model.generate(
        **inputs,
        max_length=200,  # allow longer descriptions so more detail is kept
        num_beams=4,  # beam search for more coherent text
        no_repeat_ngram_size=3,  # avoid repeated phrases
        early_stopping=True,
    )
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return f"图像细节描述:{caption}"

# Assemble the Gradio interface: a heading, an image upload, a text box for the
# result, and a button that runs the captioning function.
with gr.Blocks(title="图像细节描述工具") as demo:
    gr.Markdown("# 图像细节描述工具(DAM-3B优化版)")
    image_input = gr.Image(type="pil", label="上传图片")
    text_output = gr.Textbox(label="生成细节描述", lines=5)
    generate_button = gr.Button("生成详细描述")
    generate_button.click(
        fn=generate_detailed_caption,
        inputs=image_input,
        outputs=text_output,
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()