Spaces:
Runtime error
Runtime error
| # app.py | |
| # app.py | |
import os
import subprocess
import sys

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
# Install flash-attn at startup, skipping its CUDA build.
# NOTE(review): presumably required by the Florence-2 remote code loaded
# below -- confirm against the model repository.
#
# The child process must inherit the parent environment. Passing a dict that
# contains only the override would drop PATH, HOME, proxy and virtualenv
# variables, so pip could resolve to the wrong interpreter or fail outright.
# Invoking pip through the running interpreter with an argument list also
# avoids going through a shell.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best-effort, as before: a failed install does not abort startup
)
# 1. Device selection: use CUDA when torch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 2. Load the Florence model and its processor from the same checkpoint.
_FLORENCE_CHECKPOINT = 'microsoft/Florence-2-base'

florence_model = AutoModelForCausalLM.from_pretrained(
    _FLORENCE_CHECKPOINT,
    trust_remote_code=True,
)
# Move the weights to the selected device and switch to evaluation mode.
florence_model = florence_model.to(device).eval()

florence_processor = AutoProcessor.from_pretrained(
    _FLORENCE_CHECKPOINT,
    trust_remote_code=True,
)
# 3. Image description generation function
def generate_caption(image):
    """Generate a text description of ``image`` with the Florence model.

    Args:
        image: A ``PIL.Image.Image`` or an array accepted by
            ``Image.fromarray``. ``None`` (no image supplied) is tolerated.

    Returns:
        The decoded and post-processed description with every occurrence of
        "Asian" replaced by "Korean", or an empty string when ``image`` is
        ``None``.
    """
    # Nothing to describe: return early instead of failing inside
    # Image.fromarray(None).
    if image is None:
        return ""

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Normalise RGBA / grayscale / palette uploads to three channels.
    # NOTE(review): assumes the Florence image processor expects RGB input --
    # confirm against the checkpoint's preprocessor config.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Instruction asking for a detailed Korean description of 30-50 words.
    # NOTE(review): these literals look mis-encoded in this copy of the file
    # and are reproduced unchanged; restore the original UTF-8 text if so.
    # NOTE(review): Florence-2 is documented as being driven by task tokens
    # such as <MORE_DETAILED_CAPTION>; verify that a free-form instruction is
    # actually honoured by the model.
    instruction = (
        "์ด ์ด๋ฏธ์ง๋ฅผ 30์์ 50๋จ์ด ๋ถ๋์ ํ๊ตญ์ด๋ก ์์ธํ ์ค๋ช ํ์ธ์. "
        "๋ฐฐ๊ฒฝ, ์์, ์ง๊ฐ, ์ธ๋ฌผ์ ํ์ ๊ณผ ์์, ์กฐ๋ช , ๊ตฌ๋, ๋ถ์๊ธฐ ๋ฑ์ ๋ชจ๋ ํฌํจํ์ฌ ์์ ํด ์ฃผ์ธ์."
    )

    inputs = florence_processor(
        text=instruction,
        images=image,
        return_tensors="pt",
    ).to(device)

    # Deterministic beam search (no sampling) with three beams.
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3,
        early_stopping=False,
    )

    # Special tokens are kept here; the decoded text is handed to the
    # processor's post-processing step below.
    generated_text = florence_processor.batch_decode(
        generated_ids,
        skip_special_tokens=False,
    )[0]
    parsed = florence_processor.post_process_generation(
        generated_text,
        task=instruction,
        image_size=(image.width, image.height),
    )

    # The post-processed result is looked up under the task string passed in.
    # str.replace is a no-op when "Asian" is absent, so no membership check
    # is needed for the "Asian" -> "Korean" correction.
    caption = parsed[instruction].replace("Asian", "Korean")

    print("โ ์์ฑ ์๋ฃ:\n", caption)
    return caption
# 4. Build the interface with Gradio Blocks (the caricature button is kept)
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as demo:
    gr.Markdown("## ๐ผ๏ธ ์ด๋ฏธ์ง ์ค๋ช ์์ฑ๊ธฐ")

    # This notice mentions CPU mode, so it is only rendered when the app
    # really runs on the CPU; previously it was shown even with CUDA selected.
    if device == "cpu":
        gr.Markdown(
            "โ ํ์ฌ CPU ๋ชจ๋๋ก ์คํ ์ค์ด๋ฏ๋ก ์๋๊ฐ ๋๋ฆด ์ ์์ต๋๋ค. ์ํด ๋ถํ๋๋ฆฝ๋๋ค."
        )

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="์ ๋ ฅ ์ด๋ฏธ์ง", type="pil")

        with gr.Column():
            # lines was raised from 3 to 6 to double the text box height.
            caption_output = gr.Textbox(
                label="์์ฑ๋ ์ค๋ช ",
                lines=6,
                show_copy_button=True,
            )

            # Link button below the output that opens the caricature Space
            # in a new tab.
            gr.HTML("""
            <div style='margin-top: 10px; text-align: center;'>
            <a href="https://huggingface.co/spaces/VIDraft/stable-diffusion-3.5-large-turboX" target="_blank">
            <button style='
            padding: 10px 20px;
            background-color: #ff9900;
            color: white;
            border: none;
            border-radius: 10px;
            font-size: 16px;
            box-shadow: 2px 2px 8px rgba(0,0,0,0.3);
            cursor: pointer;
            '>
            ๐จ ์บ๋ฆฌ์ปค์ณ ๋ง๋ค๊ธฐ
            </button>
            </a>
            </div>
            """)

    # Uploading an image automatically calls generate_caption and writes the
    # result into the output text box.
    image_input.upload(
        fn=generate_caption,
        inputs=image_input,
        outputs=caption_output,
    )

# 5. Run the web app
if __name__ == "__main__":
    demo.launch(debug=True)
| # import gradio as gr | |
| # import torch | |
| # from PIL import Image | |
| # from transformers import BlipProcessor, BlipForConditionalGeneration | |
| # # 1. ์ฅ์น ์ค์ | |
| # device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # # 2. ๋ชจ๋ธ ๋ฐ ํ๋ก์ธ์ ๋ก๋ | |
| # processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") | |
| # model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device) | |
| # # 3. ์ด๋ฏธ์ง ์ค๋ช ์์ฑ ํจ์ | |
| # def generate_caption(image): | |
| # if image is None: | |
| # return "์ด๋ฏธ์ง๋ฅผ ์ ๋ก๋ํด์ฃผ์ธ์." | |
| # # ๊ณ ์ ์ฒ๋ฆฌ๋ฅผ ์ํ ๋ฆฌ์ฌ์ด์ฆ | |
| # image = image.resize((384, 384)) | |
| # # ์ค๋ช ์์ฑ | |
| # inputs = processor(images=image, return_tensors="pt").to(device) | |
| # output_ids = model.generate(**inputs, max_length=50) | |
| # caption = processor.decode(output_ids[0], skip_special_tokens=True) | |
| # print("โ ์์ฑ๋ ์ค๋ช :", caption) | |
| # return caption | |
| # # 4. Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ | |
| # with gr.Blocks(title="์ด๋ฏธ์ง ์ค๋ช ์์ฑ๊ธฐ") as demo: | |
| # gr.Markdown("## ๐ผ๏ธ ์ด๋ฏธ์ง๋ฅผ ์ ๋ก๋ํ๋ฉด ์ค๋ช ์ด ์๋ ์์ฑ๋ฉ๋๋ค.") | |
| # with gr.Row(): | |
| # with gr.Column(): | |
| # image_input = gr.Image(label="์ ๋ ฅ ์ด๋ฏธ์ง", type="pil") | |
| # with gr.Column(): | |
| # caption_output = gr.Textbox(label="์์ฑ๋ ์ค๋ช ", lines=3, show_copy_button=True) | |
| # # HTML๋ก ๋ฒํผ ์์ฑ | |
| # gr.HTML(""" | |
| # <div style='margin-top: 10px; text-align: center;'> | |
| # <a href="https://huggingface.co/spaces/VIDraft/stable-diffusion-3.5-large-turboX" target="_blank"> | |
| # <button style='padding: 10px 20px; background-color: #ff9900; color: white; border: none; border-radius: 10px; font-size: 16px; box-shadow: 2px 2px 8px rgba(0,0,0,0.3); cursor: pointer;'> | |
| # ๐จ ์บ๋ฆฌ์ปค์ณ ๋ง๋ค๊ธฐ | |
| # </button> | |
| # </a> | |
| # </div> | |
| # """) | |
| # # ์ ๋ก๋ โ ์ค๋ช ์๋ ์์ฑ ์ฐ๊ฒฐ | |
| # image_input.upload(fn=generate_caption, inputs=image_input, outputs=caption_output) | |
| # # 5. ์ฑ ์คํ | |
| # demo.launch(debug=True) | |