# Hugging Face Spaces page metadata (status: Sleeping) — not part of the program.
| # 0. ํ์ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ค์น (Colab์์ ์คํ ์ ์ฃผ์ ํด์ ํ ๋จผ์ ์คํ) | |
| # !pip install gradio transformers torch pillow | |
| import gradio as gr | |
| import torch | |
| import spaces | |
| from PIL import Image | |
| from transformers import CLIPProcessor, CLIPModel | |
# 1. Load the CLIP model and processor.
# The first run may take a while because the weights are downloaded.
model_name = "openai/clip-vit-base-patch32"
try:
    model = CLIPModel.from_pretrained(model_name)
    processor = CLIPProcessor.from_pretrained(model_name)
    print(f"๋ชจ๋ธ ๋ก๋ ์๋ฃ: {model_name}")
except Exception as e:
    print(f"๋ชจ๋ธ ๋ก๋ ์ค ์ค๋ฅ ๋ฐ์: {e}")
    # Without this re-raise, execution continued and `model.to(device)` below
    # crashed with a NameError that masked the real load failure.
    raise

# Use CUDA when a GPU is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
| # 2. ์ถ๋ก ํจ์ ์ ์ | |
# 2. Inference function.
# `spaces` is imported at the top of the file, which implies a ZeroGPU Space:
# there, CUDA is only available inside functions decorated with @spaces.GPU.
@spaces.GPU
def predict(image, text_options):
    """Rank comma-separated text candidates by CLIP similarity to an image.

    Args:
        image: PIL image from the Gradio image input (None when cleared).
        text_options: comma-separated candidate descriptions.

    Returns:
        Dict mapping each candidate to its softmax probability (the
        {label: score} shape gr.Label expects), or None when no image
        was provided.

    Raises:
        gr.Error: when no non-empty text candidate was entered.
    """
    if image is None:
        return None

    # Split on commas and drop empty / whitespace-only candidates.
    candidates = [t.strip() for t in text_options.split(",") if t.strip()]
    if not candidates:
        # gr.Label postprocessing expects float scores; the previous
        # {"Error": "<message>"} return crashed it. gr.Error shows the
        # message to the user instead.
        raise gr.Error("ํ ์คํธ ํ๋ณด๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์.")

    # Tokenize the texts and preprocess the image in one call, then move
    # the batch to the model's device.
    inputs = processor(
        text=candidates,
        images=image,
        return_tensors="pt",
        padding=True,
    ).to(device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Image-to-text similarity logits -> softmax probabilities.
    probs = outputs.logits_per_image.softmax(dim=1).cpu().numpy()[0]
    return {cand: float(p) for cand, p in zip(candidates, probs)}
| # 3. Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ | |
# 3. Gradio interface wiring: build each component, then assemble.
_image_input = gr.Image(type="pil", label="์ด๋ฏธ์ง ์ ๋ก๋")
_text_input = gr.Textbox(
    label="ํ๋ณด ํ ์คํธ (์ผํ๋ก ๊ตฌ๋ถ)",
    placeholder="์: soccer player, baseball player, referee",  # example hint
    value="cat, dog, car",  # default candidates
)
_label_output = gr.Label(num_top_classes=3, label="๋งค์นญ ๊ฒฐ๊ณผ")

iface = gr.Interface(
    fn=predict,
    inputs=[_image_input, _text_input],
    outputs=_label_output,
    title="CLIP ์ด๋ฏธ์ง-ํ ์คํธ ๋งค์นญ๊ธฐ",
    description="์ด๋ฏธ์ง๋ฅผ ์ ๋ก๋ํ๊ณ , ๊ทธ ์ด๋ฏธ์ง๊ฐ ๋ฌด์์ธ์ง ์ค๋ช ํ๋ ๋จ์ด๋ค์ ์ผํ(,)๋ก ๊ตฌ๋ถํด ์ ์ด์ฃผ์ธ์. AI๊ฐ ๊ฐ์ฅ ์ ์ ํ ์ค๋ช ์ ์ฐพ์์ค๋๋ค.",
)
# Script entry point.
if __name__ == "__main__":
    # share=True creates a publicly reachable link (handy when running on Colab).
    iface.launch(share=True)