Hug0endob committed on
Commit
09c7c56
·
verified ·
1 Parent(s): 7766a5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -7,22 +7,28 @@ from PIL import Image, ImageSequence
7
  from transformers import AutoProcessor, LlavaForConditionalGeneration
8
  import gradio as gr
9
 
10
- MODEL_NAME = "fancyfeast/llama-joycaption-beta-one-hf-llava" # public repo
 
 
 
 
11
 
12
- # Optional: read HF token from secrets if you set HF_TOKEN in Space (not required for public repo)
13
- HF_TOKEN = os.getenv("HF_TOKEN")
14
-
15
- # Load processor and model (CPU only)
16
  token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
17
  processor = AutoProcessor.from_pretrained(MODEL_NAME, **token_arg)
18
  llava_model = LlavaForConditionalGeneration.from_pretrained(
19
  MODEL_NAME,
20
  device_map="cpu",
21
  torch_dtype=torch.bfloat16,
22
- **token_arg
23
  )
24
  llava_model.eval()
25
 
 
 
 
26
  def download_bytes(url: str, timeout: int = 30) -> bytes:
27
  resp = requests.get(url, stream=True, timeout=timeout)
28
  resp.raise_for_status()
@@ -39,7 +45,6 @@ def mp4_to_gif(mp4_bytes: bytes) -> bytes:
39
  resp.raise_for_status()
40
  match = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text)
41
  if not match:
42
- # try to extract via other img tags
43
  match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
44
  if not match:
45
  raise RuntimeError("Failed to extract GIF URL from ezgif response")
@@ -60,6 +65,9 @@ def load_first_frame_from_bytes(raw: bytes) -> Image.Image:
60
  img = img.convert("RGB")
61
  return img
62
 
 
 
 
63
  def generate_caption_from_url(url: str, prompt: str = "Describe the image.") -> str:
64
  if not url:
65
  return "No URL provided."
@@ -70,7 +78,8 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
70
 
71
  lower = url.lower().split("?")[0]
72
  try:
73
- if lower.endswith(".mp4") or b"ftyp" in raw[:16].lower():
 
74
  try:
75
  raw = mp4_to_gif(raw)
76
  except Exception as e:
@@ -89,17 +98,25 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
89
  except Exception as e:
90
  return f"Inference error: {e}"
91
 
92
- iface = gr.Interface(
 
 
 
 
93
  fn=generate_caption_from_url,
94
  inputs=[
95
  gr.Textbox(label="Image / GIF / MP4 URL", placeholder="https://example.com/photo.jpg"),
96
  gr.Textbox(label="Prompt (optional)", value="Describe the image."),
97
  ],
98
  outputs=gr.Textbox(label="Generated caption"),
99
- title="JoyCaption (public fancyfeast) - URL input",
100
  description="Paste a direct link to an image, GIF, or MP4. MP4 files are converted to GIF via ezgif.com; the first frame is captioned.",
101
- allow_flagging="never",
102
  )
103
 
 
 
 
 
 
104
  if __name__ == "__main__":
105
  iface.launch()
 
7
  from transformers import AutoProcessor, LlavaForConditionalGeneration
8
  import gradio as gr
9
 
10
+ # ---------------------------
11
+ # Config
12
+ # ---------------------------
13
+ MODEL_NAME = "fancyfeast/llama-joycaption-beta-one-hf-llava"
14
+ HF_TOKEN = os.getenv("HF_TOKEN") # optional secret in Space settings
15
 
16
+ # ---------------------------
17
+ # Load model & processor
18
+ # ---------------------------
 
19
  token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
20
  processor = AutoProcessor.from_pretrained(MODEL_NAME, **token_arg)
21
  llava_model = LlavaForConditionalGeneration.from_pretrained(
22
  MODEL_NAME,
23
  device_map="cpu",
24
  torch_dtype=torch.bfloat16,
25
+ **token_arg,
26
  )
27
  llava_model.eval()
28
 
29
+ # ---------------------------
30
+ # Helpers
31
+ # ---------------------------
32
  def download_bytes(url: str, timeout: int = 30) -> bytes:
33
  resp = requests.get(url, stream=True, timeout=timeout)
34
  resp.raise_for_status()
 
45
  resp.raise_for_status()
46
  match = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text)
47
  if not match:
 
48
  match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
49
  if not match:
50
  raise RuntimeError("Failed to extract GIF URL from ezgif response")
 
65
  img = img.convert("RGB")
66
  return img
67
 
68
+ # ---------------------------
69
+ # Main inference
70
+ # ---------------------------
71
  def generate_caption_from_url(url: str, prompt: str = "Describe the image.") -> str:
72
  if not url:
73
  return "No URL provided."
 
78
 
79
  lower = url.lower().split("?")[0]
80
  try:
81
+ # crude MP4 detection by extension or ftyp box signature
82
+ if lower.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
83
  try:
84
  raw = mp4_to_gif(raw)
85
  except Exception as e:
 
98
  except Exception as e:
99
  return f"Inference error: {e}"
100
 
101
+ # ---------------------------
102
+ # Gradio UI (compatible init)
103
+ # ---------------------------
104
+ # Use try/except to support Gradio versions that don't accept allow_flagging
105
+ gradio_kwargs = dict(
106
  fn=generate_caption_from_url,
107
  inputs=[
108
  gr.Textbox(label="Image / GIF / MP4 URL", placeholder="https://example.com/photo.jpg"),
109
  gr.Textbox(label="Prompt (optional)", value="Describe the image."),
110
  ],
111
  outputs=gr.Textbox(label="Generated caption"),
112
+ title="JoyCaption (fancyfeast) - URL input",
113
  description="Paste a direct link to an image, GIF, or MP4. MP4 files are converted to GIF via ezgif.com; the first frame is captioned.",
 
114
  )
115
 
116
+ try:
117
+ iface = gr.Interface(**gradio_kwargs, allow_flagging="never")
118
+ except TypeError:
119
+ iface = gr.Interface(**gradio_kwargs)
120
+
121
  if __name__ == "__main__":
122
  iface.launch()