Hug0endob committed on
Commit
6b83d17
·
verified ·
1 Parent(s): 39c30d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -66
app.py CHANGED
@@ -30,23 +30,46 @@ def get_client(key: str = None):
30
  def is_remote(src: str) -> bool:
31
  return bool(src) and src.startswith(("http://", "https://"))
32
 
33
- def fetch_bytes(src: str) -> bytes:
 
34
  if is_remote(src):
35
- r = requests.get(src, timeout=60)
36
- r.raise_for_status()
37
- return r.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  with open(src, "rb") as f:
39
  return f.read()
40
 
41
  def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
42
  img = Image.open(BytesIO(media_bytes))
 
 
 
 
 
43
  if img.mode != "RGB":
44
  img = img.convert("RGB")
45
  h = base_h
46
  w = max(1, int(img.width * (h / img.height)))
47
  img = img.resize((w, h), Image.LANCZOS)
48
  buf = BytesIO()
49
- img.save(buf, format="JPEG", quality=90)
50
  return buf.getvalue()
51
 
52
  def b64_jpeg(img_bytes: bytes) -> str:
@@ -59,10 +82,21 @@ def save_bytes_to_temp(b: bytes, suffix: str):
59
  f.write(b)
60
  return path
61
 
 
 
 
 
 
 
 
62
  def choose_model_for_src(src: str):
63
- lower = (src or "").lower()
64
- video_exts = (".mp4", ".mov", ".webm", ".mkv")
65
- if is_remote(src) or any(lower.endswith(ext) for ext in video_exts):
 
 
 
 
66
  return DEFAULT_VIDEO_MODEL
67
  return DEFAULT_IMAGE_MODEL
68
 
@@ -88,17 +122,20 @@ def extract_delta(chunk):
88
  if not data:
89
  return None
90
  try:
91
- return data.choices[0].delta.content
92
- except Exception:
93
- pass
94
- try:
95
  c = data.choices[0].delta
96
  if isinstance(c, dict):
97
- return c.get("content")
 
98
  except Exception:
99
  pass
100
  try:
101
- return data.choices[0].message.content
 
 
 
 
 
102
  except Exception:
103
  pass
104
  try:
@@ -110,63 +147,60 @@ def extract_delta(chunk):
110
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
111
  client = get_client(api_key)
112
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
113
- image_exts = (".jpg", ".jpeg", ".png", ".webp", ".gif")
114
  lower = (src or "").lower()
115
- is_image = lower.endswith(image_exts) or (not is_remote(src) and os.path.isfile(src) and lower.endswith(image_exts))
 
116
  parts = []
117
 
118
  def stream_and_collect(model, messages):
119
  try:
 
120
  stream_gen = None
121
  try:
122
  stream_gen = client.chat.stream(model=model, messages=messages)
123
  except Exception:
124
  stream_gen = None
125
-
126
  if stream_gen:
127
  for chunk in stream_gen:
128
  d = extract_delta(chunk)
129
  if d:
130
  parts.append(d)
131
- else:
132
- try:
133
- res = client.chat.complete(model=model, messages=messages, stream=False)
134
- except Exception:
135
- res = None
136
- if res is None:
137
- parts.append("[Model returned no response]")
138
- return
 
139
  try:
140
- choices = getattr(res, "choices", None) or res.get("choices", [])
141
- except Exception:
142
- choices = []
143
- if choices:
144
- try:
145
- msg = choices[0].message
146
- if isinstance(msg, dict):
147
- content = msg.get("content")
148
  else:
149
- content = getattr(msg, "content", None)
150
- if content:
151
- if isinstance(content, str):
152
- parts.append(content)
153
- else:
154
- if isinstance(content, list):
155
- for c in content:
156
- if isinstance(c, dict) and c.get("type") == "text":
157
- parts.append(c.get("text", ""))
158
- elif isinstance(content, dict):
159
- text = content.get("text") or content.get("content")
160
- if text:
161
- parts.append(text)
162
- except Exception:
163
- parts.append(str(res))
164
- else:
165
  parts.append(str(res))
 
 
166
  except Exception as e:
167
- parts.append(f"\n[Model error: {e}]")
168
 
169
- # Image handling
170
  if is_image:
171
  try:
172
  raw = fetch_bytes(src)
@@ -175,14 +209,14 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
175
  except Exception as e:
176
  return f"Error processing image: {e}"
177
  msgs = build_messages_for_image(prompt, b64)
178
- stream_and_collect(DEFAULT_IMAGE_MODEL, msgs)
179
- return "".join(parts)
180
 
181
- # Remote video
182
  if is_remote(src):
183
  msgs = build_messages_for_text(prompt, f"Video URL: {src}")
184
  stream_and_collect(choose_model_for_src(src), msgs)
185
- return "".join(parts)
186
 
187
  # Local video fallback: extract a frame with ffmpeg
188
  tmp_media = None
@@ -193,26 +227,39 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
193
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
194
  ffmpeg = shutil.which("ffmpeg")
195
  if ffmpeg:
 
196
  try:
197
  tmp_frame_fd, tmp_frame = tempfile.mkstemp(suffix=".jpg")
198
  os.close(tmp_frame_fd)
199
  cmd = [ffmpeg, "-nostdin", "-y", "-i", tmp_media, "-frames:v", "1", "-q:v", "2", tmp_frame]
200
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
201
- proc.communicate(timeout=20)
202
- if os.path.exists(tmp_frame):
203
  with open(tmp_frame, "rb") as f:
204
  frame_bytes = f.read()
205
  try:
206
  jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
207
  b64 = b64_jpeg(jpg)
208
  msgs = build_messages_for_image(prompt, b64)
209
- stream_and_collect(DEFAULT_IMAGE_MODEL, msgs)
210
- return "".join(parts)
211
  finally:
212
  try:
213
  os.remove(tmp_frame)
214
  except Exception:
215
  pass
 
 
 
 
 
 
 
 
 
 
 
 
216
  except Exception:
217
  pass
218
  return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
@@ -223,7 +270,7 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
223
  except Exception:
224
  pass
225
 
226
- # UI
227
  css = """
228
  .preview_column { min-width: 380px; }
229
  .preview_media img, .preview_media video { max-width: 100%; height: auto; }
@@ -231,17 +278,37 @@ css = """
231
 
232
  def load_preview(url: str):
233
  if not url:
234
- return None, None, '<div id="preview_box" style="color:gray">No URL</div>'
235
  try:
236
  r = requests.get(url, timeout=30, stream=True)
237
  r.raise_for_status()
238
  ctype = r.headers.get("content-type", "")
239
- if (ctype and ctype.startswith("video/")) or any(url.lower().endswith(ext) for ext in (".mp4", ".mov", ".webm", ".mkv")):
240
- return None, url, '<div id="preview_box" style="color:gray">Video</div>'
241
  data = r.content
242
  img = Image.open(BytesIO(data)).convert("RGB")
243
- return img, None, '<div id="preview_box" style="color:gray">Image</div>'
244
  except Exception:
245
- return None, None, '<div id="preview_box" style="color:red">Preview failed</div>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
- with gr.Blocks(title
 
 
def is_remote(src: str) -> bool:
    """Return True when *src* is a non-empty string starting with http:// or https://."""
    if not src:
        # None and "" are never remote.
        return False
    return src.startswith(("http://", "https://"))
32
 
def fetch_bytes(src: str, stream_threshold=20 * 1024 * 1024) -> bytes:
    """Return the raw bytes behind *src*.

    Local paths are read straight from disk. Remote (http/https) sources are
    downloaded; when the server declares a Content-Length larger than
    *stream_threshold* bytes the body is spooled through a temporary file in
    8 KiB chunks before being read back, otherwise it is read in one go.

    Raises whatever ``requests`` raises for connection/HTTP errors
    (``raise_for_status`` is called) and whatever ``open`` raises for a bad
    local path.
    """
    if not is_remote(src):
        with open(src, "rb") as f:
            return f.read()

    with requests.get(src, timeout=60, stream=True) as r:
        r.raise_for_status()
        try:
            declared_size = int(r.headers.get("content-length") or 0)
        except ValueError:
            # A malformed header must not abort the download; treat the size
            # as unknown and take the simple path.
            declared_size = 0

        if declared_size <= stream_threshold:
            return r.content

        # TemporaryFile is removed automatically when the context exits, so
        # nothing is left on disk even if the transfer fails part-way.
        with tempfile.TemporaryFile() as spool:
            for chunk in r.iter_content(8192):
                if chunk:
                    spool.write(chunk)
            spool.seek(0)
            return spool.read()
58
 
def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
    """Decode *media_bytes* as an image and re-encode it as a JPEG *base_h* pixels tall.

    The width is scaled by the same factor as the height (never below 1 px),
    non-RGB images are converted to RGB first, and the result is saved at
    JPEG quality 85.
    """
    picture = Image.open(BytesIO(media_bytes))
    try:
        # Rewind to the first frame so animated formats yield a still image.
        picture.seek(0)
    except Exception:
        pass

    if picture.mode != "RGB":
        picture = picture.convert("RGB")

    target_h = base_h
    target_w = max(1, int(picture.width * (target_h / picture.height)))
    resized = picture.resize((target_w, target_h), Image.LANCZOS)

    encoded = BytesIO()
    resized.save(encoded, format="JPEG", quality=85)
    return encoded.getvalue()
74
 
75
  def b64_jpeg(img_bytes: bytes) -> str:
 
82
  f.write(b)
83
  return path
84
 
# Lower-case extensions (with leading dot) used to classify a source.
IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv")


def ext_from_src(src: str) -> str:
    """Return the lower-cased file extension of *src* (e.g. ".mp4"), or "" if none.

    For http/https sources only the URL path is inspected, so a query string
    or a fragment ("clip.mp4#t=10") does not hide the extension and a bare
    host ("https://example.com") is not mistaken for a ".com" file. For
    anything else the text before the first "?" is used. ``None`` is treated
    as an empty string.
    """
    from urllib.parse import urlsplit

    text = src or ""
    if text.startswith(("http://", "https://")):
        try:
            path = urlsplit(text).path
        except ValueError:
            # urlsplit rejects some malformed URLs; fall back to the plain
            # query-stripping behaviour instead of raising.
            path = text.split("?")[0]
    else:
        path = text.split("?")[0]
    _, ext = os.path.splitext(path)
    return ext.lower()
91
+
def choose_model_for_src(src: str):
    """Pick the model constant for *src* based on its file extension.

    A known image extension selects DEFAULT_IMAGE_MODEL and a known video
    extension selects DEFAULT_VIDEO_MODEL. With no recognised extension,
    remote URLs are assumed to be videos and everything else an image.
    """
    suffix = ext_from_src(src)
    if suffix in IMAGE_EXTS:
        return DEFAULT_IMAGE_MODEL
    # The extension tuples are disjoint, so testing images first is equivalent.
    if suffix in VIDEO_EXTS or is_remote(src):
        return DEFAULT_VIDEO_MODEL
    return DEFAULT_IMAGE_MODEL
102
 
 
122
  if not data:
123
  return None
124
  try:
125
+ # try common shapes and coerce to string
 
 
 
126
  c = data.choices[0].delta
127
  if isinstance(c, dict):
128
+ txt = c.get("content") or c.get("text")
129
+ return str(txt) if txt is not None else None
130
  except Exception:
131
  pass
132
  try:
133
+ msg = data.choices[0].message
134
+ if isinstance(msg, dict):
135
+ txt = msg.get("content")
136
+ else:
137
+ txt = getattr(msg, "content", None)
138
+ return str(txt) if txt is not None else None
139
  except Exception:
140
  pass
141
  try:
 
147
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
148
  client = get_client(api_key)
149
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
 
150
  lower = (src or "").lower()
151
+ ext = ext_from_src(src)
152
+ is_image = ext in IMAGE_EXTS or (not is_remote(src) and os.path.isfile(src) and ext in IMAGE_EXTS)
153
  parts = []
154
 
155
  def stream_and_collect(model, messages):
156
  try:
157
+ # prefer streaming if available; fall back to non-streaming
158
  stream_gen = None
159
  try:
160
  stream_gen = client.chat.stream(model=model, messages=messages)
161
  except Exception:
162
  stream_gen = None
 
163
  if stream_gen:
164
  for chunk in stream_gen:
165
  d = extract_delta(chunk)
166
  if d:
167
  parts.append(d)
168
+ return
169
+ # fallback complete
170
+ res = client.chat.complete(model=model, messages=messages, stream=False)
171
+ # try to extract text
172
+ try:
173
+ choices = getattr(res, "choices", None) or res.get("choices", [])
174
+ except Exception:
175
+ choices = []
176
+ if choices:
177
  try:
178
+ msg = choices[0].message
179
+ if isinstance(msg, dict):
180
+ content = msg.get("content")
181
+ else:
182
+ content = getattr(msg, "content", None)
183
+ if content:
184
+ if isinstance(content, str):
185
+ parts.append(content)
186
  else:
187
+ # handle list/dict shaped content
188
+ if isinstance(content, list):
189
+ for c in content:
190
+ if isinstance(c, dict) and c.get("type") == "text":
191
+ parts.append(c.get("text", ""))
192
+ elif isinstance(content, dict):
193
+ text = content.get("text") or content.get("content")
194
+ if text:
195
+ parts.append(text)
196
+ except Exception:
 
 
 
 
 
 
197
  parts.append(str(res))
198
+ else:
199
+ parts.append(str(res))
200
  except Exception as e:
201
+ parts.append(f"[Model error: {e}]")
202
 
203
+ # Image path
204
  if is_image:
205
  try:
206
  raw = fetch_bytes(src)
 
209
  except Exception as e:
210
  return f"Error processing image: {e}"
211
  msgs = build_messages_for_image(prompt, b64)
212
+ stream_and_collect(choose_model_for_src(src), msgs)
213
+ return "".join(parts).strip()
214
 
215
+ # Remote video: send URL as plain text (avoid embedding non-text chunk types)
216
  if is_remote(src):
217
  msgs = build_messages_for_text(prompt, f"Video URL: {src}")
218
  stream_and_collect(choose_model_for_src(src), msgs)
219
+ return "".join(parts).strip()
220
 
221
  # Local video fallback: extract a frame with ffmpeg
222
  tmp_media = None
 
227
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
228
  ffmpeg = shutil.which("ffmpeg")
229
  if ffmpeg:
230
+ tmp_frame = None
231
  try:
232
  tmp_frame_fd, tmp_frame = tempfile.mkstemp(suffix=".jpg")
233
  os.close(tmp_frame_fd)
234
  cmd = [ffmpeg, "-nostdin", "-y", "-i", tmp_media, "-frames:v", "1", "-q:v", "2", tmp_frame]
235
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
236
+ out, err = proc.communicate(timeout=30)
237
+ if proc.returncode == 0 and os.path.exists(tmp_frame) and os.path.getsize(tmp_frame) > 0:
238
  with open(tmp_frame, "rb") as f:
239
  frame_bytes = f.read()
240
  try:
241
  jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
242
  b64 = b64_jpeg(jpg)
243
  msgs = build_messages_for_image(prompt, b64)
244
+ stream_and_collect(choose_model_for_src(src), msgs)
245
+ return "".join(parts).strip()
246
  finally:
247
  try:
248
  os.remove(tmp_frame)
249
  except Exception:
250
  pass
251
+ else:
252
+ # ffmpeg failed or produced no frame
253
+ try:
254
+ if tmp_frame and os.path.exists(tmp_frame):
255
+ os.remove(tmp_frame)
256
+ except Exception:
257
+ pass
258
+ except subprocess.TimeoutExpired:
259
+ try:
260
+ proc.kill()
261
+ except Exception:
262
+ pass
263
  except Exception:
264
  pass
265
  return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
 
270
  except Exception:
271
  pass
272
 
273
+ # Simple UI: left preview + inputs, right final text only (plain text)
274
  css = """
275
  .preview_column { min-width: 380px; }
276
  .preview_media img, .preview_media video { max-width: 100%; height: auto; }
 
278
 
def load_preview(url: str):
    """Build the preview for *url* as an ``(image, video_url, status)`` triple.

    Returns ``(None, None, "No URL")`` for an empty input,
    ``(None, url, "Video")`` when the response content type starts with
    "video/" or the URL (query string removed) ends with a video extension,
    ``(PIL image, None, "Image")`` when the body decodes as an image, and
    ``(None, None, "Preview failed")`` on any error.
    """
    if not url:
        return None, None, "No URL"
    try:
        # The context manager releases the streamed connection on every path,
        # including the video branch where the body is never read.
        with requests.get(url, timeout=30, stream=True) as r:
            r.raise_for_status()
            ctype = r.headers.get("content-type", "")
            path_part = url.lower().split("?")[0]
            if (ctype and ctype.startswith("video/")) or path_part.endswith(VIDEO_EXTS):
                return None, url, "Video"
            data = r.content
        img = Image.open(BytesIO(data)).convert("RGB")
        return img, None, "Image"
    except Exception:
        # Best-effort preview: any network or decode failure is reported as a status.
        return None, None, "Preview failed"
293
+
# Two-column layout: preview and inputs on the left, generated text on the right.
with gr.Blocks(title="Flux", css=css) as demo:
    with gr.Row():
        with gr.Column(scale=1, elem_classes="preview_column"):
            preview_status = gr.Textbox(
                value="No URL",
                interactive=False,
                label="Preview Status",
            )
            preview_image = gr.Image(
                label="Preview Image",
                type="pil",
                elem_classes="preview_media",
            )
            preview_video = gr.Video(
                label="Preview Video",
                elem_classes="preview_media",
            )

            url_input = gr.Textbox(
                label="Image or Video URL",
                placeholder="https://...",
                lines=1,
            )
            custom_prompt = gr.Textbox(
                label="Prompt (optional)",
                lines=2,
                value="",
            )
            with gr.Accordion("Mistral API Key (optional)", open=False):
                api_key = gr.Textbox(
                    label="API Key",
                    type="password",
                    max_lines=1,
                )
            submit = gr.Button("Submit")

        with gr.Column(scale=2):
            final_text = gr.Textbox(label="Final Text", lines=20)

    # Refresh the preview whenever the URL changes.
    url_input.change(
        fn=load_preview,
        inputs=[url_input],
        outputs=[preview_image, preview_video, preview_status],
    )
    # Run the model and show its text on submit.
    submit.click(
        fn=generate_final_text,
        inputs=[url_input, custom_prompt, api_key],
        outputs=[final_text],
    )

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )