Tojichok committed on
Commit
23af1bd
·
verified ·
1 Parent(s): 1096d78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -34
app.py CHANGED
@@ -1,47 +1,40 @@
1
- import os, io, requests
 
2
  import gradio as gr
3
  from PIL import Image
4
  import torch
5
- from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
6
 
7
# 1) Compute device: CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2) Captioning model. The model, its image processor and its tokenizer are
#    all loaded from the same checkpoint; only the model is moved to `device`.
_CAPTION_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"
model = VisionEncoderDecoderModel.from_pretrained(_CAPTION_CHECKPOINT).to(device)
processor = ViTImageProcessor.from_pretrained(_CAPTION_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CAPTION_CHECKPOINT)

# 3) TMDb: the API key is read from the environment at import time, so a
#    missing TMDB_API_KEY raises KeyError on startup.
TMDB_KEY = os.environ["TMDB_API_KEY"]
TMDB_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
20
 
21
def generate_caption(image: Image.Image):
    """Generate a short caption for ``image``.

    The image is converted to pixel values on ``device``, the captioning model
    generates at most 50 new tokens, and the first generated sequence is
    decoded without special tokens and stripped of surrounding whitespace.
    """
    encoded = processor(images=image, return_tensors="pt")
    pixels = encoded.pixel_values.to(device)
    generated = model.generate(pixels, max_new_tokens=50)
    return tokenizer.decode(generated[0], skip_special_tokens=True).strip()
27
 
28
def caption_to_movies(image: Image.Image, dummy):
    """Caption an image and search TMDb for movies matching that caption.

    Steps:
      1) generate a caption for ``image``;
      2) query the TMDb movie-search endpoint with the caption;
      3) keep the top three hits as ``{"title", "url"}`` entries.

    Args:
        image: PIL image (poster or movie frame) to caption.
        dummy: Unused; present only so the signature accepts the second
            (hidden) input component.

    Returns:
        ``{"caption": <str>, "results": <list of dicts>}``. When the image is
        missing or the TMDb lookup fails, ``results`` holds a single
        ``{"error": ...}`` entry instead of movie entries.
    """
    # NOTE(review): the Interface in this file declares two output components
    # while this function returns a single dict -- confirm Gradio maps the
    # return value as intended.
    if image is None:
        return {"caption": "", "results": [{"error": "No image provided"}]}

    caption = generate_caption(image)

    params = {"api_key": TMDB_KEY, "query": caption}
    try:
        # A timeout keeps a stalled TMDb connection from hanging the handler.
        resp = requests.get(TMDB_SEARCH_URL, params=params, timeout=10)
    except requests.RequestException as exc:
        # Report only the exception type: the exception message can embed the
        # request URL, which contains the API key.
        return {
            "caption": caption,
            "results": [{"error": f"TMDb request failed: {type(exc).__name__}"}],
        }

    if resp.status_code != 200:
        return {"caption": caption, "results": [{"error": f"TMDb {resp.status_code}"}]}

    try:
        payload = resp.json()
    except ValueError:
        return {"caption": caption, "results": [{"error": "TMDb returned invalid JSON"}]}

    # Entries without an "id" cannot be linked to, so they are skipped.
    results = [
        {
            "title": m.get("title", "Unknown"),
            "url": f"https://www.themoviedb.org/movie/{m['id']}",
        }
        for m in payload.get("results", [])[:3]
        if "id" in m
    ]

    return {"caption": caption, "results": results}
51
 
52
# 4) Gradio interface: one visible image input, a hidden textbox that fills
#    the handler's second parameter, and caption + JSON outputs.
iface = gr.Interface(
    title="Movie Finder via Caption + TMDb",
    description="Генерирует подпись кадра и ищет фильмы на TMDb по этому описанию",
    fn=caption_to_movies,
    inputs=[
        gr.Image(type="pil", label="Постер или кадр фильма"),
        gr.Textbox(visible=False),  # placeholder for the handler's second argument
    ],
    outputs=[
        gr.Textbox(label="Auto‑caption"),
        gr.JSON(label="Top‑3 Movies (title + TMDb URL)"),
    ],
)
66
 
67
  if __name__ == "__main__":
 
1
+ import os
2
+ import requests
3
  import gradio as gr
4
  from PIL import Image
5
  import torch
6
+ from transformers import BlipProcessor, BlipForConditionalGeneration
7
 
8
# 1) Compute device: CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# 2) Lightweight BLIP captioning checkpoint (~240 MiB according to the
#    original author's note). Processor and model share the same checkpoint;
#    only the model is moved to `device`.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT).to(device)

# 3) TMDb API: the key is read from the environment at import time, so a
#    missing TMDB_API_KEY raises KeyError on startup.
TMDB_KEY = os.environ["TMDB_API_KEY"]
TMDB_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
20
 
21
def caption_and_search(image: Image.Image, _):
    """Caption an image with BLIP and search TMDb for matching movies.

    Args:
        image: PIL image (poster or movie frame) to caption.
        _: Unused; present only so the signature accepts the second (hidden)
            input component.

    Returns:
        ``{"caption": <str>, "results": <list of dicts>}``. Each result is a
        ``{"title", "url"}`` entry for one of the top three TMDb hits. When
        the image is missing or the TMDb lookup fails, ``results`` holds a
        single ``{"error": ...}`` entry instead.
    """
    # NOTE(review): the Interface in this file declares two output components
    # while this function returns a single dict -- confirm Gradio maps the
    # return value as intended.
    if image is None:
        return {"caption": "", "results": [{"error": "No image provided"}]}

    # 4) Generate the caption (the original author estimates ~3-5 s on CPU).
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=30)
    caption = processor.decode(out[0], skip_special_tokens=True).strip()

    # 5) Search TMDb using the caption as the query string.
    params = {"api_key": TMDB_KEY, "query": caption}
    try:
        resp = requests.get(TMDB_SEARCH_URL, params=params, timeout=10)
    except requests.RequestException as exc:
        # Report only the exception type: the exception message can embed the
        # request URL, which contains the API key.
        return {
            "caption": caption,
            "results": [{"error": f"TMDb request failed: {type(exc).__name__}"}],
        }

    if resp.status_code != 200:
        return {
            "caption": caption,
            "results": [{"error": f"TMDb API returned {resp.status_code}"}],
        }

    try:
        payload = resp.json()
    except ValueError:
        return {"caption": caption, "results": [{"error": "TMDb returned invalid JSON"}]}

    # Entries without an "id" cannot be linked to, so they are skipped.
    results = [
        {
            "title": m.get("title", "Unknown"),
            "url": f"https://www.themoviedb.org/movie/{m['id']}",
        }
        for m in payload.get("results", [])[:3]
        if "id" in m
    ]

    return {"caption": caption, "results": results}
44
 
45
# 6) Gradio interface: one visible image input, a hidden textbox that fills
#    the handler's second parameter, and caption + JSON outputs.
iface = gr.Interface(
    title="Fast Movie Finder (BLIP‑Base + TMDb)",
    description="≈240 MiB на CPU даёт caption за 3–5 сек и сразу ищет топ‑3 фильма в TMDb",
    fn=caption_and_search,
    inputs=[
        gr.Image(type="pil", label="Постер или кадр фильма"),
        gr.Textbox(visible=False),  # placeholder for the handler's second argument
    ],
    outputs=[
        gr.Textbox(label="Auto‑caption"),
        gr.JSON(label="Top‑3 Movies (title + TMDb URL)"),
    ],
)
59
 
60
  if __name__ == "__main__":