Emilianohack6950 committed on
Commit
aaf0bd8
·
verified ·
1 Parent(s): 490aa25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -13,6 +13,7 @@ import os
13
  import json
14
  import gradio as gr
15
  from playwright.async_api import async_playwright
 
16
 
17
  USER_AGENT = (
18
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -20,6 +21,14 @@ USER_AGENT = (
20
  "Chrome/91.0.4472.124 Safari/537.36"
21
  )
22
 
 
 
 
 
 
 
 
 
23
  async def scrape_images(url, max_imgs):
24
  max_imgs = max(10, min(max_imgs, 300))
25
  async with async_playwright() as p:
@@ -29,7 +38,6 @@ async def scrape_images(url, max_imgs):
29
  viewport={"width": 1366, "height": 768},
30
  )
31
 
32
- # Cargar cookies desde variable de entorno si está definida
33
  cookies_env = os.getenv("COOKIES_JSON")
34
  if cookies_env:
35
  try:
@@ -79,24 +87,28 @@ async def scrape_images(url, max_imgs):
79
  await browser.close()
80
  return collected_data[:max_imgs]
81
 
82
- def run_scraper(url, max_imgs):
 
 
83
  return asyncio.run(scrape_images(url, int(max_imgs)))
84
 
85
- def interface_fn(url, max_imgs):
86
- results = run_scraper(url, max_imgs)
87
  images = [(item["img_url"], f"Usuario: {item['user']}") for item in results]
88
  return images
89
 
90
  demo = gr.Interface(
91
  fn=interface_fn,
92
  inputs=[
93
- gr.Textbox(label="URL de la galería DeviantArt", lines=1, value="https://www.deviantart.com/silkedead/gallery/68498591/screenshots-film-and-movie"),
 
94
  gr.Slider(minimum=10, maximum=300, step=1, value=30, label="Máximo de imágenes")
95
  ],
96
  outputs=gr.Gallery(label="Imágenes recolectadas"),
97
  title="Scraper de Imágenes - DeviantArt",
98
- description="Introduce la URL de la galería DeviantArt y la cantidad máxima de imágenes que quieres recolectar."
99
  )
100
 
101
  if __name__ == "__main__":
102
  demo.launch()
 
 
13
  import json
14
  import gradio as gr
15
  from playwright.async_api import async_playwright
16
+ from urllib.parse import quote_plus
17
 
18
  USER_AGENT = (
19
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
 
21
  "Chrome/91.0.4472.124 Safari/537.36"
22
  )
23
 
24
def build_url(input_str):
    """Turn free-form user input into the URL that should be scraped.

    Surrounding whitespace is stripped first. Input that already starts with
    an ``http://`` or ``https://`` scheme (in any letter case) is returned
    unchanged apart from the stripping; anything else is treated as a search
    term, encoded with ``quote_plus`` and embedded in a DeviantArt search URL.

    Args:
        input_str: A complete URL, or a search term such as a category or
            user name.

    Returns:
        The URL string to load.
    """
    input_str = input_str.strip()
    # URL schemes are case-insensitive, so "HTTPS://..." is still a URL and
    # must not be wrapped in a search query.
    if input_str.lower().startswith(("http://", "https://")):
        return input_str
    return f"https://www.deviantart.com/search?q={quote_plus(input_str)}"
31
+
32
  async def scrape_images(url, max_imgs):
33
  max_imgs = max(10, min(max_imgs, 300))
34
  async with async_playwright() as p:
 
38
  viewport={"width": 1366, "height": 768},
39
  )
40
 
 
41
  cookies_env = os.getenv("COOKIES_JSON")
42
  if cookies_env:
43
  try:
 
87
  await browser.close()
88
  return collected_data[:max_imgs]
89
 
90
def run_scraper(user_input, max_imgs):
    """Resolve the user's input to a URL and run the async scraper to completion.

    Args:
        user_input: Raw text from the UI; passed through ``build_url``.
        max_imgs: Requested image count; converted with ``int`` before being
            handed to ``scrape_images``.

    Returns:
        Whatever ``scrape_images`` returns for the resolved URL.
    """
    url = build_url(user_input)
    print(f"Usando URL: {url}")
    limit = int(max_imgs)
    scrape_job = scrape_images(url, limit)
    # asyncio.run drives the coroutine on a fresh event loop and blocks
    # until it finishes.
    return asyncio.run(scrape_job)
94
 
95
def interface_fn(user_input, max_imgs):
    """Gradio callback: scrape and shape the results as (image, caption) pairs.

    Args:
        user_input: URL or search term typed by the user.
        max_imgs: Maximum number of images requested.

    Returns:
        A list of ``(img_url, caption)`` tuples, one per scraped item, where
        the caption names the item's ``user`` field.
    """
    gallery_items = []
    for item in run_scraper(user_input, max_imgs):
        image_url = item["img_url"]
        caption = f"Usuario: {item['user']}"
        gallery_items.append((image_url, caption))
    return gallery_items
99
 
100
  demo = gr.Interface(
101
  fn=interface_fn,
102
  inputs=[
103
+ gr.Textbox(label="URL o Categoría DeviantArt", lines=1,
104
+ placeholder="Pega una URL o escribe una categoría o usuario"),
105
  gr.Slider(minimum=10, maximum=300, step=1, value=30, label="Máximo de imágenes")
106
  ],
107
  outputs=gr.Gallery(label="Imágenes recolectadas"),
108
  title="Scraper de Imágenes - DeviantArt",
109
+ description="Introduce una URL completa o solo una categoría/usuario para buscar imágenes."
110
  )
111
 
112
  if __name__ == "__main__":
113
  demo.launch()
114
+