Spaces:

IotaCluster
/

ScreenShot

Paused

App Files Community

IotaCluster committed on Aug 4, 2025

Commit

bf53a82

verified ·

1 Parent(s): 1c8c582

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -75

app.py CHANGED Viewed

@@ -1,80 +1,37 @@
-import re
 import gradio as gr
-import requests
-from docx import Document
-from PIL import Image
-from io import BytesIO
-SITESHOT_API_KEY = "your_siteshot_api_key_here"  # <-- Replace with your actual API key
-def extract_first_table_as_dict(doc_path):
-    doc = Document(doc_path)
-    if not doc.tables:
-        return []
-    table = doc.tables[0]
-    keys = [cell.text.strip() for cell in table.rows[0].cells]
-    data = []
-    for row in table.rows[1:]:
-        values = [cell.text.strip() for cell in row.cells]
-        row_dict = dict(zip(keys, values))
-        data.append(row_dict)
-    return data
-def extract_urls_from_dict_list(dict_list):
-    url_pattern = r'https?://[^\s)>\]]+'
-    urls = set()
-    for entry in dict_list:
-        for value in entry.values():
-            found = re.findall(url_pattern, value)
-            urls.update(found)
-    return list(urls)
-def capture_screenshot(doc_file):
-    # Step 1: Extract table
-    table_data = extract_first_table_as_dict(doc_file.name)
-    # Step 2: Extract URLs
-    urls = extract_urls_from_dict_list(table_data)
-    # Step 3: Take screenshots
-    screenshots = []
-    for url in urls:
-        try:
-            response = requests.get(
-                "https://api.screenshotapi.net/screenshot",
-                params={
-                    "token": SITESHOT_API_KEY,
-                    "url": url,
-                    "output": "image",
-                    "file_type": "png",
-                    "full_page": "true"
-                },
-                stream=True
-            )
-            if response.status_code == 200:
-                image = Image.open(BytesIO(response.content))
-                screenshots.append(image)
-            else:
-                print(f"Failed screenshot for {url}: {response.status_code}")
-        except Exception as e:
-            print(f"Error processing {url}: {e}")
-    return table_data, screenshots
-# Gradio Interface
-app = gr.Interface(
     fn=capture_screenshot,
-    inputs=gr.File(label="Upload Word (.docx) File", file_types=[".docx"]),
-    outputs=[
-        gr.JSON(label="Extracted Table Data"),
-        gr.Gallery(label="Webpage Screenshots").style(grid=[2], height="auto")
-    ],
-    title="📄 DOCX Table Extractor + 🌐 URL Screenshotter",
-    description="Upload a Word file. This app extracts the first table, finds any links, and screenshots them using SiteShot."
 )
 if __name__ == "__main__":
-    app.launch(share=True)

 import gradio as gr
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
+import time
+def capture_screenshot(url):
+    options = Options()
+    options.add_argument("--headless")
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--window-size=1920,1080")
+    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
+    screenshot_path = "screenshot.png"
+    try:
+        driver.get(url)
+        time.sleep(2)
+        driver.save_screenshot(screenshot_path)
+    except Exception as e:
+        return f"Error: {str(e)}"
+    finally:
+        driver.quit()
+    return screenshot_path
+demo = gr.Interface(
     fn=capture_screenshot,
+    inputs=gr.Textbox(label="Enter URL"),
+    outputs=gr.Image(label="Screenshot"),
+    title="Web Page Screenshot Tool",
+    description="Enter a URL and get a screenshot using headless Chrome.",
 )
 if __name__ == "__main__":
+    demo.launch()