Spaces:

clementBE
/

instagram_test

Sleeping

App Files Files Community

clementBE committed on Sep 30, 2025

Commit

c5aba08

verified ·

1 Parent(s): ae8813c

Create app.py

Browse files

Files changed (1) hide show

app.py +115 -0

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os
+import time
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+from fastapi import FastAPI, HTTPException, Response
+# FileResponse lives in fastapi.responses; the top-level fastapi package does
+# not re-export it, so importing it from there fails at startup.
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+# Module-level FastAPI application; the route decorators and the static mount
+# further down in this file register themselves on this instance.
+app = FastAPI()
+def getimage(url: str) -> str:
+    """
+    Scrapes the profile image from a given URL using Selenium and BeautifulSoup,
+    and saves it to the local filesystem.
+    """
+    # 1. Setup Selenium Options
+    chrome_options = Options()
+    # REQUIRED for deployment on servers like Hugging Face Spaces or Docker
+    chrome_options.add_argument('--headless')
+    chrome_options.add_argument('--no-sandbox')
+    chrome_options.add_argument('--disable-dev-shm-usage')
+    chrome_options.add_argument("--window-size=1200x800")
+    driver = None
+    try:
+        # 2. Initialize the WebDriver
+        driver = webdriver.Chrome(options=chrome_options)
+        # 3. Navigate and Wait
+        driver.get(url)
+        # Wait long enough for the dynamic content (profile picture) to load
+        time.sleep(5)
+        page_source = driver.page_source
+        # 4. Parse the Source
+        soup = BeautifulSoup(page_source, 'html.parser')
+        # 5. Targeted Thumbnail/Profile Picture Selection Logic
+        # Strategy: Search for an image with 'alt' text related to the profile
+        def is_profile_image(tag):
+            alt_text = tag.get('alt', '').lower()
+            # Common alt texts used for the main profile picture
+            return tag.name == 'img' and ('profile picture' in alt_text or 'avatar' in alt_text)
+        img_tag = soup.find(is_profile_image)
+        # Fallback Strategy: If the profile-specific search fails, take the largest available image
+        if not img_tag:
+            print("Fallback to finding the first image with a 'src' attribute.")
+            img_tag = soup.find('img', src=True)
+        if not img_tag:
+            raise ValueError("Could not find a suitable image tag on the page.")
+        img_url = img_tag['src']
+        # 6. Download the Image
+        r = requests.get(img_url, stream=True)
+        r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
+        filename = "instagram_profile.png"
+        with open(filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return filename
+    except Exception as e:
+        # Clean up the browser instance in case of an error
+        raise RuntimeError(f"Scraping failed for URL {url}: {e}") from e
+    finally:
+        if driver:
+            driver.quit()
+# --- FastAPI Endpoints ---
+# Endpoint to trigger the image scraping
+@app.get("/fetch_profile_image")
+def fetch_image_endpoint(input_url: str):
+    """
+    Accepts a URL, scrapes the profile image, and returns the result.
+    """
+    if not input_url.startswith("http"):
+         raise HTTPException(status_code=400, detail="Input must be a valid URL starting with http:// or https://")
+    try:
+        saved_filename = getimage(input_url)
+        # We can also return the image itself, but for simplicity,
+        # we'll confirm the file was saved.
+        return {
+            "status": "success",
+            "message": f"Profile picture successfully caught and saved as {saved_filename}",
+            "filename": saved_filename
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# This part serves the static files (like a frontend HTML page)
+# Note: You would need a 'static' folder with an 'index.html' file to see a UI.
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
+# The root endpoint serves the main HTML page
+@app.get("/")
+def index() -> FileResponse:
+    # Ensure the path exists, otherwise the app will fail to start
+    if os.path.exists("static/index.html"):
+        return FileResponse(path="static/index.html", media_type="text/html")
+    else:
+        # If running without a UI, just return a simple message
+        return {"message": "Image Scraper API Running. Access /fetch_profile_image?input_url=<URL> to test."}