clementBE commited on
Commit
dea18bb
·
verified ·
1 Parent(s): 38e720f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -128
app.py CHANGED
@@ -1,135 +1,108 @@
1
- import time
2
- import requests
3
- from selenium import webdriver
4
- from bs4 import BeautifulSoup
5
- from selenium.webdriver.chrome.options import Options
6
- from selenium.webdriver.chrome.service import Service # New: Import Service for modern Selenium setup
7
- from fastapi import FastAPI, Response, HTTPException
8
- from fastapi.responses import FileResponse
9
- from fastapi.staticfiles import StaticFiles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  import os
11
- import uvicorn # Required for explicit startup
12
-
13
- # Initialize the FastAPI app
14
- app = FastAPI()
15
-
16
- def getimage(url: str) -> str:
17
- """
18
- Scrapes the profile image from a given URL using Selenium and BeautifulSoup,
19
- and saves it to the local filesystem.
20
- """
21
-
22
- # 1. Setup Selenium Options
23
- chrome_options = Options()
24
- # REQUIRED for deployment on servers like Hugging Face Spaces or Docker
25
- chrome_options.add_argument('--headless')
26
- chrome_options.add_argument('--no-sandbox')
27
- chrome_options.add_argument('--disable-dev-shm-usage')
28
- chrome_options.add_argument("--window-size=1200x800")
29
-
30
- driver = None
31
- try:
32
- # 2. Initialize the WebDriver
33
- # Note: If 'chromedriver' is not in the system PATH,
34
- # this will fail. For custom environments, you may need to
35
- # specify the driver path using Service(executable_path='...')
36
- driver = webdriver.Chrome(options=chrome_options)
37
-
38
- # 3. Navigate and Wait
39
- driver.get(url)
40
- # Wait long enough for the dynamic content (profile picture) to load
41
- time.sleep(5)
42
- page_source = driver.page_source
43
-
44
- # 4. Parse the Source
45
- soup = BeautifulSoup(page_source, 'html.parser')
46
-
47
- # 5. Targeted Thumbnail/Profile Picture Selection Logic
48
- # Strategy: Search for an image with 'alt' text related to the profile
49
- def is_profile_image(tag):
50
- alt_text = tag.get('alt', '').lower()
51
- # Common alt texts used for the main profile picture
52
- return tag.name == 'img' and ('profile picture' in alt_text or 'avatar' in alt_text)
53
-
54
- img_tag = soup.find(is_profile_image)
55
-
56
- # Fallback Strategy: If the profile-specific search fails, take the largest available image
57
- if not img_tag:
58
- print("Fallback to finding the first image with a 'src' attribute.")
59
- img_tag = soup.find('img', src=True)
60
-
61
- if not img_tag:
62
- raise ValueError("Could not find a suitable image tag on the page.")
63
-
64
- img_url = img_tag['src']
65
-
66
- # 6. Download the Image
67
- r = requests.get(img_url, stream=True)
68
- r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
69
-
70
- filename = "instagram_profile.png"
71
- with open(filename, 'wb') as f:
72
- for chunk in r.iter_content(chunk_size=8192):
73
- f.write(chunk)
74
-
75
- return filename
76
-
77
- except Exception as e:
78
- # Clean up the browser instance in case of an error
79
- raise RuntimeError(f"Scraping failed for URL {url}: {e}") from e
80
- finally:
81
- if driver:
82
- driver.quit()
83
-
84
-
85
- # --- FastAPI Endpoints ---
86
-
87
- # Endpoint to trigger the image scraping
88
- @app.get("/fetch_profile_image")
89
- def fetch_image_endpoint(input_url: str):
90
- """
91
- Accepts a URL, scrapes the profile image, and returns the result.
92
- """
93
- if not input_url.startswith("http"):
94
- raise HTTPException(status_code=400, detail="Input must be a valid URL starting with http:// or https://")
95
 
96
  try:
97
- saved_filename = getimage(input_url)
98
-
99
- # We can also return the image itself, but for simplicity,
100
- # we'll confirm the file was saved.
101
- return {
102
- "status": "success",
103
- "message": f"Profile picture successfully caught and saved as {saved_filename}",
104
- "filename": saved_filename
105
- }
106
- except Exception as e:
107
- # We catch the RuntimeError raised in getimage and return a 500 status
108
- raise HTTPException(status_code=500, detail=str(e))
109
-
110
- # Ensure the 'static' directory exists for mounting, preventing the RuntimeError
111
- STATIC_DIR = "static"
112
- if not os.path.exists(STATIC_DIR):
113
- os.makedirs(STATIC_DIR)
114
-
115
- # This part serves the static files (like a frontend HTML page)
116
- # Note: You would need a 'static' folder with an 'index.html' file to see a UI.
117
- app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")
118
-
119
- # The root endpoint serves the main HTML page
120
- @app.get("/")
121
- def index() -> FileResponse:
122
- # Ensure the path exists, otherwise the app will fail to start
123
- if os.path.exists("static/index.html"):
124
- return FileResponse(path="static/index.html", media_type="text/html")
125
  else:
126
- # If running without a UI, just return a simple message
127
- return {"message": "Image Scraper API Running. Access /fetch_profile_image?input_url=<URL> to test."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
 
130
- # --- Explicit Uvicorn Startup Block (CRITICAL FIX) ---
131
  if __name__ == "__main__":
132
- # This block ensures the application starts listening on a network port,
133
- # resolving the "application does not seem to be initialized" error.
134
- # We use 0.0.0.0 for compatibility with containerized/sandboxed environments.
135
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
+ """
2
+ Gradio + Hugging Face Inference API example app
3
+ File: app.py
4
+
5
+ How it works
6
+ - Uses the official Hugging Face Inference API endpoint: https://api-inference.huggingface.co/models/{model}
7
+ - Reads the token from the environment variable HUGGINGFACE_API_TOKEN (or HF_API_TOKEN)
8
+ - Provides a small Gradio UI to choose model, enter prompt and parameters, and shows generated text
9
+
10
+ Instructions to run locally
11
+ 1. Install dependencies: pip install -r requirements.txt
12
+ 2. Export your HF token: export HUGGINGFACE_API_TOKEN="hf_..."
13
+ 3. Run: python app.py
14
+
15
+ Instructions to deploy on Hugging Face Spaces (Gradio)
16
+ 1. Create a new Space on Hugging Face and choose the Gradio template.
17
+ 2. Upload this file and requirements.txt to the repository, or push via git.
18
+ 3. Add a secret in the Space settings named HUGGINGFACE_API_TOKEN with your token value.
19
+ 4. (Optional) If using large models, choose GPU hardware for the Space.
20
+ 5. The Space will start and you can use the UI.
21
+
22
+ Notes
23
+ - This example uses the simple REST Inference API via requests. For higher throughput or advanced use-cases, consider using the
24
+ huggingface_hub library and caching, or hosted endpoints provided by Hugging Face (inference endpoints) for production.
25
+
26
+ """
27
+
28
  import os
29
+ import requests
30
+ import gradio as gr
31
+ from typing import Optional
32
+
33
# Read the API token once at import time; either env var name is accepted.
HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or os.environ.get("HF_API_TOKEN")
DEFAULT_MODEL = "gpt2"  # change to a different default if you prefer (e.g. "gpt-neo-125M")


def call_hf_inference(model: str, prompt: str, max_new_tokens: int = 128, temperature: float = 1.0, top_k: Optional[int] = None):
    """Call the Hugging Face Inference API and return generated text or an error message.

    Parameters
    ----------
    model : str
        Model id on the Hub, e.g. "gpt2" or "bigscience/bloom".
    prompt : str
        Input text to complete.
    max_new_tokens : int
        Upper bound on generated tokens (coerced to int before sending).
    temperature : float
        Sampling temperature (coerced to float before sending).
    top_k : Optional[int]
        Optional top-k sampling cutoff; omitted from the payload when None.

    Returns
    -------
    str
        Generated text on success, otherwise a human-readable error string.
        This function never raises — errors are returned as strings so the
        Gradio UI can display them directly in the output textbox.
    """
    if not HF_TOKEN:
        return "ERROR: HUGGINGFACE_API_TOKEN environment variable is not set.\nSet it and restart the app."

    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": int(max_new_tokens),
            "temperature": float(temperature),
        },
        # wait_for_model=True: block until a cold model finishes loading
        # instead of failing immediately with HTTP 503 while it spins up.
        "options": {"use_cache": False, "wait_for_model": True},
    }

    # Add optional top_k only when the caller supplied one.
    if top_k is not None:
        payload["parameters"]["top_k"] = int(top_k)

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=120)
    except requests.exceptions.RequestException as e:
        return f"Request error: {e}"

    if resp.status_code == 200:
        try:
            data = resp.json()
        except ValueError:
            return f"Invalid JSON response:\n{resp.text}"

        # Text-generation models usually return [{"generated_text": "..."}].
        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict) and "generated_text" in data[0]:
            return data[0]["generated_text"]
        # Some models answer with a dict, possibly carrying an "error" field.
        if isinstance(data, dict) and "error" in data:
            return f"API error: {data['error']}"
        # Fall back to the raw structure for any other response shape.
        return str(data)
    else:
        # Surface the server's own error body for easier debugging.
        try:
            err = resp.json()
        except ValueError:
            err = resp.text
        return f"HTTP {resp.status_code}: {err}"
83
+
84
+
85
+ # Gradio interface
86
+ with gr.Blocks(title="Hugging Face Inference (Gradio)") as demo:
87
+ gr.Markdown("# Hugging Face Inference API — Gradio demo\nEnter a model name and a prompt, then generate text using the official API token stored in environment variables.")
88
+
89
+ with gr.Row():
90
+ with gr.Column(scale=2):
91
+ model_input = gr.Textbox(label="Model name (eg. gpt2, bigscience/bloom, facebook/opt-350m)", value=DEFAULT_MODEL)
92
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Write a short story about a curious robot...", lines=6)
93
+ run_btn = gr.Button("Generate")
94
+
95
+ with gr.Column(scale=1):
96
+ max_tokens = gr.Slider(minimum=1, maximum=1024, step=1, value=128, label="Max new tokens")
97
+ temperature = gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Temperature")
98
+ top_k = gr.Number(value=None, label="top_k (optional)")
99
+ output_box = gr.Textbox(label="Generated text / API response", lines=12)
100
+
101
+ def on_generate(model, prompt, max_new_tokens, temperature, top_k):
102
+ return call_hf_inference(model.strip(), prompt, max_new_tokens, temperature, None if top_k is None else int(top_k))
103
+
104
+ run_btn.click(on_generate, inputs=[model_input, prompt_input, max_tokens, temperature, top_k], outputs=[output_box])
105
 
106
 
 
107
  if __name__ == "__main__":
108
+ demo.launch()