Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -134,15 +134,50 @@ def enhanced_ai_scrape(input_method, url, html, prompts_str, selector, page_pos,
|
|
| 134 |
except Exception as e:
|
| 135 |
return error_response(f"Unexpected error: {str(e)}")
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
# ----------------- Gradio UI ------------------
|
| 138 |
|
| 139 |
with gr.Blocks() as demo:
|
| 140 |
gr.Markdown("""
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 146 |
""")
|
| 147 |
|
| 148 |
with gr.Row():
|
|
@@ -225,7 +260,7 @@ with gr.Blocks() as demo:
|
|
| 225 |
|
| 226 |
# Rate limit status
|
| 227 |
gr.Markdown("#### Rate Limit Status")
|
| 228 |
-
rate_limit_status = gr.JSON(label="Current Usage"
|
| 229 |
refresh_status_btn = gr.Button("Refresh Status", size="sm")
|
| 230 |
|
| 231 |
scrape_btn = gr.Button("Scrape with AI", variant="primary")
|
|
@@ -248,7 +283,24 @@ with gr.Blocks() as demo:
|
|
| 248 |
scrape_btn.click(
|
| 249 |
enhanced_ai_scrape,
|
| 250 |
inputs=[input_method_scraper, url_scraper, html_content, element_prompts, root_selector, page_position],
|
| 251 |
-
outputs=[scrape_status, context_output, selectors_output, detailed_data, links_data, pagination_info]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
demo.launch()
|
|
|
|
| 134 |
except Exception as e:
|
| 135 |
return error_response(f"Unexpected error: {str(e)}")
|
| 136 |
|
| 137 |
+
|
| 138 |
+
def get_rate_limit_status(request: gr.Request):
    """Report the caller's current rate-limit usage as a plain dict.

    Args:
        request: The Gradio request for the current call. It is passed to
            ``get_real_ip`` to resolve the caller's IP address.

    Returns:
        ``{"error": "Unable to get request info"}`` when ``request`` is falsy.
        Otherwise a dict with ``status`` (``"Rate limited"`` or
        ``"Available"``), ``current_requests``, ``max_requests`` and
        ``time_window_minutes``, plus ``time_remaining_minutes`` when the
        caller is limited or ``remaining_requests`` when it is not.
    """
    if not request:
        return {"error": "Unable to get request info"}

    ip = get_real_ip(request)
    now = time.time()

    # Keep only the timestamps that are still inside the window.
    # ``.get`` is used instead of indexing so that a pure status check works
    # for an IP with no recorded requests regardless of the mapping type:
    # indexing a plain dict would raise KeyError, and indexing a defaultdict
    # would insert an empty entry for every visitor who merely loads the page.
    recent = [t for t in request_times.get(ip, ()) if now - t < TIME_WINDOW]
    if ip in request_times:
        # Persist the pruned list only for IPs that already have an entry.
        request_times[ip] = recent

    current_requests = len(recent)
    time_window_minutes = round(TIME_WINDOW / 60, 1)

    if current_requests >= MAX_REQUESTS:
        # NOTE(review): assumes timestamps are recorded in chronological
        # order, so index 0 is the oldest one still in the window - confirm
        # against the code that appends to request_times.
        time_remaining = int(TIME_WINDOW - (now - recent[0]))
        time_remaining_minutes = round(time_remaining / 60, 1)
        return {
            "status": "Rate limited",
            "current_requests": current_requests,
            "max_requests": MAX_REQUESTS,
            "time_window_minutes": time_window_minutes,
            "time_remaining_minutes": time_remaining_minutes,
        }

    return {
        "status": "Available",
        "current_requests": current_requests,
        "max_requests": MAX_REQUESTS,
        "time_window_minutes": time_window_minutes,
        "remaining_requests": MAX_REQUESTS - current_requests,
    }
|
| 170 |
+
|
| 171 |
# ----------------- Gradio UI ------------------
|
| 172 |
|
| 173 |
with gr.Blocks() as demo:
|
| 174 |
gr.Markdown("""
|
| 175 |
+
<div style='text-align: center; margin-bottom: 24px;'>
|
| 176 |
+
<h1 style='font-size:2.2em; margin-bottom: 0.2em;'>🧩 AI Scraper</h1>
|
| 177 |
+
<p style='font-size:1.2em; margin-top: 0;'>Extract structured data from web pages with advanced AI models.</p>
|
| 178 |
+
<p style='font-size:1em; margin-top: 0.5em;'>For more details and API usage, see the <a href='https://jigsawstack.com/docs/api-reference/ai/scrape' target='_blank'>documentation</a>.</p>
|
| 179 |
+
<p style='font-size:0.9em; margin-top: 0.5em; color: #666;'>Rate limit: 1 request per hour per IP address</p>
|
| 180 |
+
</div>
|
| 181 |
""")
|
| 182 |
|
| 183 |
with gr.Row():
|
|
|
|
| 260 |
|
| 261 |
# Rate limit status
|
| 262 |
gr.Markdown("#### Rate Limit Status")
|
| 263 |
+
rate_limit_status = gr.JSON(label="Current Usage")
|
| 264 |
refresh_status_btn = gr.Button("Refresh Status", size="sm")
|
| 265 |
|
| 266 |
scrape_btn = gr.Button("Scrape with AI", variant="primary")
|
|
|
|
| 283 |
scrape_btn.click(
|
| 284 |
enhanced_ai_scrape,
|
| 285 |
inputs=[input_method_scraper, url_scraper, html_content, element_prompts, root_selector, page_position],
|
| 286 |
+
outputs=[scrape_status, context_output, selectors_output, detailed_data, links_data, pagination_info],
|
| 287 |
+
_js="() => []"
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
# Rate limit status handlers
|
| 291 |
+
refresh_status_btn.click(
|
| 292 |
+
get_rate_limit_status,
|
| 293 |
+
inputs=[],
|
| 294 |
+
outputs=rate_limit_status,
|
| 295 |
+
_js="() => []"
|
| 296 |
+
)
|
| 297 |
+
|
| 298 |
+
# Auto-refresh rate limit status when page loads
|
| 299 |
+
demo.load(
|
| 300 |
+
get_rate_limit_status,
|
| 301 |
+
inputs=[],
|
| 302 |
+
outputs=rate_limit_status,
|
| 303 |
+
_js="() => []"
|
| 304 |
)
|
| 305 |
|
| 306 |
demo.launch()
|