Spaces:
Sleeping
Sleeping
| """ | |
| ============================================ | |
| Manual Intervention Routes | |
| - View captcha screenshots | |
| - Click on captcha remotely | |
| - Type text into fields remotely | |
| - Mark intervention as resolved | |
| ============================================ | |
| """ | |
| import os | |
| import logging | |
| from typing import Optional | |
| from fastapi import APIRouter, HTTPException, status | |
| from fastapi.responses import FileResponse | |
| from pydantic import BaseModel, Field | |
| from app.scraper.browser_manager import browser_manager | |
| from app.scraper.captcha_detector import captcha_detector | |
| from app.scraper.scraper_engine import scraper_status, update_status | |
| from app.config import settings | |
| logger = logging.getLogger(__name__) | |
| router = APIRouter(prefix="/api/intervention", tags=["Intervention"]) | |
| # ============================================ | |
| # Request Models | |
| # ============================================ | |
class ClickRequest(BaseModel):
    """Payload for a remote click: a target novel plus page coordinates."""

    # Novel whose browser page should receive the click.
    novel_id: int = Field(default=..., examples=[1])
    # Horizontal coordinate of the click; validation rejects negatives (ge=0).
    x: int = Field(default=..., ge=0, examples=[500])
    # Vertical coordinate of the click; validation rejects negatives (ge=0).
    y: int = Field(default=..., ge=0, examples=[300])
class TypeRequest(BaseModel):
    """Payload for typing text into a selected field on a novel's page."""

    # Novel whose browser page contains the target field.
    novel_id: int = Field(default=..., examples=[1])
    # Selector string that locates the field to type into.
    selector: str = Field(default=..., examples=["input#captcha-input"])
    # Text to enter into that field.
    text: str = Field(default=..., examples=["abc123"])
class ResolveRequest(BaseModel):
    """Payload for marking a novel's pending intervention as resolved."""

    # Novel whose intervention should be marked as resolved.
    novel_id: int = Field(default=..., examples=[1])
class RefreshScreenshotRequest(BaseModel):
    """Payload asking for a new screenshot of a novel's page."""

    # Novel whose current page should be captured.
    novel_id: int = Field(default=..., examples=[1])
| # ============================================ | |
| # Routes | |
| # ============================================ | |
async def get_active_interventions():
    """
    List every novel that is currently waiting for manual intervention.

    The frontend polls this to show captcha alerts.

    Returns:
        A dict with "count" (number of entries) and "interventions", a
        mapping of novel id to a summary dict. Fields missing from the
        detector's record fall back to defaults ("" / "Unknown" / 0 / True).
    """
    pending = captcha_detector.get_all_interventions()

    # Copy only the fields the response exposes, filling in defaults.
    summaries = {
        nid: {
            "novel_id": nid,
            "screenshot": details.get("screenshot", ""),
            "reason": details.get("reason", "Unknown"),
            "page_url": details.get("page_url", ""),
            "timestamp": details.get("timestamp", 0),
            "waiting": details.get("waiting", True),
        }
        for nid, details in pending.items()
    }

    return {"count": len(summaries), "interventions": summaries}
async def get_screenshot(filename: str):
    """
    Serve a captcha screenshot image from the screenshots directory.

    The frontend displays this so the user can see the captcha.

    Args:
        filename: Requested screenshot name. Only its final path component
            is used; any directory part is discarded.

    Returns:
        A FileResponse that sends the file with media type "image/png".

    Raises:
        HTTPException: 404 when no regular file with that name exists in
            settings.SCREENSHOTS_DIR.
    """
    # Security: keep only the last path component to prevent directory traversal.
    safe_filename = os.path.basename(filename)
    filepath = os.path.join(settings.SCREENSHOTS_DIR, safe_filename)

    # Use isfile rather than exists: a basename of "" or ".." resolves to a
    # directory, which exists but cannot be served as a file.
    if not os.path.isfile(filepath):
        raise HTTPException(
            status_code=404,
            detail=f"Screenshot not found: {safe_filename}",
        )

    return FileResponse(
        filepath,
        media_type="image/png",
        filename=safe_filename,
    )
async def remote_click(request: ClickRequest):
    """
    Click at specific coordinates on a novel's browser page.

    How it works:
    1. User sees the screenshot in the UI
    2. User clicks on the captcha in the screenshot
    3. Frontend sends the click coordinates here
    4. Backend performs the actual click on the headless browser

    Args:
        request: Novel id plus the x/y coordinates to click.

    Returns:
        A dict with "message" and "new_screenshot": the filename of a
        screenshot taken after the click, or None when take_screenshot
        returned a falsy value.

    Raises:
        HTTPException: 404 when the novel has no open browser page;
            500 when the click or the follow-up screenshot raises.
    """
    import time

    novel_id = request.novel_id

    # Verify the novel has an active page before attempting the click.
    page = browser_manager.get_page(novel_id)
    if page is None or page.is_closed():
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    try:
        await browser_manager.click_at_coordinates(novel_id, request.x, request.y)
        logger.info(
            "Remote click at (%s, %s) for Novel %s", request.x, request.y, novel_id
        )

        # Take a new screenshot after clicking to show the result.
        new_filename = f"novel_{novel_id}_after_click_{int(time.time())}.png"
        new_screenshot = await browser_manager.take_screenshot(novel_id, new_filename)

        return {
            "message": f"Clicked at ({request.x}, {request.y})",
            "new_screenshot": new_filename if new_screenshot else None,
        }
    except Exception as e:
        # logger.exception records the traceback; chaining keeps the root
        # cause attached to the HTTP error raised to the client.
        logger.exception("Remote click failed for Novel %s: %s", novel_id, e)
        raise HTTPException(
            status_code=500,
            detail=f"Click failed: {str(e)}",
        ) from e
async def remote_type(request: TypeRequest):
    """
    Type text into a field on a novel's browser page.

    Useful for text-based captchas.

    Args:
        request: Novel id, the selector of the target field, and the text
            to type.

    Returns:
        A dict with a "message" confirming what was typed and where.

    Raises:
        HTTPException: 404 when the novel has no open browser page;
            500 when typing raises.
    """
    novel_id = request.novel_id

    page = browser_manager.get_page(novel_id)
    if page is None or page.is_closed():
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    try:
        await browser_manager.type_text(novel_id, request.selector, request.text)
        # The typed text itself is deliberately left out of the log line.
        logger.info("Remote type into '%s' for Novel %s", request.selector, novel_id)
        return {
            "message": f"Typed '{request.text}' into '{request.selector}'",
        }
    except Exception as e:
        # logger.exception records the traceback; chaining keeps the root
        # cause attached to the HTTP error raised to the client.
        logger.exception("Remote type failed for Novel %s: %s", novel_id, e)
        raise HTTPException(
            status_code=500,
            detail=f"Type failed: {str(e)}",
        ) from e
async def resolve_intervention(request: ResolveRequest):
    """
    Mark a novel's captcha intervention as resolved.

    Call this after:
    1. You've clicked on the captcha via /click
    2. The captcha appears to be solved
    3. You want the scraper to continue

    Responds with 404 when the detector reports no intervention for the
    novel; otherwise marks it complete and sets the status phase to
    "resuming".
    """
    novel_id = request.novel_id

    # Guard: nothing to resolve unless the detector has a record for this novel.
    if not captcha_detector.get_intervention_status(novel_id):
        raise HTTPException(
            status_code=404,
            detail=f"No active intervention for Novel {novel_id}",
        )

    captcha_detector.mark_intervention_complete(novel_id)
    update_status(
        novel_id,
        phase="resuming",
        message="Intervention resolved! Resuming scraping...",
    )
    logger.info(f"Intervention resolved for Novel {novel_id} ✅")

    response = {
        "message": f"Intervention for Novel {novel_id} marked as resolved",
        "novel_id": novel_id,
    }
    return response
async def refresh_screenshot(request: RefreshScreenshotRequest):
    """
    Take a fresh screenshot of the novel's current page.

    Use this to see the current state after clicking.

    Args:
        request: Identifies the novel whose page should be captured.

    Returns:
        A dict with "screenshot" (the new filename), "page_url" and
        "page_title" of the captured page.

    Raises:
        HTTPException: 404 when the novel has no open browser page;
            500 when take_screenshot returns None or any step raises.
    """
    import time

    novel_id = request.novel_id

    page = browser_manager.get_page(novel_id)
    if page is None or page.is_closed():
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    try:
        filename = f"novel_{novel_id}_refresh_{int(time.time())}.png"
        screenshot_path = await browser_manager.take_screenshot(novel_id, filename)

        if screenshot_path is None:
            raise HTTPException(
                status_code=500,
                detail="Failed to take screenshot",
            )

        return {
            "screenshot": filename,
            "page_url": page.url,
            "page_title": await page.title(),
        }
    except HTTPException:
        # Let the explicit 500 above through without re-wrapping it.
        raise
    except Exception as e:
        # logger.exception records the traceback; chaining keeps the root
        # cause attached to the HTTP error raised to the client.
        logger.exception("Refresh screenshot failed for Novel %s: %s", novel_id, e)
        raise HTTPException(
            status_code=500,
            detail=f"Screenshot failed: {str(e)}",
        ) from e