import os
import asyncio
from typing import Dict, Optional
from uuid import uuid4

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from src.web_extractor import WebExtractor
from src.scrapers.playwright_scraper import ScraperConfig

app = FastAPI()

# Active persistent sessions, keyed by session id (uuid4 string).
sessions: Dict[str, WebExtractor] = {}


def _make_extractor(model_name: Optional[str]) -> WebExtractor:
    """Build a WebExtractor with the standard headless scraper config.

    Centralized so the stateless and session endpoints stay consistent.
    """
    scraper_config = ScraperConfig(
        headless=True,
        max_retries=3,
        delay_after_load=5
    )
    return WebExtractor(model_name=model_name, scraper_config=scraper_config)


async def _close_extractor(extractor: WebExtractor) -> None:
    """Release browser resources held by an extractor, if it holds any.

    Defensive: tolerates extractors without a `playwright_scraper`
    attribute or whose scraper exposes no `close` coroutine.
    """
    scraper = getattr(extractor, "playwright_scraper", None)
    if scraper is not None and hasattr(scraper, "close"):
        await scraper.close()


class ScrapeRequest(BaseModel):
    """Payload for a scrape call: target URL plus a natural-language query."""
    url: str
    query: str
    model_name: Optional[str] = "alias-fast"


class SessionCreateRequest(BaseModel):
    """Payload for creating a persistent session."""
    model_name: Optional[str] = "alias-fast"


@app.get("/health")
async def health():
    """Liveness probe."""
    return {"status": "ok", "message": "CyberScraper 2077 API is running"}


@app.post("/api/scrape")
async def scrape(request: ScrapeRequest):
    """Stateless scrape request (creates a new extractor for each request)."""
    extractor = _make_extractor(request.model_name)
    try:
        # The extractor expects the URL and the specific request combined
        # into a single query string.
        full_query = f"{request.url} {request.query}"
        response = await extractor.process_query(full_query)

        # If response is a tuple (csv/excel), extract the first part.
        if isinstance(response, tuple):
            response = response[0]

        return {
            "url": request.url,
            "query": request.query,
            "response": response
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always release the browser — success and error paths alike —
        # so a failure after process_query cannot leak a Playwright instance.
        await _close_extractor(extractor)


@app.post("/api/session")
async def create_session(request: SessionCreateRequest):
    """Create a persistent scraping session."""
    session_id = str(uuid4())
    try:
        sessions[session_id] = _make_extractor(request.model_name)
        return {
            "session_id": session_id,
            "message": "Session created",
            "model": request.model_name
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to create session: {str(e)}")


@app.post("/api/session/{session_id}/scrape")
async def session_scrape(session_id: str, request: ScrapeRequest):
    """Scrape using an existing session context."""
    if session_id not in sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    extractor = sessions[session_id]
    try:
        full_query = f"{request.url} {request.query}"
        response = await extractor.process_query(full_query)

        # If response is a tuple (csv/excel), extract the first part.
        if isinstance(response, tuple):
            response = response[0]

        return {
            "session_id": session_id,
            "url": request.url,
            "query": request.query,
            "response": response
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.delete("/api/session/{session_id}")
async def close_session(session_id: str):
    """Close a session and release resources."""
    # pop() removes and returns in one step, avoiding a lookup/delete race.
    extractor = sessions.pop(session_id, None)
    if extractor is None:
        raise HTTPException(status_code=404, detail="Session not found")
    await _close_extractor(extractor)
    return {"message": "Session closed", "session_id": session_id}


@app.on_event("shutdown")
async def _shutdown_sessions():
    """Close every remaining session so browsers don't outlive the server."""
    while sessions:
        _, extractor = sessions.popitem()
        try:
            await _close_extractor(extractor)
        except Exception:
            # Best-effort cleanup during shutdown; keep draining the rest.
            pass


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)