Update app.py
app.py
CHANGED
@@ -1,552 +1,675 @@
-import json
-import logging
-import os
-import random
-
-# serve files
-app.mount(FILES_ROUTE, StaticFiles(directory=OUT_DIR), name="files")
-
-# ---------------------------
-# Pydantic models (API)
-# ---------------------------
-class PromptItem(BaseModel):
-    prompt: str
-    negative: Optional[str] = ""
-    seed: Optional[int] = -1
-    resolution: Optional[str] = "512x768"
-    guidance: Optional[float] = 7.0
-    # optional per-item count: how many images to generate for this prompt
-    count: Optional[int] = 1
-
-    @validator("count")
-    def count_min(cls, v):
-        if v is None:
-            return 1
-        if v < 1:
-            raise ValueError("count must be >= 1")
-        return v
-
-
-class GenerateRequest(BaseModel):
-    items: List[PromptItem] = Field(..., description="List of prompt objects. If you pass a single item and top-level count >1, the single prompt will be used repeatedly.")
-    # If items list length == 1 and global_count > 1 -> generate that many variants
-    global_count: Optional[int] = Field(1, description="When items has a single entry and you want multiple images")
-    concurrency: Optional[int] = Field(2, description="How many concurrent generation tasks")
-    proxies: Optional[List[str]] = None
-    proxy_rotate: Optional[bool] = False
-    fast: Optional[bool] = False
-    max_wait: Optional[int] = 90
-
-    @validator("global_count")
-    def gcount_min(cls, v):
-        if v is None:
-            return 1
-        if v < 1:
-            raise ValueError("global_count must be >= 1")
-        return v
-
-
-class UploadBase64Request(BaseModel):
-    filename: Optional[str]
-    data: str  # data:image/png;base64,... or raw base64
-
-
-# ---------------------------
-# Small utility helpers
-# ---------------------------
-def safe_filename(name: str) -> str:
-    keep = "-_.() %s%s" % (string.ascii_letters, string.digits)
-    return "".join(c for c in name if c in keep)[:140].replace(" ", "_")
-
-
-def random_token(n=8):
-    return "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(n))
-
-
-# ---------------------------
-# Proxy manager (lightweight)
-# ---------------------------
-class ProxyManager:
-    def __init__(self, proxies: Optional[List[str]] = None, rotate: bool = False):
-        self.sessions: List[Tuple[str, Any]] = []
-        self.rotate = rotate
-        self.index = 0
-        if proxies:
-            self.load(proxies)
-
-                continue
-            try:
-                s = cloudscraper.create_scraper()
-                s.proxies = {"http": p, "https": p}
-                self.sessions.append((p, s))
-            except Exception as e:
-                logger.warning("Failed to create session for proxy %s : %s", p, e)
-        logger.info("ProxyManager loaded %d proxies", len(self.sessions))
-
-    def count(self):
-        return len(self.sessions)
-
-    def get(self) -> Tuple[Optional[str], Any]:
-        if not self.sessions:
-            return None, cloudscraper.create_scraper()
-        if self.rotate:
-            p, s = self.sessions[self.index % len(self.sessions)]
-            self.index += 1
-            return p, s
-        else:
-            return self.sessions[0]
-
-
-# ---------------------------
-# Perchance client (blocking)
-# ---------------------------
-@dataclass
-class PerchanceClient:
-    base_url: str = BASE
-    timeout: int = 30
-    max_wait: int = 90
-    proxy_manager: Optional[ProxyManager] = None
-    headers: dict = None
-
-    def __post_init__(self):
-        if self.headers is None:
-            self.headers = {
-                "Accept": "*/*",
-                "Content-Type": "text/plain;charset=UTF-8",
-                "Origin": "https://image-generation.perchance.org",
-                "Referer": "https://image-generation.perchance.org/embed",
-                "User-Agent": "PerchanceClient/1.0",
-            }
-
-    def _choose_session(self):
-        if self.proxy_manager and self.proxy_manager.count() > 0:
-            return self.proxy_manager.get()
-        return None, cloudscraper.create_scraper()
-
-    def verify_user(self, thread=0):
-        _, s = self._choose_session()
-        url = f"{self.base_url}/api/verifyUser?thread={thread}&__cacheBust={random.random()}"
-
-            try:
-                r = s.get(url, timeout=timeout_seconds, headers=self.headers)
-                r.raise_for_status()
-                try:
-                    if chunk:
-                        fh.write(chunk)
-                logger.info("Saved downloaded image to %s", outpath)
-                return outpath
-                txt = (r.text or "").lower()
-                elapsed = time.time() - start
-                if r.status_code in (404, 410) or "waiting" in txt or looks_wait(image_id):
-                    if elapsed >= max_wait:
-                        raise RequestException(f"Timeout waiting for image {image_id}")
-                    time.sleep(backoff)
-                    backoff = min(backoff * 1.8, 8.0)
-                    continue
-                r.raise_for_status()
-            except RequestException as e:
-                elapsed = time.time() - start
-                if elapsed < max_wait:
-                    logger.debug("download attempt failed (%s). retrying...", e)
-                    time.sleep(backoff)
-                    backoff = min(backoff * 1.8, 8.0)
-                    continue
-                logger.error("Failed download: %s", e)
-                raise
-
-    def generate_one(self, prompt: str, negative: str, seed: int, resolution: str, guidance: float,
-                     user_key: str, ad_access_code: str, request_id: str, max_retries: int = 4, max_wait: Optional[int] = None):
-        params = {"userKey": user_key, "requestId": request_id, "adAccessCode": ad_access_code, "__cacheBust": random.random()}
-        body = {
-            "prompt": prompt,
-            "negativePrompt": negative,
-            "seed": seed,
-            "resolution": resolution,
-            "guidanceScale": guidance,
-            "channel": "ai-text-to-image-generator",
-            "subChannel": "private",
-            "userKey": user_key,
-            "adAccessCode": ad_access_code,
-            "requestId": request_id,
-        }
-
-
-# ---------------------------
-def build_tasks_from_request(req: GenerateRequest) -> List[Dict]:
-    """
-    """
-
-        fh.write(raw)
-    url = f"{str(app.url_path_for('files'))}/{os.path.relpath(outpath, OUT_DIR).replace(os.path.sep, '/')}"
-    # return full URL
-    # base URL unknown in background, so return path-like and let client prefix host
-    return {"path": outpath, "url_path": url}
-
-
-@app.post("/generate")
-async def generate_stream(request: Request, body: GenerateRequest = Body(...)):
-    """
-    Streaming endpoint. Sends Server-Sent-Events-like chunked responses with status updates.
-    Each event is a JSON object on its own line.
-    """
-    tasks = build_tasks_from_request(body)
-    if not tasks:
-        raise HTTPException(status_code=400, detail="no tasks built from request")
-
-    # set up proxy manager & client
-    proxy_mgr = ProxyManager(proxies=body.proxies, rotate=body.proxy_rotate) if body.proxies else None
-    client = PerchanceClient(proxy_manager=proxy_mgr, max_wait=body.max_wait)
-    # initial verify and ad code fetch
-    verify = client.verify_user(thread=0)
-    user_key = verify.get("userKey", "") or ""
-    ad_code = client.get_ad_access_code() or ""
-
-    concurrency = max(1, body.concurrency or 1)
-    save_dir = OUT_DIR
-
-    # We'll use a blocking ThreadPoolExecutor for generation tasks and stream results as they finish.
-    executor = ThreadPoolExecutor(max_workers=concurrency)
-
-    def event_stream():
-        # announce start
-        yield json.dumps({"event": "started", "total_tasks": len(tasks)}) + "\n"
-        futures = {}
-        for i, t in enumerate(tasks):
-            # submit each generation task
-            fut = executor.submit(run_generation_task, client, t, user_key, ad_code, save_dir, body.max_wait)
-            futures[fut] = {"task_index": i + 1, "task": t}
-        # as finished, send updates
-        completed = 0
-        for fut in as_completed(list(futures.keys())):
-            meta = futures[fut]
-            idx = meta["task_index"]
-            t = meta["task"]
-            try:
-                res = fut.result()
-            except Exception as e:
-                logger.exception("Task raised exception")
-                res = {"error": str(e)}
-            completed += 1
-            # prepare response entry
-            if res.get("path"):
-                url = make_file_url(request, res["path"])
-                event = {"event": "completed", "task_index": idx, "prompt": t["prompt"], "file_url": url}
-            else:
-                event = {"event": "failed", "task_index": idx, "prompt": t["prompt"], "error": res.get("error"), "raw": res.get("raw")}
-            # stream the event as JSON line
-            yield json.dumps(event) + "\n"
-        yield json.dumps({"event": "all_done", "completed": completed}) + "\n"
-
-    return StreamingResponse(event_stream(), media_type="application/json")
-
-
-@app.post("/generate_sync")
-async def generate_sync(request: Request, body: GenerateRequest = Body(...)):
-    """
-    Non-streaming endpoint that blocks and returns JSON array of results (list of urls / errors).
-    """
-    verify = client.verify_user(thread=0)
-    user_key = verify.get("userKey", "") or ""
-    ad_code = client.get_ad_access_code() or ""
-
-    concurrency = max(1, body.concurrency or 1)
-    results = [None] * len(tasks)
-    with ThreadPoolExecutor(max_workers=concurrency) as ex:
-        futures = {ex.submit(run_generation_task, client, t, user_key, ad_code, OUT_DIR, body.max_wait): idx for idx, t in enumerate(tasks)}
-        for fut in as_completed(futures):
-            idx = futures[fut]
-            try:
-                res = fut.result()
-            except Exception as e:
-                res = {"error": str(e)}
-            if res.get("path"):
-                url = make_file_url(request, res["path"])
-                results[idx] = {"ok": True, "url": url}
-            else:
-                results[idx] = {"ok": False, "error": res.get("error"), "raw": res.get("raw")}
-    return JSONResponse({"results": results})
-
-
-@app.get("/", response_class=HTMLResponse)
-async def index():
-    return """
-    <html><body>
-    <h2>Perchance Image Generator API</h2>
-    <p>POST /generate to stream results. POST /generate_sync to get final JSON. POST /upload_base64 to upload images.</p>
-    </body></html>
-    """
+# app.py
+import asyncio
+import json
+from typing import List, Dict, Any
+
+import aiohttp
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+from starlette.middleware.cors import CORSMiddleware
+import urllib.parse
+from bs4 import BeautifulSoup as bs
+import random
+import html
+from bs4 import NavigableString, Tag
+from rich import print
+
+class BingScraper:
+    # Rotating User-Agent list.
+    USER_AGENTS = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+        "Mozilla/5.0 (Linux; Android 10; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36"
+    ]
+
+    # Optional proxy list (empty if not used for fastest performance).
+    PROXIES = [
+        # "http://proxy1.example.com:8080",
+        # "http://proxy2.example.com:8080",
+    ]
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def get_random_headers(cls):
+        """Return HTTP headers with a random User-Agent."""
+        return {
+            "User-Agent": random.choice(cls.USER_AGENTS),
"Accept": "text/html,application/xhtml+xml,application/xml;""q=0.9,image/avif,image/webp,*/*;q=0.8","Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8","Cache-Control": "no-cache","Pragma": "no-cache","Cookie": "MUID=12164D9908FD6B8C103F5871090F6AAA; _EDGE_V=1; MUIDB=12164D9908FD6B8C103F5871090F6AAA; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=26D35EBC20D44E418642BA10D7C14F42&dmnchg=1; ak_bmsc=17AB29B1FF4DE4D83B080A7E5CD24350~000000000000000000000000000000~YAAQNtjIFzUD3a2WAQAAnKvFuRtoko96GJe/vh01R588ZQYnYquFWtB0CzXeN5JGXZWgz7CwJqtckHuj3Z70qUhOcji4vkzhCMc/u91gnAIA0zCu7FcDeEJQgRx6n9MxhjrDAel2IIezGUgh+5ktFvDgUIO05s06PqDAtIUzuc9yTrbdAJi3iZvxFFKdGnbQkJ5krI9w3auWhY6i7JvcUPiDsbzzv0Chj1MxzRT1zdkP1B/JFtz+s5d8rUfagFpQporeRG/9gdid4qUPWvPHD6k98AdCTBYOysMHH2z9ErrD5PCO2mLK/RPrJSoqqN4d2mtnWeHNeF897PioJk0nOJw/IrseF0EgdsscKs7NVg/e3Mp27FTEIBduBRa93vvaabLMxg38; _UR=QS=0&TQS=0&Pn=0; BFBUSR=BFBHP=0; _HPVN=CS=eyJQbiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiUCJ9LCJTYyI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiSCJ9LCJReiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiVCJ9LCJBcCI6dHJ1ZSwiTXV0ZSI6dHJ1ZSwiTGFkIjoiMjAyNS0wNS0xMFQwMDowMDowMFoiLCJJb3RkIjowLCJHd2IiOjAsIlRucyI6MCwiRGZ0IjpudWxsLCJNdnMiOjAsIkZsdCI6MCwiSW1wIjoyLCJUb2JuIjowfQ==; _Rwho=u=d&ts=2025-05-10; ipv6=hit=1746877082438&t=4; _EDGE_S=F=1&SID=159E288249896C4D3A4D3D6A487B6D96&mkt=en-in; USRLOC=HS=1&ELOC=LAT=28.648426055908203|LON=77.1643295288086|N=Delhi%2C%20Delhi|ELT=1|; SRCHUSR=DOB=20250510&DS=1; MMCASM=ID=94A07BAF860D449DA2A4ABC8CAAC2538; _RwBf=r=0&ilt=9&ihpd=1&ispd=1&rc=24&rb=0&rg=200&pc=21&mtu=0&rbb=0&clo=0&v=9&l=2025-05-10T07:00:00.0000000Z&lft=0001-01-01T00:00:00.0000000&aof=0&ard=0001-01-01T00:00:00.0000000&rwdbt=0&rwflt=0&rwaul2=0&g=&o=2&p=&c=&t=0&s=0001-01-01T00:00:00.0000000+00:00&ts=2025-05-10T11:00:39.6623875+00:00&rwred=0&wls=&wlb=&wle=&ccp=&cpt=&lka=0&lkt=0&aad=0&TH=&cid=0&gb=; _SS=SID=159E288249896C4D3A4D3D6A487B6D96&R=24&RB=0&GB=0&RG=200&RP=21; SRCHHPGUSR=SRCHLANG=en&IG=00DEBA4A2B754BC983011302535541CA&PV=10.0.0&DM=1&BRW=S&BRH=M&CW=1094&CH=738&SCW=1177&SCH=2902&DPR=1.3&UTC=330&WTS=63882470261&PRVCW=1094&PRVCH=738&EXLTT=1&HV=1746874838&HVE=CfDJ8GtUudZcSi1Enm88WwQKtCf-s_3m7rtBIR85jW2Uv01W2IjDUasKRdncp2MkJ7Bl7PxVetzuZETt8bkyd54iRcMP8SVUsClaL2I5uvRiGiSldOKFjy7i69jYPS-egJOhCGf717H5WHFvCI4UwespMZgxZkdo8SVoBwOlx_yQKpA2qtpqV7t6wYd7etwY1FpUaA&BCML=0&BCSRLANG=&ADLT=OFF"
+
+        }
+
+
+    @classmethod
+    def get_random_proxy(cls):
+        """Return a random proxy if available."""
+        if cls.PROXIES:
+            return random.choice(cls.PROXIES)
+        return None
+
+    async def fetch_html(self, url, session):
+        """Fetch HTML content asynchronously with minimal overhead."""
+        headers = self.get_random_headers()
+        proxy = self.get_random_proxy()
+        try:
+            async with session.get(url, headers=headers, proxy=proxy, timeout=30) as response:
+                if response.status != 200:
+                    print(f"Failed to fetch {url}: HTTP {response.status}")
+                    return None
+                return await response.text()
+        except Exception as e:
+            print(f"Error fetching {url}: {e}")
+            return None
+
+    # ===== Text Search Methods =====
+    async def fetch_bing_text(self, query, session):
+        """Fetch Bing search results HTML for the given query."""
+        encoded_query = urllib.parse.quote_plus(query)
+        url = f'https://www.bing.com/search?q={encoded_query}'
+        return await self.fetch_html(url, session)
+
+    def parse_text_results(self, html):
+        """
+        Parse the Bing HTML using BeautifulSoup to extract:
+          - icon (if available)
+          - URL
+          - Title
+          - Abstract (or fallback text)
+          - Additional columns (if available)
+        """
+        soup = bs(html, 'lxml')
+        results = []
+        result_no = 0
+        main_containers = soup.find_all(id="b_results")
+        for container in main_containers:
+            list_results = container.find_all(class_='b_algo')
+            for cont in list_results:
+                result_no += 1
+                try:
+                    icon = 'https:' + cont.find_all(class_='rms_iac')[0].get('data-src')
+                except Exception:
+                    icon = None
+                try:
+                    URL = cont.find_all(class_='tilk')[0].get('href')
+                except Exception:
+                    continue
+                try:
+                    Title = cont.find_all('h2')[0].get_text(strip=True)
+                except Exception:
+                    Title = "No Title"
+                try:
+                    abstract_elem = cont.find_all(class_='b_caption')
+                    if abstract_elem and abstract_elem[0].get_text(strip=True):
+                        Abstract = abstract_elem[0].get_text(strip=True)
+                    else:
+                        Abstract = cont.find_all(class_='b_algoSlug')[0].get_text(strip=True)
+                except Exception:
+                    Abstract = "No Abstract"
+                other = []
+                try:
+                    for column in cont.find_all(class_='b_rc_gb_sub_column'):
+                        for div in column.find_all('div'):
+                            try:
+                                sub_title = div.find_all(class_='b_rc_gb_sub_title')[0].get_text(strip=True)
+                            except Exception:
+                                sub_title = "No Sub-title"
+                            try:
+                                sub_description = div.find_all(class_='b_rc_gb_text_wrapper')[0].get_text(strip=True)
+                            except Exception:
+                                sub_description = ""
+                            if sub_description:
+                                other.append({'Title': sub_title, 'Description': sub_description})
+                except Exception:
+                    other = None
+                results.append({
+                    'no': result_no,
+                    'icon': icon,
+                    'URL': URL,
+                    'Title': Title,
+                    'Abstract': Abstract,
+                    'other': other
+                })
+        return results
+
+    async def search_text(self, query, session):
+        """Perform a text search (regular and news) and return parsed results."""
+        real_query = query
+        news_query = query + " news"
+        # Run both searches concurrently.
+        real_html_task = asyncio.create_task(self.fetch_bing_text(real_query, session))
+        news_html_task = asyncio.create_task(self.fetch_bing_text(news_query, session))
+        real_html, news_html = await asyncio.gather(real_html_task, news_html_task)
+        if not real_html or not news_html:
+            print("Failed to retrieve one or both text searches.")
+            return None
+        real_results = self.parse_text_results(real_html)
+        news_results = self.parse_text_results(news_html)
+        return {
+            "query_results": real_results,
+            "news_results": news_results
+        }
+
+    # ===== Video Search Methods =====
+    async def fetch_bing_video(self, query, session):
+        """
+        Fetch Bing video search results HTML for the given query.
+        The URL below targets Bing's video search endpoint.
+        """
+        encoded_query = urllib.parse.quote_plus(query)
+        url = f'https://www.bing.com/videos/search?q={encoded_query}&adlt=off'
+        return await self.fetch_html(url, session)
+
+    def parse_video_results(self, html_content):
+        """
+        Parse the Bing video search HTML to extract:
+          - Video URL (from the mmeta JSON)
+          - Title (from an element with class "mc_vtvc_title")
+          - Thumbnail URL (from the mmeta JSON)
+          - Duration (if available)
+          - Source (if available)
+        """
+        soup = bs(html_content, 'lxml')
+        video_results = []
+        result_no = 0
+
+        # Find all divs with class "mc_vtvc" that have an mmeta attribute.
+        video_divs = soup.find_all("div", class_="mc_vtvc", attrs={"mmeta": True})
+        for div in video_divs:
+            result_no += 1
+            mmeta_raw = div.get("mmeta")
+            try:
+                # Unescape and parse the JSON metadata.
+                mmeta_json = json.loads(html.unescape(mmeta_raw))
+            except Exception as e:
+                mmeta_json = {}
+                print(f"Error parsing mmeta JSON: {e}")
+            # Retrieve video URL and thumbnail from the metadata.
+            video_url = mmeta_json.get("murl") or mmeta_json.get("pgurl")
+            thumbnail = mmeta_json.get("turl")
+            # Find the title; often stored in an element with class "mc_vtvc_title".
+            title_elem = div.find(class_="mc_vtvc_title")
+            title = title_elem.get_text(strip=True) if title_elem else "No Title"
+            # Look for duration within an element that may contain a duration value.
+            duration_elem = div.find(class_="mc_bc_rc")
+            duration = duration_elem.get_text(strip=True) if duration_elem else None
+            # Optionally, extract the video source if available.
+            source_elem = div.find(class_="mc_vtvc_meta_source")
+            source = source_elem.get_text(strip=True) if source_elem else None
+
+            video_results.append({
+                "no": result_no,
+                "Video URL": video_url,
+                "Title": title,
+                "Thumbnail": thumbnail,
+                "Duration": duration,
+                "Source": source,
+                "Meta": mmeta_json  # Additional metadata if needed.
+            })
+        return video_results
+
+    async def search_video(self, query, session):
+        """Perform a video search and return parsed results."""
+        html_content = await self.fetch_bing_video(query, session)
+        if not html_content:
+            print("Failed to retrieve video search results.")
+            return None
+        video_results = self.parse_video_results(html_content)
+        return {
+            "query": query,
+            "video_results": video_results
+        }
+
+    # ===== Image Search Methods =====
+    async def fetch_bing_image(self, query, session):
+        """Fetch Bing images search results HTML for the given query."""
+        encoded_query = urllib.parse.quote_plus(query)
+        # Bing image search endpoint.
+        url = f'https://www.bing.com/images/search?q={encoded_query}'
+        return await self.fetch_html(url, session)
+
+    def parse_image_results(self, html):
+        """
+        Parse the Bing images HTML using BeautifulSoup to extract detailed image data.
+        For each image result (<li> inside a <ul class="dgControl_list">), extract:
+          - JSON data from the 'm' attribute in <a class="iusc">
+          - Image resolution from the <span class="nowrap"> tag inside <div class="img_info hon">
+          - The hosting domain from the <div class="lnkw"> tag.
+        """
+        soup = bs(html, 'lxml')
+        image_results = []
+        # Find all <ul> elements that contain image results.
+        ul_lists = soup.find_all("ul", class_="dgControl_list")
+        for ul in ul_lists:
+            li_elements = ul.find_all("li")
+            for li in li_elements:
+                image_data = {}
+                try:
+                    # Extract the JSON details from the anchor tag.
+                    a_tag = li.find("a", class_="iusc")
+                    if not a_tag:
+                        continue
+                    m_json_str = a_tag.get("m")
+                    if not m_json_str:
+                        continue
+                    try:
+                        # Parse the JSON string.
+                        details = json.loads(m_json_str)
+                    except Exception as e:
+                        print(f"JSON parsing error: {e}")
+                        continue
+                    # Copy all available keys from the JSON.
+                    image_data.update(details)
+                    # Extract additional details from the HTML.
+                    # Resolution: from <div class="img_info hon"> -> <span class="nowrap">
+                    img_info = li.find("div", class_="img_info")
+                    if img_info:
+                        resolution_span = img_info.find("span", class_="nowrap")
+                        if resolution_span:
+                            image_data["resolution"] = resolution_span.get_text(strip=True)
+                    # Domain: from <div class="lnkw"> -> <a> text.
+                    lnkw_div = li.find("div", class_="lnkw")
+                    if lnkw_div:
+                        domain_a = lnkw_div.find("a")
+                        if domain_a:
+                            image_data["domain"] = domain_a.get_text(strip=True)
+                    image_results.append(image_data)
+                except Exception as e:
+                    print(f"Error parsing an image result: {e}")
+                    continue
+        return image_results
+
+    async def search_image(self, query, session):
+        """Perform an image search and return parsed results."""
+        image_html = await self.fetch_bing_image(query, session)
+        if not image_html:
+            print("Failed to retrieve image search results.")
+            return None
+        image_results = self.parse_image_results(image_html)
+        return {
+            "query": query,
+            "image_results": image_results
+        }
+
+    # ===== Wikipedia Search and Extraction Methods =====
+    @staticmethod
+    def format_anchor(tag):
+        """
+        Format an <a> tag into the desired output format:
+          (Link Text)[Full_URL]
+        """
+        href = tag.get("href", "")
+        if href.startswith("/") or href.startswith("#"):
+            href = "https://en.wikipedia.org" + href
+        text = tag.get_text(strip=True)
+        return f"({text})[{href}]"
+
+    @classmethod
+    def extract_text(cls, element):
+        """
+        Recursively extract text from an element.
+        - For <a> tags, return the formatted version and do not process its children.
+        - For other tags, process only their immediate children.
+        """
+        if isinstance(element, NavigableString):
+            return element.strip()
+        if isinstance(element, Tag):
+            if element.name in ["style", "script", "noscript"]:
+                return ""
+            if element.name == "a":
+                return cls.format_anchor(element)
+            texts = []
+            for child in element.children:
+                t = cls.extract_text(child)
+                if t:
+                    texts.append(t)
+            return " ".join(texts).strip()
+        return ""
+
+    @classmethod
+    def process_infobox(cls, table):
+        """
+        Process the infobox table row by row using recursive extraction.
+        Returns a list of strings where each line represents a row.
+        """
+        lines = []
+        for row in table.find_all("tr"):
+            header_cell = row.find("th")
+            data_cell = row.find("td")
+            header_text = cls.extract_text(header_cell) if header_cell else ""
+            data_text = cls.extract_text(data_cell) if data_cell else ""
+            if header_text and data_text:
+                lines.append(f"{header_text}: {data_text}")
+            elif header_text:
+                lines.append(header_text)
+            elif data_text:
+                lines.append(data_text)
+        return lines
+
+    @classmethod
+    def process_paragraph(cls, p_tag):
+        """
+        Process the paragraph (<p> tag) using recursive extraction.
+        """
+        return cls.extract_text(p_tag)
+
+    async def extract_page_content(self, url, session):
+        """
+        Fetch the Wikipedia page HTML once and extract both the infobox and a
+        paragraph with sufficient content.
+        """
+        html_content = await self.fetch_html(url, session)
+        if not html_content:
+            print("Failed to fetch the page.")
+            return None, None
+
+        soup = bs(html_content, "lxml")
+        # Extract infobox.
+        table = soup.find("table", class_="infobox")
+        infobox_lines = self.process_infobox(table) if table else None
+
+        # Extract a paragraph with sufficient content.
+        content_div = soup.find("div", class_="mw-content-ltr")
+        paragraph_text = None
+        if content_div:
+            paragraphs = content_div.find_all("p")
+            for p in paragraphs:
+                text = self.process_paragraph(p)
+                # Only choose a paragraph if it has at least 5 words (adjust threshold as needed)
+                if len(text.split()) >= 5:
+                    paragraph_text = text
+                    break
+
+        return infobox_lines, paragraph_text
+
+    async def search_wikipedia_url(self, query, session):
+        """
+        Perform a Bing search for the query and return a Wikipedia URL if found.
+        """
+        # Correct the search query by appending "wikipedia.org"
+        encoded_query = urllib.parse.quote_plus(query + " wikipedia.org")
+        bing_url = f"https://www.bing.com/search?q={encoded_query}"
+        html_content = await self.fetch_html(bing_url, session)
+        if not html_content:
+            return None
+        soup = bs(html_content, "lxml")
+        # Try different containers for search results.
+        results = soup.find_all("li", class_="b_algo") or soup.find_all("div", class_="b_algo")
+        for result in results:
+            a_tag = result.find("a", href=True)
+            if a_tag and "wikipedia.org" in a_tag["href"]:
+                return a_tag["href"]
+        return None
+
+    async def get_wikipedia_url_concurrently(self, query, session, num_attempts=10):
+        """
+        Launch num_attempts concurrent search tasks for a Wikipedia URL.
+        Return the first found Wikipedia URL and cancel the other tasks.
+        """
+        tasks = [asyncio.create_task(self.search_wikipedia_url(query, session)) for _ in range(num_attempts)]
+        done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+        wiki_url = None
+        for task in done:
+            wiki_url = task.result()
+            if wiki_url:
+                break
+        for task in pending:
+            task.cancel()
+        return wiki_url
+
+    async def search_wikipedia(self, query, session):
+        """
+        Search Bing for a Wikipedia URL and extract its infobox and a paragraph.
+        """
+        wiki_url = await self.get_wikipedia_url_concurrently(query, session, num_attempts=10)
+        if not wiki_url:
+            print("Could not find a Wikipedia URL for the query.")
+            return None
+        print(f"\nFound Wikipedia URL: {wiki_url}\n")
+        infobox_lines, paragraph_text = await self.extract_page_content(wiki_url, session)
+        return {
+            "wikipedia_url": wiki_url,
+            "infobox": infobox_lines,
+            "paragraph": paragraph_text
+        }
+
+# New wrapper class providing a unified API interface.
+class BingScraperAPI:
+    """
+    A unified API that exposes methods as:
+      - search(query)
+      - video(query)
+      - images(query)
+      - wikipedia(query)
+    In addition, a new asynchronous generator 'fetch' is provided to run multiple
+    providers concurrently and yield results as soon as they become available.
+    """
+    def __init__(self):
+        self.scraper = BingScraper()
+
+    async def search(self, query):
+        """Perform a regular and news text search."""
+        async with aiohttp.ClientSession() as session:
+            return await self.scraper.search_text(query, session)
+
+    async def video(self, query):
+        """Perform a video search."""
+        async with aiohttp.ClientSession() as session:
+            return await self.scraper.search_video(query, session)
+
+    async def images(self, query):
+        """Perform an image search."""
+        async with aiohttp.ClientSession() as session:
+            return await self.scraper.search_image(query, session)
+
+    async def wikipedia(self, query):
+        """Perform a Wikipedia search and extract infobox and content."""
+        async with aiohttp.ClientSession() as session:
+            return await self.scraper.search_wikipedia(query, session)
+
+    async def fetch(self, providers, param):
+        """
+        Accepts:
+          - providers: a list of provider names (e.g., ["search", "video", "images"])
+          - param: a list of query parameters.
+            If a single parameter is provided, it is used for all providers.
+            Otherwise, the number of parameters must match the number of providers.
+
+        Launches tasks concurrently and yields (provider_name, result) as soon as each is available.
+        """
+        # Map provider names to the corresponding methods.
+        provider_map = {
+            "search": self.search,
+            "video": self.video,
+            "images": self.images,
+            "wikipedia": self.wikipedia
+        }
+
+        # Determine query parameters to use for each provider.
+        if len(param) == 1:
+            params = [param[0]] * len(providers)
+        elif len(param) == len(providers):
+            params = param
+        else:
+            raise ValueError("The number of query parameters must be either 1 or equal to the number of providers.")
+
+        tasks = []
+        for prov, q in zip(providers, params):
+            if prov not in provider_map:
+                raise ValueError(f"Unknown provider: {prov}")
+            # Schedule the provider call.
+            task = asyncio.create_task(provider_map[prov](q))
+            tasks.append((prov, task))
+
+        # Yield results as they become available.
+        for prov, task in tasks:
+            try:
+                result = await asyncio.wait_for(task, timeout=60)
+                yield prov, result
+            except Exception as e:
+                yield prov, {"error": str(e)}
+
+
+app = FastAPI(title="BingScraper API (FastAPI wrapper)")
+
+# Allow CORS for local dev; change origins in production
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Pydantic model for request validation
+class FetchRequest(BaseModel):
+    providers: List[str]
+    param: List[str]
+
+
+# Allowed providers -> underlying scraper call names (these call the low-level methods)
+ALLOWED_PROVIDERS = {
+    "search": "search_text",
+    "video": "search_video",
+    "images": "search_image",
+    "wikipedia": "search_wikipedia"
+}
+
+
+async def _run_provider(scraper: BingScraperAPI, provider: str, query: str, session: aiohttp.ClientSession, timeout: float = 30.0) -> Dict[str, Any]:
+    """
+    Run a single provider using the underlying BingScraper methods (reusing the given session).
+    Returns a dict: {"provider": provider, "result": ...}
+    """
+    # Map provider to the low-level method on scraper.scraper
+    low_level = ALLOWED_PROVIDERS.get(provider)
+    if not low_level:
+        raise ValueError(f"Unknown provider: {provider}")
+
+    method = getattr(scraper.scraper, low_level, None)
+    if not method or not asyncio.iscoroutinefunction(method):
+        raise RuntimeError(f"Provider method not found or not async: {low_level}")
+
+    try:
+        # Run the provider with a per-call timeout
+        result = await asyncio.wait_for(method(query, session), timeout=timeout)
+        return {"provider": provider, "result": result}
+    except asyncio.CancelledError:
+        raise
+    except Exception as e:
+        return {"provider": provider, "error": str(e)}
+
+
+@app.post("/v1/agents/websearch")
+async def fetch_post(payload: FetchRequest):
+    """
+    Accepts JSON:
+    {
+      "providers": ["search", "images"],
+      "param": ["who is elon musk", "elon musk"]
+    }
+    Returns JSON mapping provider -> result as they finish (collected).
+    """
+    providers = payload.providers
+    param = payload.param
+
+    if not providers:
+        raise HTTPException(status_code=400, detail="providers must be a non-empty list")
+    if not param:
+        raise HTTPException(status_code=400, detail="param must be a non-empty list")
+
+    # Expand params list to match providers (if user passed single param)
+    if len(param) == 1:
+        params = [param[0]] * len(providers)
+    elif len(param) == len(providers):
+        params = param
+    else:
+        raise HTTPException(status_code=400, detail="param length must be 1 or equal to length of providers")
+
+    # Validate providers
+    for p in providers:
+        if p not in ALLOWED_PROVIDERS:
+            raise HTTPException(status_code=400, detail=f"Unsupported provider: {p}")
+
+    scraper_api = BingScraperAPI()
+
+    # Use a single aiohttp session for all provider calls (faster)
+    async with aiohttp.ClientSession() as session:
+        tasks = [
+            asyncio.create_task(_run_provider(scraper_api, prov, q, session))
+            for prov, q in zip(providers, params)
+        ]
+
+        results = {}
+        # Collect results as they finish
+        for coro in asyncio.as_completed(tasks):
+            res = await coro
+            prov = res.get("provider")
+            # store either result or error
+            results[prov] = res.get("result") if "result" in res else {"error": res.get("error")}
+
+    return JSONResponse(results)
+
+
+@app.get("/v1/agents/websearch/stream")
+async def stream(request: Request, providers: str, params: str):
+    """
+    Stream results as Server-Sent Events (SSE).
+    Query string example:
+      /v1/agents/websearch/stream?providers=search,images&params=who%20is%20elon%20musk,elon%20musk
+    Each SSE event will be JSON:
+      {"provider": "<provider>", "result": <result or error>}
+    """
+    prov_list = [p.strip() for p in providers.split(",") if p.strip()]
+    param_list = [p.strip() for p in params.split(",") if p.strip()]
+
+    if not prov_list:
+        raise HTTPException(status_code=400, detail="providers query param required")
+    if not param_list:
+        raise HTTPException(status_code=400, detail="params query param required")
+
+    if len(param_list) == 1:
+        param_list = [param_list[0]] * len(prov_list)
+    elif len(param_list) != len(prov_list):
+        raise HTTPException(status_code=400, detail="params must have length 1 or equal to providers length")
+
+    for p in prov_list:
+        if p not in ALLOWED_PROVIDERS:
+            raise HTTPException(status_code=400, detail=f"Unsupported provider: {p}")
+
+    scraper_api = BingScraperAPI()
+
+    async def event_generator():
+        # Create one session for all calls
+        async with aiohttp.ClientSession() as session:
+            tasks = [
+                asyncio.create_task(_run_provider(scraper_api, prov, q, session))
+                for prov, q in zip(prov_list, param_list)
+            ]
+
+            for coro in asyncio.as_completed(tasks):
+                if await request.is_disconnected():
+                    # client disconnected
+                    for t in tasks:
+                        t.cancel()
+                    break
+                res = await coro
+                # SSE format: data: <json>\n\n
+                payload = json.dumps(res, default=str)
+                yield f"data: {payload}\n\n"
+
+    return StreamingResponse(event_generator(), media_type="text/event-stream")
+
+
+# Optional root endpoint
+@app.get("/")
+async def root():
+    return {"msg": "BingScraper FastAPI wrapper. Use POST /v1/agents/websearch or GET /v1/agents/websearch/stream"}
+
+
+# To run:
+# uvicorn app:app --host 0.0.0.0 --port 8000 --reload
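
For reference, a minimal sketch of driving the new BingScraperAPI.fetch async generator directly, without going through HTTP. This assumes the file above is saved as app.py and importable; the query string is only an illustration.

import asyncio

from app import BingScraperAPI  # assumes the file above is saved as app.py


async def main():
    api = BingScraperAPI()
    # A single query is reused for every provider; results are yielded as each provider finishes.
    async for provider, result in api.fetch(["search", "images"], ["who is elon musk"]):
        print(provider, type(result))


asyncio.run(main())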
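A client sketch for the POST endpoint, assuming the server is started locally on port 8000 with the uvicorn command above. The requests library is not part of this app and is used here only for illustration.

import requests

resp = requests.post(
    "http://localhost:8000/v1/agents/websearch",
    json={"providers": ["search", "images"], "param": ["who is elon musk"]},
    timeout=120,
)
resp.raise_for_status()
data = resp.json()  # e.g. {"search": {...}, "images": {...}}
print(list(data.keys()))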
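And a sketch of consuming the SSE stream endpoint with aiohttp, under the same localhost assumption. The "data: " prefix parsing mirrors the format produced by event_generator above; blank keep-alive lines between events are simply skipped.

import asyncio
import json

import aiohttp

STREAM_URL = (
    "http://localhost:8000/v1/agents/websearch/stream"
    "?providers=search,images&params=who%20is%20elon%20musk"
)


async def consume():
    async with aiohttp.ClientSession() as session:
        async with session.get(STREAM_URL) as resp:
            async for raw in resp.content:  # one line per iteration
                line = raw.decode().strip()
                if line.startswith("data: "):
                    event = json.loads(line[len("data: "):])
                    print(event["provider"], "finished")


asyncio.run(consume())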