crawl4ai

Sleeping

App Files Community

DarmacSEO committed on Nov 9, 2025

Commit

40488af

verified ·

1 Parent(s): 11b634d

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -29

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ app = FastAPI(title="Crawl4AI API")
 URL_RE = re.compile(r"https?://[^\s\"'>)]+", re.IGNORECASE)
 def find_url_anywhere(obj):
-    """Znajdź pierwszy URL w obiekcie JSON, form-data albo tekście."""
     if isinstance(obj, str):
         m = URL_RE.search(obj)
         return m.group(0) if m else None
@@ -18,23 +17,19 @@ def find_url_anywhere(obj):
                 return v
         for v in obj.values():
             u = find_url_anywhere(v)
-            if u:
-                return u
     if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
         for it in obj:
             u = find_url_anywhere(it)
-            if u:
-                return u
     return None
 @app.get("/healthz")
 def health():
-    """Prosty test czy apka działa."""
     return {"status": "ok"}
 @app.get("/crawl")
 async def crawl_get(url: str | None = None):
-    """GET /crawl?url=https://..."""
     if not url:
         raise HTTPException(status_code=400, detail="Provide ?url=https://...")
     return await do_crawl(url)
@@ -44,56 +39,48 @@ async def crawl_post(
     request: Request,
     payload: dict | None = Body(None, example={"url": "https://example.com"})
 ):
-    """POST /crawl z dowolnym formatem wejściowym (JSON, form-data, raw)."""
     url = request.query_params.get("url")
-    # JSON body
     if not url and isinstance(payload, dict):
         url = find_url_anywhere(payload)
-    # form-data / x-www-form-urlencoded
     if not url:
         try:
             form = await request.form()
             url = find_url_anywhere(dict(form))
         except Exception:
             pass
-    # raw body (np. text/plain)
     if not url:
         raw = await request.body()
         url = find_url_anywhere(raw.decode("utf-8", errors="ignore"))
     if not url:
-        raise HTTPException(
-            status_code=400,
-            detail="No URL found. Send JSON {'url':'https://...'} or use GET /crawl?url=..."
-        )
     return await do_crawl(url)
 async def do_crawl(url: str):
-    """Właściwe pobranie treści przez Crawl4AI."""
     try:
-        cfg = CrawlerRunConfig()  # bez 'js' – zgodne z 0.7.x
         async with AsyncWebCrawler() as crawler:
             result = await crawler.arun(url=url, config=cfg)
         text_val = getattr(result, "cleaned_text", None)
-        md_val = getattr(result, "markdown", None)
-        # 🔥 ZWRACAMY WSZYSTKO + KLUCZ 'results' dla Dify
         return {
             "url": url,
             "status": "ok",
             "text": text_val,
             "markdown": md_val,
-            "results": {  # 👈 kompatybilność z Dify Marketplace
-                "url": url,
-                "text": text_val,
-                "markdown": md_val
-            }
         }
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Crawl error: {e}")

 URL_RE = re.compile(r"https?://[^\s\"'>)]+", re.IGNORECASE)
 def find_url_anywhere(obj):
     if isinstance(obj, str):
         m = URL_RE.search(obj)
         return m.group(0) if m else None
                 return v
         for v in obj.values():
             u = find_url_anywhere(v)
+            if u: return u
     if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
         for it in obj:
             u = find_url_anywhere(it)
+            if u: return u
     return None
 @app.get("/healthz")
 def health():
     return {"status": "ok"}
 @app.get("/crawl")
 async def crawl_get(url: str | None = None):
     if not url:
         raise HTTPException(status_code=400, detail="Provide ?url=https://...")
     return await do_crawl(url)
     request: Request,
     payload: dict | None = Body(None, example={"url": "https://example.com"})
 ):
     url = request.query_params.get("url")
     if not url and isinstance(payload, dict):
         url = find_url_anywhere(payload)
     if not url:
         try:
             form = await request.form()
             url = find_url_anywhere(dict(form))
         except Exception:
             pass
     if not url:
         raw = await request.body()
         url = find_url_anywhere(raw.decode("utf-8", errors="ignore"))
     if not url:
+        raise HTTPException(status_code=400, detail="No URL found. Send {'url':'https://...'}")
     return await do_crawl(url)
 async def do_crawl(url: str):
     try:
+        cfg = CrawlerRunConfig()  # kompatybilne z Twoją wersją crawl4ai
         async with AsyncWebCrawler() as crawler:
             result = await crawler.arun(url=url, config=cfg)
         text_val = getattr(result, "cleaned_text", None)
+        md_val   = getattr(result, "markdown", None)
+        content  = md_val or text_val or ""
+        # Odpowiedź zgodna i z Twoim API, i z wtyczką Dify:
         return {
             "url": url,
             "status": "ok",
             "text": text_val,
             "markdown": md_val,
+            # 👇 WTYCZKA MARKETPLACE patrzy na results[0].content
+            "results": [
+                {
+                    "url": url,
+                    "content": content
+                }
+            ],
+            "success": True
         }
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Crawl error: {e}" )