Spaces:

spunteam
/

api-web-crawler

Sleeping

App Files Files Community

mrfirdauss committed on Oct 13, 2025

Commit

bbe93df

1 Parent(s): eced25c

fix: change json loader

Browse files

Files changed (1) hide show

server.py +32 -22

server.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import logging
 from flask import Flask, request, jsonify
 from dotenv import load_dotenv
-import asyncio
 from app.util.gen_ai_base import GenAIBaseClient
 from app.util.browser_agent import BrowserAgent
@@ -17,32 +17,42 @@ def create_app() -> Flask:
     @app.route('/scrape', methods=['POST'])
     async def scrape():
-        body = request.get_json(force=True, silent=False)
-        logging.info(f"Headers: {dict(request.headers)}")
-        logging.info(f"Raw data: {request.data}")
-        url = body.get('url')
-        max_depth = body.get('max_depth', 2)
-        if not url:
-            return jsonify({"error": "URL is required"}), 400
-        api_key = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
-        explorer = GenAIBaseClient(api_key=api_key)
-        try:
-            async with BrowserAgent(model=explorer, max_depth=max_depth) as agent:
-                root_node = await agent.run(start_url=url)
-                if not root_node:
-                    return jsonify({"error": "Exploration failed or returned no data"}), 500
         except Exception as e:
-            logging.error(f"Error during scraping: {e}")
             return jsonify({"error": str(e)}), 500
-        response_data = {
-            "link_map": {href: node.model_dump() for href, node in agent.link_map.items()},
-            "token_usage": explorer.token_usage
-        }
-        return jsonify(response_data), 200
     @app.route('/', methods=['GET'])
     def hello_world():
         return "Flask server is running.", 200

 import logging
 from flask import Flask, request, jsonify
 from dotenv import load_dotenv
+import json
 from app.util.gen_ai_base import GenAIBaseClient
 from app.util.browser_agent import BrowserAgent
     @app.route('/scrape', methods=['POST'])
     async def scrape():
+        try:
+            raw = request.get_data(as_text=True)
+            print("Raw body:", raw)
+            # Manual parse so Content-Type doesn't matter
+            body = json.loads(raw)
+            logging.info(f"Headers: {dict(request.headers)}")
+            logging.info(f"Raw data: {request.data}")
+            url = body.get('url')
+            max_depth = body.get('max_depth', 2)
+            if not url:
+                return jsonify({"error": "URL is required"}), 400
+            api_key = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
+            explorer = GenAIBaseClient(api_key=api_key)
+            try:
+                async with BrowserAgent(model=explorer, max_depth=max_depth) as agent:
+                    root_node = await agent.run(start_url=url)
+                    if not root_node:
+                        return jsonify({"error": "Exploration failed or returned no data"}), 500
+            except Exception as e:
+                logging.error(f"Error during scraping: {e}")
+                return jsonify({"error": str(e)}), 500
+            response_data = {
+                "link_map": {href: node.model_dump() for href, node in agent.link_map.items()},
+                "token_usage": explorer.token_usage
+            }
+            return jsonify(response_data), 200
+        except json.JSONDecodeError as e:
+            return jsonify({"error": f"Invalid JSON: {e}"}), 400
         except Exception as e:
+            import traceback
+            traceback.print_exc()
             return jsonify({"error": str(e)}), 500
     @app.route('/', methods=['GET'])
     def hello_world():
         return "Flask server is running.", 200