broadfield-dev committed on
Commit
32711b3
·
verified ·
1 Parent(s): 3773da8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -16
app.py CHANGED
@@ -1,15 +1,21 @@
1
  import os
2
- os.system("playwright install")
3
  import re
4
  import urllib.parse
5
  import asyncio
6
  from typing import Dict, Optional
7
  from itertools import cycle
8
 
9
- import gradio as gr
 
 
 
10
  from bs4 import BeautifulSoup, NavigableString
11
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
12
 
 
 
 
 
13
  class CredentialRevolver:
14
  def __init__(self, proxy_string: str):
15
  self.proxies = self._parse_proxies(proxy_string)
@@ -57,6 +63,7 @@ SEARCH_ENGINES = {
57
  "Perplexity": "https://www.perplexity.ai/search?q={query}",
58
  }
59
 
 
60
  class HTML_TO_MARKDOWN_CONVERTER:
61
  def __init__(self, soup: BeautifulSoup, base_url: str):
62
  self.soup = soup
@@ -108,6 +115,7 @@ class HTML_TO_MARKDOWN_CONVERTER:
108
  return f"\n\n![{alt}]({full_src})\n\n"
109
  return inner_md
110
 
 
111
  async def perform_web_browse(action: str, query: str, browser_name: str, search_engine_name: str):
112
  browser_key = browser_name.lower()
113
  if "playwright" not in PLAYWRIGHT_STATE:
@@ -170,21 +178,54 @@ async def perform_web_browse(action: str, query: str, browser_name: str, search_
170
  if 'page' in locals() and not page.is_closed(): await page.close()
171
  if 'context' in locals(): await context.close()
172
 
173
- with gr.Blocks(title="Web Browse API", theme=gr.themes.Soft()) as demo:
174
- gr.Markdown("# Web Browse API")
175
- gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
176
-
177
- action_input = gr.Radio(label="Action", choices=["Search", "Scrape URL"], value="Search")
178
- query_input = gr.Textbox(label="Query or URL", placeholder="e.g., 'best cat food' or 'www.wikipedia.org'")
179
-
180
- with gr.Row():
181
- browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
182
- search_engine_input = gr.Dropdown(label="Search Engine (if action is Search)", choices=sorted(list(SEARCH_ENGINES.keys())), value="DuckDuckGo", scale=2)
 
183
 
184
- submit_button = gr.Button("Browse", variant="primary")
185
- output_json = gr.JSON(label="API Result")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- submit_button.click(fn=perform_web_browse, inputs=[action_input, query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
 
 
 
 
 
 
 
 
 
 
188
 
 
189
  if __name__ == "__main__":
190
- demo.launch(mcp_server=True)
 
 
 
1
  import os
 
2
  import re
3
  import urllib.parse
4
  import asyncio
5
  from typing import Dict, Optional
6
  from itertools import cycle
7
 
8
+ # Download the Playwright browser binaries (the playwright package itself must already be installed)
9
+ os.system("playwright install")
10
+
11
+ from flask import Flask, request, jsonify
12
  from bs4 import BeautifulSoup, NavigableString
13
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
14
 
15
+ # --- Flask App Initialization ---
16
+ app = Flask(__name__)
17
+
18
+ # --- Credential and State Management (largely unchanged) ---
19
  class CredentialRevolver:
20
  def __init__(self, proxy_string: str):
21
  self.proxies = self._parse_proxies(proxy_string)
 
63
  "Perplexity": "https://www.perplexity.ai/search?q={query}",
64
  }
65
 
66
+ # --- HTML to Markdown Conversion (unchanged) ---
67
  class HTML_TO_MARKDOWN_CONVERTER:
68
  def __init__(self, soup: BeautifulSoup, base_url: str):
69
  self.soup = soup
 
115
  return f"\n\n![{alt}]({full_src})\n\n"
116
  return inner_md
117
 
118
+ # --- Core Web Browsing Logic (unchanged) ---
119
  async def perform_web_browse(action: str, query: str, browser_name: str, search_engine_name: str):
120
  browser_key = browser_name.lower()
121
  if "playwright" not in PLAYWRIGHT_STATE:
 
178
  if 'page' in locals() and not page.is_closed(): await page.close()
179
  if 'context' in locals(): await context.close()
180
 
181
+
182
+ # --- API Endpoint Definition ---
183
@app.route('/web_browse', methods=['POST'])
def web_browse():
    r"""
    API endpoint to perform a web search or scrape a URL.

    This endpoint expects a JSON object payload with the following parameters:
    - "action": "Search" or "Scrape URL" (required)
    - "query": The search term or the URL to scrape (required)
    - "browser_name": "firefox", "chromium", or "webkit" (optional, default: "firefox")
    - "search_engine_name": Name of the search engine (optional, default: "DuckDuckGo")

    Responses are always JSON:
    - 400 when the payload is not a JSON object or a parameter is missing/invalid.
    - 200 when perform_web_browse reports "status": "success".
    - 500 when perform_web_browse reports any other status or raises.

    Example usage with curl (the server listens on port 7860):
    curl -X POST http://127.0.0.1:7860/web_browse \
    -H "Content-Type: application/json" \
    -d '{
    "action": "Search",
    "query": "latest news on AI",
    "browser_name": "firefox",
    "search_engine_name": "Google"
    }'
    """
    if not request.is_json:
        return jsonify({"status": "error", "error_message": "Invalid input: payload must be JSON"}), 400

    # silent=True returns None on malformed JSON instead of raising, so the
    # client gets the same JSON error shape rather than Werkzeug's HTML 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A JSON array/scalar (or unparsable body) has no .get(); reject it
        # here instead of crashing with an AttributeError below.
        return jsonify({"status": "error", "error_message": "Invalid input: payload must be a JSON object"}), 400

    action = data.get('action')
    query = data.get('query')
    # `or` also replaces an explicit null / empty string with the default.
    browser_name = data.get('browser_name') or 'firefox'
    search_engine_name = data.get('search_engine_name') or 'DuckDuckGo'

    if not action or not query:
        return jsonify({"status": "error", "error_message": "Missing required parameters: 'action' and 'query' are mandatory."}), 400

    if action not in ("Search", "Scrape URL"):
        return jsonify({"status": "error", "error_message": "Invalid 'action'. Must be 'Search' or 'Scrape URL'."}), 400

    # perform_web_browse calls string methods on its arguments
    # (e.g. browser_name.lower()), so reject non-string values up front.
    if not all(isinstance(value, str) for value in (query, browser_name, search_engine_name)):
        return jsonify({"status": "error", "error_message": "Invalid input: 'query', 'browser_name' and 'search_engine_name' must be strings."}), 400

    # asyncio.run() creates a fresh event loop for this call and closes it
    # when the coroutine finishes.
    # NOTE(review): perform_web_browse appears to cache Playwright objects in
    # PLAYWRIGHT_STATE; objects created under one loop may not be usable once
    # that loop has been closed -- confirm behaviour on a second request.
    try:
        result = asyncio.run(perform_web_browse(action, query, browser_name, search_engine_name))
        status_code = 200 if result.get("status") == "success" else 500
        return jsonify(result), status_code
    except Exception as e:
        # Top-level boundary: keep the traceback in the server log, and
        # return a JSON error to the client.
        app.logger.exception("web_browse failed for query %r", query)
        return jsonify({"status": "error", "query": query, "error_message": f"An unexpected server error occurred: {str(e)}"}), 500
225
+
226
 
227
# --- Main Application Runner ---
if __name__ == "__main__":
    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python. Because the app binds to 0.0.0.0, debug mode
    # is opt-in (FLASK_DEBUG=1) instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    print(f"Flask server starting... {REVOLVER.count()} proxies loaded.")
    print("API Endpoint available at POST http://127.0.0.1:7860/web_browse")
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)