Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests, time, json
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
from flask import Flask, request, jsonify
|
| 4 |
+
|
| 5 |
+
# Flask application object; the @app.route handlers in this file register on it.
app = Flask(__name__)
|
| 6 |
+
|
| 7 |
+
def scrape_startpage(query, n=10, timeout=10):
    """Scrape Startpage web-search results for *query*.

    Args:
        query: Search terms, sent as the ``query`` URL parameter.
        n: Maximum number of results to return. ``None`` means no limit;
            values below 1 yield an empty list without contacting
            Startpage.
        timeout: Seconds to wait for Startpage before the request is
            abandoned (passed straight to ``requests``).

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``desc``.
        The list is empty when nothing matched or when the request or
        parsing failed; in the failure case the error is printed, not
        raised.
    """
    # A non-positive limit can never produce results, and a negative value
    # must not be interpreted as a "drop the last k items" slice offset.
    if n is not None and n <= 0:
        return []

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }

    try:
        # The context manager closes the session's pooled connections even
        # when the request raises.
        with requests.Session() as s:
            s.headers.update(headers)
            # Fixed one-second pause before every outbound request
            # (presumably a politeness delay toward Startpage).
            time.sleep(1)
            r = s.get(
                'https://www.startpage.com/sp/search',
                params={'query': query, 'cat': 'web', 'pl': 'opensearch'},
                # Without a timeout requests can block forever on a stalled peer.
                timeout=timeout,
            )
            r.raise_for_status()
            html = r.content

        soup = BeautifulSoup(html, 'html.parser')
        results = []
        for c in soup.find_all('div', class_='result'):
            t = c.find('a', class_='result-title')
            if not t:
                # Containers without a title link do not count toward n.
                continue
            d = (c.find('p', class_='result-description')
                 or c.find('span', class_='result-description'))
            results.append({
                'title': t.get_text(strip=True),
                'url': t.get('href'),
                'desc': d.get_text(strip=True) if d else '',
            })
            if n is not None and len(results) >= n:
                break
        return results
    except Exception as e:
        # Deliberate best-effort boundary: callers get an empty list rather
        # than an exception when the network call or parsing fails.
        print(f"Error: {e}")
        return []
|
| 25 |
+
|
| 26 |
+
@app.route('/search', methods=['GET'])
def search():
    """Handle ``GET /search``: run a Startpage search and return JSON.

    Query parameters:
        q: Search terms (required; blank or whitespace-only is rejected).
        n: Maximum number of results, a positive integer. Defaults to 10
           when absent or not parseable as an integer.

    Returns:
        ``{"query": ..., "results": [...]}`` on success, or a JSON error
        object with HTTP status 400 when ``q`` is missing or ``n`` is
        less than 1.
    """
    # Treat a whitespace-only query the same as a missing one.
    query = (request.args.get('q') or '').strip()
    if not query:
        return jsonify({'error': 'Missing query parameter "q"'}), 400

    n = request.args.get('n', default=10, type=int)
    # Reject zero/negative limits up front instead of passing untrusted
    # values on to the scraper.
    if n < 1:
        return jsonify({'error': 'Query parameter "n" must be a positive integer'}), 400

    results = scrape_startpage(query, n)
    return jsonify({'query': query, 'results': results})
|
| 35 |
+
|
| 36 |
+
@app.route('/', methods=['GET'])
def health():
    """Handle ``GET /``: return a fixed JSON status payload."""
    payload = {
        'status': 'running',
        'message': 'GridLock search API',
    }
    return jsonify(payload)
|
| 39 |
+
|
| 40 |
+
if __name__ == '__main__':
    import os

    # The server binds to every interface, so the Werkzeug interactive
    # debugger (which permits arbitrary code execution) must stay off unless
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug)
|