rairo committed on
Commit
623feb1
·
verified ·
1 Parent(s): caf1d25

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +111 -0
main.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ import json
4
+ import time
5
+ import subprocess
6
+ import nest_asyncio
7
+ from scrapegraphai.graphs import SearchGraph
8
+ from flask_cors import CORS, cross_origin
9
+
10
# --- Import-time setup ------------------------------------------------------
# Everything below runs once when the module is imported, in this order.

# Invoke the Playwright CLI so the browser binaries are downloaded.
# check=False (the default, spelled out here): a non-zero exit status from the
# CLI does not raise, so startup continues even if the install step fails.
subprocess.run(["playwright", "install"], check=False)

# Patch asyncio so an already-running event loop can be re-entered.
nest_asyncio.apply()

# Flask application with CORS enabled using flask_cors defaults.
app = Flask(__name__)
CORS(app)

# The Google API key must be supplied through the environment; refuse to
# start without it (an empty string counts as missing).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable is not set.")

# Configuration dictionary handed to every SearchGraph instance.
graph_config = dict(
    llm=dict(
        api_key=GOOGLE_API_KEY,
        model="google_genai/gemini-2.0-flash-lite",
    ),
    max_results=8,
    verbose=True,
    headless=True,
)
31
+
32
def get_data(search_term, *, retry_delay=10, max_retries=1):
    """Run a SearchGraph grant search for ``search_term``.

    A prompt asking for grants related to ``search_term`` is built and passed
    to ``SearchGraph`` together with the module-level ``graph_config``. If the
    run raises an exception whose message contains ``"202"`` (treated here as
    a rate-limit signal), the function sleeps for ``retry_delay`` seconds and
    tries again, up to ``max_retries`` additional times.

    Args:
        search_term: Topic to search grants for; interpolated into the prompt.
        retry_delay: Seconds to wait before each retry. Defaults to 10.
        max_retries: Maximum number of retries after the first attempt.
            Defaults to 1; negative values are treated as 0.

    Returns:
        The dictionary returned by ``SearchGraph.run()`` when it contains a
        non-empty ``"grants"`` entry; otherwise a dictionary with a single
        ``"error"`` key describing what went wrong. This function never
        raises for failures inside the search itself.
    """
    full_prompt = (
        f"search for {search_term} grants\n\n"
        "List me all grants or funds with:\n"
        "- Grant name/title\n"
        "- Short summary \n"
        "- Funding organization\n"
        "- Grant value (numeric only)\n"
        "- Application deadline\n"
        "- Eligible countries\n"
        "- Sector/field\n"
        "- Eligibility criteria\n"
        "Return in JSON format."
    )

    retries_allowed = max(max_retries, 0)
    for attempt in range(retries_allowed + 1):
        is_retry = attempt > 0
        try:
            result = SearchGraph(prompt=full_prompt, config=graph_config).run()
        except Exception as exc:  # boundary: convert any library failure to an error payload
            # Only a "202" failure is worth retrying, and only while retries remain.
            if "202" in str(exc) and attempt < retries_allowed:
                time.sleep(retry_delay)
                continue
            if is_retry:
                return {"error": f"Retry failed for '{search_term}': {exc}. Please try again later."}
            return {"error": f"An error occurred for '{search_term}': {exc}. Please try again."}

        # The result is only usable when it is a dict with a non-empty
        # "grants" entry; anything else (None, a bare string, an empty dict)
        # is reported as "no results" instead of failing on ``.get``.
        if not isinstance(result, dict) or not result.get("grants"):
            suffix = " after retry" if is_retry else ""
            return {
                "error": (
                    f"No results returned for '{search_term}'{suffix}. "
                    "Please try again with a different search term."
                )
            }
        return result

    # Unreachable: every loop iteration returns or continues to a later one,
    # and the final iteration cannot continue. Kept as a defensive fallback.
    return {"error": f"An error occurred for '{search_term}': no attempt was made. Please try again."}
70
+
71
def process_multiple_search_terms(search_terms):
    """Run ``get_data`` for each search term and merge the grants found.

    Args:
        search_terms: Iterable of search terms. Entries that are not strings,
            or that are empty after stripping whitespace, are skipped.

    Returns:
        A dictionary with a ``"grants"`` list holding the combined grants of
        all terms, in input order. When one or more terms produced an error
        payload from ``get_data``, an additional ``"errors"`` list with those
        messages is included so failures are not silently lost; the key is
        absent when every term succeeded.
    """
    all_data = {"grants": []}
    errors = []

    for term in search_terms:
        # Guard against non-string items (e.g. numbers or nulls in a JSON list),
        # which would otherwise fail on ``.strip()``.
        if not isinstance(term, str):
            continue
        term = term.strip()
        if not term:
            continue

        result = get_data(term)
        if not isinstance(result, dict):
            continue

        grants = result.get("grants")
        if isinstance(grants, list):
            all_data["grants"].extend(grants)
        elif grants:
            # A single non-list value is kept as one entry; extending with it
            # would splice in dict keys or individual characters.
            all_data["grants"].append(grants)
        elif result.get("error"):
            errors.append(result["error"])

    if errors:
        all_data["errors"] = errors
    return all_data
85
+
86
@app.route("/scrape", methods=["POST"])
def scrape():
    """Endpoint to scrape grant opportunities.

    Expects a JSON object body with the key ``search_terms``, which is either
    a string of newline-separated search terms or a list of strings. Terms are
    stripped of surrounding whitespace and blank terms are discarded.

    Returns:
        A ``(response, status)`` tuple. On success the response is the JSON
        aggregate produced by ``process_multiple_search_terms`` with status
        200. A missing/invalid body, a ``search_terms`` value of the wrong
        type, or an input with no usable terms yields a JSON ``{"error": ...}``
        object with status 400.
    """
    # silent=True makes a malformed or non-JSON body come back as None, so the
    # client gets this endpoint's JSON error instead of Flask's default error page.
    data = request.get_json(silent=True)

    # The body must be a JSON object; a bare string or array could satisfy an
    # ``in`` check and then fail on the key lookup.
    if not isinstance(data, dict) or "search_terms" not in data:
        return jsonify({"error": "Request must include 'search_terms' key."}), 400

    search_terms = data["search_terms"]
    if isinstance(search_terms, str):
        # A single string carries one term per line.
        search_terms = search_terms.split("\n")
    elif not isinstance(search_terms, list) or not all(
        isinstance(term, str) for term in search_terms
    ):
        return jsonify({"error": "'search_terms' must be a string or list of strings."}), 400

    # Normalize both input forms the same way: trim and drop blank terms.
    search_terms = [term.strip() for term in search_terms if term.strip()]
    if not search_terms:
        return jsonify({"error": "No valid search terms provided."}), 400

    result = process_multiple_search_terms(search_terms)
    return jsonify(result), 200
109
+
110
if __name__ == "__main__":
    # The server binds to all interfaces, so the interactive debugger (which
    # lets a client execute arbitrary code) must not be enabled by default.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)