pos-image-api

Sleeping

App Files Files Community

rairo committed on Mar 27, 2025

Commit

1ae42b1

verified ·

1 Parent(s): 8bb8601

Update main.py

Browse files

Files changed (1) hide show

main.py +112 -14

main.py CHANGED Viewed

@@ -4,8 +4,12 @@ import json
 import time
 import subprocess
 import nest_asyncio
 from scrapegraphai.graphs import SearchGraph
-from flask_cors import CORS, cross_origin
 # Ensure Playwright installs required browsers and dependencies
 subprocess.run(["playwright", "install"])
@@ -14,11 +18,18 @@ nest_asyncio.apply()
 app = Flask(__name__)
 CORS(app)
-# Set your Google API key as an environment variable.
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 if not GOOGLE_API_KEY:
     raise ValueError("GOOGLE_API_KEY environment variable is not set.")
 graph_config = {
     "llm": {
         "api_key": GOOGLE_API_KEY,
@@ -34,9 +45,7 @@ def get_data(search_term):
     """
     Run the SearchGraph for a given search term.
     If a rate-limit error (202) occurs, wait 10 seconds and retry.
-    Includes debugging steps to compare Flask and Streamlit behavior.
     """
     full_prompt = (
         f"search for {search_term} grants\n\n"
         "List me all grants or funds with:\n"
@@ -59,7 +68,6 @@ def get_data(search_term):
     try:
         search_graph = SearchGraph(prompt=full_prompt, config=graph_config)
         result = search_graph.run()
         print("\n=== DEBUG: Raw result from search_graph.run() ===")
         print(result)
         print("===========================================")
@@ -73,7 +81,6 @@ def get_data(search_term):
                 print("ERROR: Failed to parse JSON from search result.")
                 return {"error": "Failed to parse JSON from search result."}
-        # Check if grants data exists
         if not result or "grants" not in result or not result["grants"]:
             print(f"DEBUG: No grants found for '{search_term}'.")
             return {"error": f"No results returned for '{search_term}'. Please try again with a different search term."}
@@ -88,26 +95,20 @@ def get_data(search_term):
         if "202" in err_str:
             print("DEBUG: Rate limit (202) detected. Retrying in 10 seconds...")
             time.sleep(10)
             try:
                 search_graph = SearchGraph(prompt=full_prompt, config=graph_config)
                 result = search_graph.run()
                 print("\n=== DEBUG: Retrying search_graph.run() ===")
                 print(result)
                 print("===========================================")
                 if not result or "grants" not in result or not result["grants"]:
                     print(f"DEBUG: No grants found after retry for '{search_term}'.")
                     return {"error": f"No results returned for '{search_term}' after retry. Please try again with a different search term."}
                 print("DEBUG: Grants found on retry, returning results.")
                 return result
             except Exception as e2:
                 print(f"ERROR: Retry failed - {str(e2)}")
                 return {"error": f"Retry failed for '{search_term}': {str(e2)}. Please try again later."}
         else:
             return {"error": f"An error occurred for '{search_term}': {str(e)}. Please try again."}
@@ -127,10 +128,11 @@ def process_multiple_search_terms(search_terms):
             all_data["grants"].extend(result["grants"])
     return all_data
 @app.route("/scrape", methods=["POST"])
 def scrape():
     """
-    Endpoint to scrape grant opportunities.
     Expects a JSON body with the key 'search_terms' (a string with newline-separated search terms
     or a list of strings). Returns JSON with the aggregated results.
     """
@@ -139,7 +141,6 @@ def scrape():
         return jsonify({"error": "Request must include 'search_terms' key."}), 400
     search_terms = data["search_terms"]
-    # If search_terms is a string, split it by newlines.
     if isinstance(search_terms, str):
         search_terms = [s.strip() for s in search_terms.split("\n") if s.strip()]
     elif not isinstance(search_terms, list):
@@ -151,5 +152,102 @@ def scrape():
     result = process_multiple_search_terms(search_terms)
     return jsonify(result), 200
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)

 import time
 import subprocess
 import nest_asyncio
+import requests  # For API fallback call to Supadata
 from scrapegraphai.graphs import SearchGraph
+from flask_cors import CORS
+from google import genai
+from google.genai import types
+from supadata import Supadata, SupadataError
 # Ensure Playwright installs required browsers and dependencies
 subprocess.run(["playwright", "install"])
 app = Flask(__name__)
 CORS(app)
+# Environment variables
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 if not GOOGLE_API_KEY:
     raise ValueError("GOOGLE_API_KEY environment variable is not set.")
+SUPADATA_API_KEY = os.environ.get("SUPADATA_API_KEY")
+if not SUPADATA_API_KEY:
+    raise ValueError("SUPADATA_API_KEY environment variable is not set.")
+# Initialize Supadata client
+supadata = Supadata(api_key=SUPADATA_API_KEY)
 graph_config = {
     "llm": {
         "api_key": GOOGLE_API_KEY,
     """
     Run the SearchGraph for a given search term.
     If a rate-limit error (202) occurs, wait 10 seconds and retry.
     """
     full_prompt = (
         f"search for {search_term} grants\n\n"
         "List me all grants or funds with:\n"
     try:
         search_graph = SearchGraph(prompt=full_prompt, config=graph_config)
         result = search_graph.run()
         print("\n=== DEBUG: Raw result from search_graph.run() ===")
         print(result)
         print("===========================================")
                 print("ERROR: Failed to parse JSON from search result.")
                 return {"error": "Failed to parse JSON from search result."}
         if not result or "grants" not in result or not result["grants"]:
             print(f"DEBUG: No grants found for '{search_term}'.")
             return {"error": f"No results returned for '{search_term}'. Please try again with a different search term."}
         if "202" in err_str:
             print("DEBUG: Rate limit (202) detected. Retrying in 10 seconds...")
             time.sleep(10)
             try:
                 search_graph = SearchGraph(prompt=full_prompt, config=graph_config)
                 result = search_graph.run()
                 print("\n=== DEBUG: Retrying search_graph.run() ===")
                 print(result)
                 print("===========================================")
                 if not result or "grants" not in result or not result["grants"]:
                     print(f"DEBUG: No grants found after retry for '{search_term}'.")
                     return {"error": f"No results returned for '{search_term}' after retry. Please try again with a different search term."}
                 print("DEBUG: Grants found on retry, returning results.")
                 return result
             except Exception as e2:
                 print(f"ERROR: Retry failed - {str(e2)}")
                 return {"error": f"Retry failed for '{search_term}': {str(e2)}. Please try again later."}
         else:
             return {"error": f"An error occurred for '{search_term}': {str(e)}. Please try again."}
             all_data["grants"].extend(result["grants"])
     return all_data
 @app.route("/scrape", methods=["POST"])
 def scrape():
     """
+    Endpoint to scrape grant opportunities using search terms.
     Expects a JSON body with the key 'search_terms' (a string with newline-separated search terms
     or a list of strings). Returns JSON with the aggregated results.
     """
         return jsonify({"error": "Request must include 'search_terms' key."}), 400
     search_terms = data["search_terms"]
     if isinstance(search_terms, str):
         search_terms = [s.strip() for s in search_terms.split("\n") if s.strip()]
     elif not isinstance(search_terms, list):
     result = process_multiple_search_terms(search_terms)
     return jsonify(result), 200
+def get_data_from_url(url):
+    """
+    Scrape the provided URL using Supadata. If it fails, fall back to the Supadata API.
+    Extract grant data using Gemini AI.
+    """
+    page_content = None  # Placeholder for storing scraped page content
+    # Step 1: Attempt Supadata's built-in scraper
+    try:
+        web_content = supadata.web.scrape(url)
+        page_content = web_content.content
+    except TypeError as te:
+        if "unexpected keyword argument 'type'" in str(te):
+            print("Falling back to Supadata API due to unexpected keyword 'type' error.")
+        else:
+            print(f"Unexpected error in Supadata scrape: {te}")
+    # Step 2: If Supadata's built-in scraper fails, use Supadata API
+    if not page_content:
+        try:
+            api_url = "https://api.supadata.ai/v1/web/scrape"
+            headers = {"X-API-Key": SUPADATA_API_KEY}
+            response = requests.get(api_url, headers=headers, params={"url": url})
+            if response.status_code == 200:
+                page_content = response.json().get("content", "")
+            else:
+                print(f"Supadata API failed with status {response.status_code}")
+                return {}
+        except Exception as e:
+            print(f"Error calling Supadata API: {e}")
+            return {}
+    # Pass content to Gemini AI
+    full_prompt = (
+        "Extract the following grant data from the provided web content. "
+        "- Grant name/title\n"
+        "- Short summary\n"
+        "- Funding organization\n"
+        "- Grant value (numeric only)\n"
+        "- Application deadline\n"
+        "- Eligible countries\n"
+        "- Sector/field\n"
+        "- Eligibility criteria\n"
+        "Return in JSON format.\n\n"
+        f"Web content: {page_content}"
+    )
+    client = genai.Client(api_key=GOOGLE_API_KEY)
+    new_answer = client.models.generate_content(
+        model="models/gemini-2.0-flash-lite",
+        contents=f"{full_prompt}, return the json string and nothing else"
+    )
+    response = new_answer.text
+    # Extract JSON output from Gemini
+    try:
+        start_index = response.find('[')
+        end_index = response.rfind(']') + 1
+        json_string = response[start_index:end_index]
+        result = json.loads(json_string)
+    except Exception as parse_error:
+        print(f"Error parsing JSON from Gemini model response. Response: {response}")
+        return {}
+    # Ensure JSON is wrapped correctly
+    if isinstance(result, list):
+        result = {"grants": result}
+    if not result.get("grants"):
+        print("No grant opportunities found in the scraped URL.")
+        return {}
+    print(f"First grant opportunity: {result['grants'][0]}")
+    return result
+@app.route("/scrape_url", methods=["POST"])
+def scrape_url():
+    """
+    Endpoint to scrape a provided URL for grant opportunities.
+    Expects a JSON body with the key 'url'.
+    Returns the scraped and processed grant data in JSON format.
+    """
+    data = request.get_json()
+    if not data or "url" not in data:
+        return jsonify({"error": "Request must include 'url' key."}), 400
+    url = data["url"]
+    result = get_data_from_url(url)
+    if not result:
+        return jsonify({"error": "Failed to scrape URL or no grants found."}), 500
+    return jsonify(result), 200
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)