Spaces:

rairo
/

QuantGrantsList

Sleeping

App Files Files Community

rairo committed on Mar 27, 2025

Commit

e4fc921

verified ·

1 Parent(s): 65acf1d

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -51

app.py CHANGED Viewed

@@ -89,68 +89,104 @@ def get_data(search_term):
             st.error(f"An error occurred for search term: {search_term}, error: {e}. Please try again.")
             return {}
 def get_data_from_url(url):
     """
-    Scrape the provided URL using Supadata and pass the page content directly to the Gemini model
-    (using raw Google model) to extract grant data in a JSON structure.
     """
     try:
-        # Try using the supadata.web.scrape method
         try:
-            web_content = supadata.web.scrape(url)
-        except TypeError as te:
-            # Fallback if an unexpected keyword argument 'type' is raised
-            if "unexpected keyword argument 'type'" in str(te):
-                st.warning("Falling back due to unexpected keyword argument 'type'.")
-                if hasattr(supadata, "scrape"):
-                    web_content = supadata.scrape(url)
-                else:
-                    st.error("Fallback method not available: supadata does not have 'scrape'.")
-                    return {}
             else:
-                raise te
-        page_content = web_content.content
-        full_prompt = (
-            "Extract the following grant data from the provided web content. "
-            "- Grant name/title\n"
-            "- Short summary\n"
-            "- Funding organization\n"
-            "- Grant value (numeric only)\n"
-            "- Application deadline\n"
-            "- Eligible countries\n"
-            "- Sector/field\n"
-            "- Eligibility criteria\n"
-            "Return in JSON format.\n\n"
-            f"Web content: {page_content}"
-        )
-        client = genai.Client(api_key=GOOGLE_API_KEY)
-        new_answer = client.models.generate_content(
-            model="models/gemini-2.0-flash-lite",
-            contents=f"{full_prompt}, return the json string and nothing else"
-        )
-        response = new_answer.text
         try:
-            # Extract the JSON string from the response
-            start_index = response.find('[')
-            end_index = response.rfind(']') + 1
-            json_string = response[start_index:end_index]
-            result = json.loads(json_string)
-        except Exception as parse_error:
-            st.error(f"Error parsing JSON from Gemini model response. Here is the response: {response}")
-            return {}
-        # If result is a list, wrap it in a dictionary with the key "grants"
-        if isinstance(result, list):
-            result = {"grants": result}
-        if not result.get("grants"):
-            st.error("No grant opportunities found in the scraped URL.")
             return {}
-        st.success(f"First grant opportunity: {result['grants'][0]}")
-        return result
-    except Exception as e:
-        st.error(f"An error occurred while scraping URL {url}: {e}")
         return {}

             st.error(f"An error occurred for search term: {search_term}, error: {e}. Please try again.")
             return {}
+import requests
+import json
+import streamlit as st
+import google.generativeai as genai
+SUPADATA_API_KEY = "your_supadata_api_key"  # Replace with actual key
+GOOGLE_API_KEY = "your_google_api_key"  # Replace with actual key
 def _fetch_page_content(url):
     """Return the text content of *url*, or None when every strategy fails.

     Tries, in order: the Supadata client's ``web.scrape``, the Supadata REST
     endpoint, and finally a plain ``requests.get`` on the URL itself. Each
     failure is reported through Streamlit and the next strategy is attempted.
     """
     # Strategy 1: Supadata's built-in scraper (`supadata` is defined elsewhere
     # in this file).
     try:
         page_content = supadata.web.scrape(url).content
         if page_content:
             return page_content
     except TypeError as te:
         if "unexpected keyword argument 'type'" in str(te):
             st.warning("Falling back to Supadata API due to unexpected keyword 'type' error.")
         else:
             st.error(f"Unexpected error in Supadata scrape: {te}")
     except Exception as e:
         # Any other client failure (network, auth, missing `.content`) must
         # fall through to the next strategy instead of crashing the caller.
         st.error(f"Unexpected error in Supadata scrape: {e}")
     # Strategy 2: Supadata REST API.
     # NOTE(review): SUPADATA_API_KEY is assigned a placeholder literal just
     # above this function; confirm the real key is not being overwritten.
     try:
         response = requests.get(
             "https://api.supadata.ai/v1/web/scrape",
             headers={"X-API-Key": SUPADATA_API_KEY},
             params={"url": url},
             timeout=10,  # same bound as the direct request; avoids hanging forever
         )
         if response.status_code == 200:
             page_content = response.json().get("content", "")
             if page_content:
                 return page_content
         else:
             st.error(f"Supadata API failed with status {response.status_code}")
     except Exception as e:
         st.error(f"Error calling Supadata API: {e}")
     # Strategy 3: direct HTTP request to the page.
     try:
         r = requests.get(url, timeout=10)
         if r.status_code != 200:
             st.error(f"Manual scraping failed with status code {r.status_code}")
             return None
         page_content = r.text
     except Exception as e:
         st.error(f"Manual scraping error: {e}")
         return None
     if not page_content:
         # A 200 response with an empty body gives the model nothing to read.
         st.error(f"No content could be retrieved from URL: {url}")
         return None
     return page_content
 def _parse_grants_json(response):
     """Decode the JSON payload embedded in *response*.

     The outermost ``[...]`` span is tried first; if that is absent or invalid,
     the outermost ``{...}`` span is tried. Raises ValueError when neither
     span decodes.
     """
     for opener, closer in (("[", "]"), ("{", "}")):
         start = response.find(opener)
         end = response.rfind(closer)
         if start == -1 or end < start:
             continue
         try:
             return json.loads(response[start:end + 1])
         except json.JSONDecodeError:
             continue
     raise ValueError("no JSON array or object found in model response")
 def get_data_from_url(url):
     """
     Scrape the provided URL and extract grant data from it with the Gemini model.

     The page is fetched with Supadata's scraper, falling back to the Supadata
     API and then to a direct request. The page text is sent to the model and
     the JSON it returns is parsed.

     Returns a dict of the form ``{"grants": [...]}`` on success, or ``{}``
     when the page cannot be fetched, the model call fails, the response
     cannot be parsed, or no grants are found. Errors are reported through
     Streamlit rather than raised.
     """
     page_content = _fetch_page_content(url)
     if not page_content:
         return {}
     full_prompt = (
         "Extract the following grant data from the provided web content. "
         "- Grant name/title\n"
         "- Short summary\n"
         "- Funding organization\n"
         "- Grant value (numeric only)\n"
         "- Application deadline\n"
         "- Eligible countries\n"
         "- Sector/field\n"
         "- Eligibility criteria\n"
         "Return in JSON format.\n\n"
         f"Web content: {page_content}"
     )
     # NOTE(review): `genai.Client` is the google-genai SDK entry point
     # (`from google import genai`); the `import google.generativeai as genai`
     # added above this function appears to rebind `genai` to a module without
     # `Client`. Confirm which SDK is intended. The guard below turns such a
     # failure into a reported error instead of a crash.
     try:
         client = genai.Client(api_key=GOOGLE_API_KEY)
         new_answer = client.models.generate_content(
             model="models/gemini-2.0-flash-lite",
             contents=f"{full_prompt}, return the json string and nothing else"
         )
         # `.text` may be None when the model returns no text part.
         response = new_answer.text or ""
     except Exception as e:
         st.error(f"An error occurred while scraping URL {url}: {e}")
         return {}
     try:
         result = _parse_grants_json(response)
     except ValueError:
         st.error(f"Error parsing JSON from Gemini model response. Response: {response}")
         return {}
     # A bare list of grants is wrapped so callers always receive a dict.
     if isinstance(result, list):
         result = {"grants": result}
     grants = result.get("grants") if isinstance(result, dict) else None
     if not isinstance(grants, list) or not grants:
         st.error("No grant opportunities found in the scraped URL.")
         return {}
     st.success(f"First grant opportunity: {grants[0]}")
     return result