Spaces:

iarbel
/

amazon-feature-bullets-demo

Sleeping

iarbel committed on Nov 21, 2023

Commit

1896c1d

1 Parent(s): 17d7e7d

add error handling

Files changed (2) hide show

app.py CHANGED Viewed

@@ -17,6 +17,10 @@ def asin_to_pdp(asin_or_url: str) -> dict:
     html = scrape.zyte_call(asin_url)
     asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
     return asin_pdp

     html = scrape.zyte_call(asin_url)
     asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
+    if not asin_pdp:
+        raise gr.Error('Input URL not found (404)')
+    elif not asin_pdp.get('title') or not asin_pdp.get('tech_data'):
+        raise gr.Error("Couldn't fetch title or technical details from input URL")
     return asin_pdp

src/scrape.py CHANGED Viewed

@@ -3,9 +3,10 @@ import os
 import requests
 from base64 import b64decode
 from bs4 import BeautifulSoup
-from typing import Dict
 Z_KEY = os.environ.get('ZYTE_KEY')
 def zyte_call(url: str) -> bytes:
@@ -22,7 +23,11 @@ def zyte_call(url: str) -> bytes:
     return http_response_body
-def get_asin_pdp(soup: BeautifulSoup) -> Dict[str, str]:
     # Get ASIN
     try:
         asin = soup.find('link', rel='canonical')['href'].split('/')[-1]

 import requests
 from base64 import b64decode
 from bs4 import BeautifulSoup
+from typing import Dict, Optional
 Z_KEY = os.environ.get('ZYTE_KEY')
+PAGE_NOT_FOUND_STR = 'page not found'
 def zyte_call(url: str) -> bytes:
     return http_response_body
+def get_asin_pdp(soup: BeautifulSoup) -> Optional[Dict[str, str]]:
+    # Check if 404
+    if PAGE_NOT_FOUND_STR in soup.find('title').text.lower():
+        return None
     # Get ASIN
     try:
         asin = soup.find('link', rel='canonical')['href'].split('/')[-1]