Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -139,5 +139,34 @@ async def root(site: str = 'abcnews.go'):
|
|
| 139 |
return {"RESULTS": ii_list}
|
| 140 |
except requests.RequestException as e:
|
| 141 |
return {"error": str(e), "status_code": 500}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
|
|
|
|
| 139 |
return {"RESULTS": ii_list}
|
| 140 |
except requests.RequestException as e:
|
| 141 |
return {"error": str(e), "status_code": 500}
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
@app.get("/extract-images-livemint")
async def root(site: str = 'livemint'):
    """Backfill missing image URLs for recent Livemint articles.

    Selects up to 15 rows from the ``news`` table whose ``source`` is
    ``www.livemint.com`` and whose ``image_url`` is null, newest
    ``published_date`` first. For each row the article page is downloaded,
    the ``src`` of one ``<img>`` tag is extracted, and that URL is handed to
    ``insert_image`` together with the row ``id``.

    Processing is best-effort per article: any failure for one row (download
    error, missing key, too few ``<img>`` tags, ``insert_image`` raising) is
    printed and the loop moves on to the next row.

    Args:
        site: Accepted as a query parameter but not used; the source filter
            is hard-coded to ``www.livemint.com``.

    Returns:
        ``{"RESULTS": [...]}`` with the image URLs that were extracted, or
        ``{"error": <message>, "status_code": 500}`` if a
        ``requests.RequestException`` escapes the per-article handling.
    """
    # Position, among all <img> tags on the page, of the tag whose src is
    # stored. NOTE(review): presumably this is where Livemint's lead image
    # sits in the current page layout -- confirm, it will break silently if
    # the layout changes.
    image_index = 18
    # Without a timeout a single unresponsive server stalls the whole request.
    timeout_seconds = 10

    try:
        image_urls = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', 'www.livemint.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )

        for row in response.data:
            try:
                # NOTE(review): requests.get is a blocking call inside an
                # ``async def`` handler; consider a sync ``def`` handler or
                # a thread offload if this endpoint sees concurrent traffic.
                page = requests.get(row['article_url'], timeout=timeout_seconds)
                soup = BeautifulSoup(page.text, 'html.parser')
                image_url = soup.find_all(['img'])[image_index]['src']

                # Recorded before the insert so the response still reports
                # the URL even when persisting it fails.
                image_urls.append(image_url)
                insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort: report the failure and continue with the
                # remaining articles.
                print(e)

        return {"RESULTS": image_urls}
    except requests.RequestException as e:
        # NOTE(review): every requests call above is already covered by the
        # per-article handler, so this branch only fires if the Supabase
        # query itself raises a requests exception -- verify that it can.
        return {"error": str(e), "status_code": 500}
|
| 171 |
|
| 172 |
|