|
|
from fastapi import FastAPI, HTTPException |
|
|
from bs4 import BeautifulSoup |
|
|
import requests |
|
|
|
|
|
# FastAPI application instance; the route(s) below register against it.
app = FastAPI()
|
|
|
|
|
@app.get("/scrape")
async def scrape_titles_and_links(url: str):
    """Scrape a page for article titles, their permalinks, and inline URLs.

    Fetches ``url`` and pairs each ``<h1 class="entry-title">`` with the
    corresponding ``<div class="entry-summary">`` (matched positionally via
    ``zip``). For each pair it extracts the title text, the title's anchor
    href, and every whitespace-delimited token in the summary's first ``<p>``
    that starts with ``http``.

    Args:
        url: Absolute URL of the page to scrape (query parameter).

    Returns:
        ``{"results": [{"title": str, "link": str, "links": list[str]}, ...]}``

    Raises:
        HTTPException: 500 when the upstream fetch returns a non-200 status
            or any unexpected error occurs during scraping.
    """
    try:
        # NOTE(review): requests is a blocking client inside an async handler,
        # which stalls the event loop while the fetch runs. Consider a plain
        # `def` handler (FastAPI runs those in a threadpool) or httpx.
        # The timeout prevents a hung upstream from hanging this worker forever.
        response = requests.get(url, timeout=10)

        if response.status_code != 200:
            raise HTTPException(
                status_code=500,
                detail="Failed to retrieve the webpage. Status code: "
                + str(response.status_code),
            )

        soup = BeautifulSoup(response.content, 'html.parser')

        titles = soup.find_all('h1', class_='entry-title')
        summaries = soup.find_all('div', class_='entry-summary')

        results = []
        # zip() silently pairs positionally; unmatched trailing titles or
        # summaries are dropped, matching the original behavior.
        for title, summary in zip(titles, summaries):
            anchor = title.find('a')
            if anchor is None:
                # A title heading without an anchor previously crashed with
                # AttributeError; skip it instead.
                continue
            title_text = anchor.text
            # .get() avoids a KeyError on an anchor missing its href.
            title_link = anchor.get('href', '')

            paragraph = summary.find('p')
            # A summary with no <p> previously crashed with AttributeError.
            words = paragraph.text.split() if paragraph is not None else []
            links = [word for word in words if word.startswith('http')]

            results.append({
                "title": title_text,
                "link": title_link,
                "links": links,
            })

        return {"results": results}

    except HTTPException:
        # Re-raise untouched: the broad handler below previously caught our
        # own HTTPException and re-wrapped it, mangling the detail message.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail="An error occurred: " + str(e))
|
|
|