KJ24 committed on
Commit
bea80f3
·
verified ·
1 Parent(s): 2ce25e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -1,6 +1,8 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
- from newspaper import Article
 
 
4
 
5
  app = FastAPI()
6
 
@@ -10,15 +12,24 @@ class URLRequest(BaseModel):
10
  @app.post("/extract")
11
  def extract_article(request: URLRequest):
12
  try:
13
- article = Article(request.url)
14
- article.download()
15
- article.parse()
 
 
 
 
 
 
 
 
 
 
16
 
17
  return {
18
- "title": article.title,
19
- "content": article.text,
20
- "author": article.authors,
21
- "date": str(article.publish_date) if article.publish_date else None
22
  }
 
23
  except Exception as e:
24
  raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import html2text
6
 
7
  app = FastAPI()
8
 
 
12
  @app.post("/extract")
13
  def extract_article(request: URLRequest):
14
  try:
15
+ response = requests.get(request.url)
16
+ response.raise_for_status()
17
+ soup = BeautifulSoup(response.text, 'html.parser')
18
+
19
+ # Chercher le bloc principal d'article (à affiner si besoin)
20
+ article_tag = soup.find('article') or soup.find('main') or soup.body
21
+
22
+ html_content = str(article_tag)
23
+
24
+ # Convertir HTML → Markdown
25
+ markdown = html2text.html2text(html_content)
26
+
27
+ title = soup.title.string if soup.title else "Untitled"
28
 
29
  return {
30
+ "title": title,
31
+ "markdown": markdown
 
 
32
  }
33
+
34
  except Exception as e:
35
  raise HTTPException(status_code=500, detail=str(e))