Spaces:
Sleeping
Sleeping
| from flask import Flask, jsonify | |
| from bs4 import BeautifulSoup | |
| from datetime import datetime | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| import time | |
| import re | |
# Flask application object; it is started by the `__main__` guard at the end of this file.
app = Flask(__name__)
# Lower-case French election terms. An article is kept when at least one of
# them occurs as a substring of its lower-cased title or content.
KEYWORDS = [
    "élection présidentielle",
    "présidentielle",
    "élections présidentielles",
    "élection législative",
    "législative",
    "élections législatives",
]

# Listing page from which the article links are collected.
START_URL = "https://aps.sn/politique/"
def clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim the ends.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if text:
        collapsed = re.sub(r'\s+', ' ', text)
        return collapsed.strip()
    return ""
def _extract_article_content(article_soup):
    """Return the body text of a parsed article page, or "" when none is found.

    The known content containers are tried in order and the first one holding
    more than 100 characters of text wins. When no container qualifies, the
    text is rebuilt from the page's ``<p>`` elements longer than 50 characters,
    joined by blank lines.
    """
    selectors = [
        "div.td-post-content", "div.elementor-widget-theme-post-content",
        "div.content-inner", "div.post-content", "article",
        "main .entry-content", ".post-content-wrap"
    ]
    for selector in selectors:
        content_tag = article_soup.select_one(selector)
        if content_tag and len(content_tag.text.strip()) > 100:
            return clean_text(content_tag.text)

    # No container was long enough. Returning from inside the loop (instead of
    # breaking out of it) guarantees that a short match of the last selector
    # can no longer mask this paragraph fallback.
    paragraphs = article_soup.select("p")
    return "\n\n".join(
        clean_text(p.text) for p in paragraphs if len(p.text.strip()) > 50
    )


def fetch_election_articles():
    """Scrape the listing page and return the articles that mention an election.

    A headless Chrome session loads ``START_URL``, visits at most the first 50
    article links and keeps every article whose lower-cased title or content
    contains one of ``KEYWORDS``.

    Returns:
        A list of dicts with the keys ``"title"``, ``"description"``,
        ``"content"`` and ``"url"``.

    Errors on a single article are printed and that article is skipped.
    Exceptions raised while starting Chrome or loading the listing page
    propagate to the caller; the browser is shut down in every case.
    """
    from urllib.parse import urljoin

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(options=options)

    results = []
    try:
        driver.get(START_URL)
        time.sleep(5)  # fixed wait so the page can finish rendering
        soup = BeautifulSoup(driver.page_source, "html.parser")
        article_links = soup.select("h6.p-ttl a.ttl-link") or soup.select("a.ttl-link")

        seen_urls = set()
        for link in article_links[:50]:
            href = link.get("href")
            if not href:
                continue  # anchor without a target: nothing to visit
            url = urljoin(START_URL, href)  # resolves relative links, keeps absolute ones
            if url in seen_urls:
                continue  # the same article can be linked more than once
            seen_urls.add(url)

            title = clean_text(link.text)
            try:
                driver.get(url)
                time.sleep(3)
                article_soup = BeautifulSoup(driver.page_source, "html.parser")

                # Retrieve the article content.
                content = _extract_article_content(article_soup)
                if not content or len(content) < 100:
                    continue

                combined_text = f"{title.lower()} {content.lower()}"
                if any(kw in combined_text for kw in KEYWORDS):
                    # Paragraph-fallback content contains newlines, so its first
                    # paragraph is used; otherwise the first 200 characters.
                    description = content.split("\n")[0] if "\n" in content else content[:200]
                    results.append({
                        "title": title,
                        "description": description,
                        "content": content,
                        "url": url
                    })
            except Exception as e:
                # Best effort: one failing article must not abort the whole crawl.
                print(f"Erreur pour {title}: {e}")
    finally:
        # Always release the browser process, even when the crawl fails.
        driver.quit()
    return results
@app.route("/")
def index():
    """Serve a plain-text message at the site root pointing to the API endpoint."""
    # Shows a simple message directing clients to the API.
    return "API d'articles sur les élections. Utilisez /api/election-articles pour obtenir les données."
@app.route("/api/election-articles")
def get_election_articles():
    """Return the scraped election articles as a JSON array.

    Every request triggers a full scrape through ``fetch_election_articles``;
    nothing is cached in this handler.
    """
    articles = fetch_election_articles()
    return jsonify(articles)
if __name__ == '__main__':
    import os

    # The server listens on every interface. Flask's debug mode enables the
    # Werkzeug interactive debugger, which lets a client execute arbitrary
    # code, so it is opt-in via FLASK_DEBUG=1 instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", debug=debug_enabled, port=5001)