Spaces:
Sleeping
Sleeping
import json
import os
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse

from generate_newsletter import process_new_papers
# Hugging Face Hub configuration: the dataset repository that stores the
# newsletters, and an API client authenticated with the HF_TOKEN environment
# variable (the token is None when the variable is unset).
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
HF_TOKEN = os.getenv("HF_TOKEN")
api = HfApi(token=HF_TOKEN)
# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_newsletter_job():
    """Generate the newsletter for the week ending on the most recent Sunday.

    APScheduler hands a job's ``kwargs`` to the callable verbatim and does no
    templating, so the end date must be computed here, each time the job
    fires, rather than being passed as a template string.
    """
    now = datetime.now(timezone.utc)
    # weekday() is 0 for Monday, so this always steps back to the previous
    # Sunday (a full week back when today is itself a Sunday).
    last_sunday = now - timedelta(days=now.weekday() + 1)
    process_new_papers(end_date=last_sunday.strftime("%Y-%m-%d"), test=False)


# Run newsletter generation every 6 hours; every run targets the week that
# ended on the most recent Sunday (UTC).
# NOTE(review): this relies on process_new_papers coping with repeated runs
# for the same end date -- confirm against generate_newsletter.
scheduler.add_job(_run_newsletter_job,
                  trigger="interval",
                  hours=6,
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)
# Stylesheet for the site: serif body text in a centred 800px column, plain
# unbulleted lists, and button-like ".download-link" anchors. It is passed to
# the FastHTML app as a header element.
css = Style("""
body {
font-family: Georgia, Times, serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background: #fff;
}
h1, h2 {
color: #2c3e50;
font-family: Georgia, Times, serif;
}
a {
color: #2c3e50;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
ul {
list-style-type: none;
padding: 0;
}
li {
margin-bottom: 10px;
}
.newsletter-content {
margin-top: 20px;
}
.download-links {
margin: 20px 0;
}
.download-link {
display: inline-block;
padding: 10px 20px;
background-color: #2c3e50;
color: white;
border-radius: 3px;
margin: 0 10px 10px 0;
font-family: Georgia, Times, serif;
}
.download-link:hover {
background-color: #34495e;
text-decoration: none;
}
""")
# Start the scheduler when the app starts
async def start_scheduler():
    """Start the background scheduler unless it is already running."""
    if not scheduler.running:
        scheduler.start()


# Shut down the scheduler when the app stops
async def shutdown_scheduler():
    """Stop the background scheduler if it is running."""
    if scheduler.running:
        scheduler.shutdown()


@asynccontextmanager
async def _lifespan(_app):
    """Tie the scheduler's lifetime to the application's lifetime.

    Without this hook the two coroutines above are never invoked, so the
    scheduled newsletter job would never run.
    """
    await start_scheduler()
    try:
        yield
    finally:
        await shutdown_scheduler()


# The application: custom CSS plus client-side Markdown rendering and syntax
# highlighting headers, with the scheduler bound to startup/shutdown.
app = FastHTML(hdrs=(css, MarkdownJS(),
                     HighlightJS(
                         langs=['python', 'javascript', 'html', 'css'])),
               lifespan=_lifespan)
def get_newsletter_list():
    """Return the dataset paths ending in ``newsletter.json``.

    The paths are sorted in descending string order, which puts the most
    recent issue first when each path starts with a YYYYMMDD folder.
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    matches = list(
        filter(lambda name: name.endswith('newsletter.json'), repo_files))
    matches.sort(reverse=True)
    return matches
def get_newsletter_content(path):
    """Download one newsletter JSON file from the dataset and parse it.

    Args:
        path: Path of the file inside the dataset repository, e.g.
            ``"<date>/newsletter.json"``.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: Whatever the hub client raises when the file cannot be
            fetched, or ``json.JSONDecodeError`` when it is not valid JSON.
    """
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding, which may not be UTF-8.
    with open(local_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def check_format_exists(date: str, format: str) -> bool:
    """Check if a specific format exists for a given date.

    Asks the hub whether ``<date>/newsletter.<format>`` exists instead of
    downloading the whole file just to see whether the download succeeds.

    Args:
        date: Folder name of the newsletter issue.
        format: File extension to look for, e.g. ``"pdf"`` or ``"epub"``.

    Returns:
        True when the file exists in the dataset; False when it does not or
        when the hub cannot be queried.
    """
    try:
        return api.file_exists(
            repo_id=DATASET_NAME,
            filename=f"{date}/newsletter.{format}",
            repo_type="dataset",
        )
    except Exception:
        # Best-effort probe: a hub or network failure is reported as
        # "format not available" rather than breaking the page.
        return False
@app.get("/")
def index():
    """Render the landing page with a link to every available newsletter.

    Each newsletter path is expected to start with a ``YYYYMMDD`` folder; the
    folder name becomes the link target and is shown as e.g.
    "January 01, 2024".
    """
    links = []
    for path in get_newsletter_list():
        folder = path.split('/')[0]
        try:
            label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # A file that does not live in a YYYYMMDD folder should not take
            # the whole index page down; leave it out of the list.
            continue
        links.append(Li(A(label, href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))
@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render one newsletter issue with links to its downloadable formats.

    Args:
        date: Issue folder name in ``YYYYMMDD`` form, taken from the URL.

    Returns:
        The newsletter page, or an error page when ``date`` is malformed or
        the issue cannot be loaded.
    """

    def _not_found():
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))

    # ``date`` comes straight from the URL and is used to build a repository
    # path, so accept only the YYYYMMDD form that the index page links to.
    try:
        datetime.strptime(date, '%Y%m%d')
    except ValueError:
        return _not_found()

    try:
        content = get_newsletter_content(f"{date}/newsletter.json")
        title = f"This Week in Rheumatology - {content['date']}"
        body = content['content']
    except Exception:
        # Missing issue, hub failure, or a JSON document without the expected
        # keys: all are shown to the reader as "not found".
        return _not_found()

    # Offer a download button for each extra format present in the dataset.
    download_links = [
        A(f"Download {fmt.upper()}",
          href=f"/download/{date}/{fmt}",
          cls="download-link")
        for fmt in ("pdf", "epub")
        if check_format_exists(date, fmt)
    ]
    return Titled(
        title,
        A("Back to Index", href="/"),
        Div(*download_links, cls="download-links"),
        Div(body, cls="marked"))
@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Serve the PDF or EPUB version of a newsletter issue as a download.

    Args:
        date: Issue folder name in ``YYYYMMDD`` form, taken from the URL.
        format: Requested file format; ``"pdf"`` and ``"epub"`` are supported.

    Returns:
        A ``FileResponse`` for the requested file, or an error page when the
        request is invalid or the file cannot be fetched.
    """
    media_types = {
        "pdf": "application/pdf",
        "epub": "application/epub+zip",
    }
    try:
        # Validate both URL parts before contacting the hub, so an unsupported
        # format or malformed date never triggers a download.
        if format not in media_types:
            raise ValueError(f"Unsupported format: {format}")
        datetime.strptime(date, '%Y%m%d')  # raises ValueError when malformed

        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.{format}",
                                         repo_type="dataset")
        return FileResponse(local_path,
                            media_type=media_types[format],
                            filename=f"newsletter_{date}.{format}")
    except Exception:
        return Titled("Error", H2(f"{format.upper()} not found"),
                      P(f"Unable to load {format.upper()} for date: {date}"),
                      A("Back to Index", href="/"))
# Launch the web server for this app. `serve` is not defined in this file;
# presumably it comes from the `fasthtml.common` star import -- confirm.
serve()