Spaces:
Running
Running
michaelkri committed on
Commit ·
6cb2a59
1
Parent(s): ac35357
Improved news update mechanism
Browse files
- app/database.py +3 -5
- app/main.py +5 -7
- app/templates/index.html +9 -4
- app/update_news.py +18 -10
app/database.py
CHANGED
|
@@ -74,7 +74,6 @@ def add_article(session: Session, article: Article):
|
|
| 74 |
Adds a new article to the database
|
| 75 |
'''
|
| 76 |
session.add(article)
|
| 77 |
-
session.commit()
|
| 78 |
|
| 79 |
|
| 80 |
def retrieve_articles(session: Session):
|
|
@@ -86,10 +85,10 @@ def retrieve_articles(session: Session):
|
|
| 86 |
|
| 87 |
def clear_articles(session: Session):
|
| 88 |
'''
|
| 89 |
-
Deletes all articles in the database
|
| 90 |
'''
|
|
|
|
| 91 |
session.query(Article).delete()
|
| 92 |
-
session.commit()
|
| 93 |
|
| 94 |
|
| 95 |
def add_sources(session: Session, sources: list[Source]):
|
|
@@ -97,5 +96,4 @@ def add_sources(session: Session, sources: list[Source]):
|
|
| 97 |
Adds the given sources to the database
|
| 98 |
'''
|
| 99 |
for source in sources:
|
| 100 |
-
session.add(source)
|
| 101 |
-
session.commit()
|
|
|
|
| 74 |
Adds a new article to the database
|
| 75 |
'''
|
| 76 |
session.add(article)
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def retrieve_articles(session: Session):
|
|
|
|
| 85 |
|
| 86 |
def clear_articles(session: Session):
|
| 87 |
'''
|
| 88 |
+
Deletes all articles and sources in the database
|
| 89 |
'''
|
| 90 |
+
session.query(Source).delete()
|
| 91 |
session.query(Article).delete()
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
def add_sources(session: Session, sources: list[Source]):
|
|
|
|
| 96 |
Adds the given sources to the database
|
| 97 |
'''
|
| 98 |
for source in sources:
|
| 99 |
+
session.add(source)
|
|
|
app/main.py
CHANGED
|
@@ -8,13 +8,12 @@ import threading
|
|
| 8 |
import os
|
| 9 |
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
DEFAULT_UPDATE_INTERVAL = 180 # in minutes
|
| 14 |
|
| 15 |
# set update interval from environment variable if available
|
| 16 |
try:
|
| 17 |
-
update_interval_str = os.getenv(
|
| 18 |
if update_interval_str is not None:
|
| 19 |
update_interval = int(update_interval_str)
|
| 20 |
else:
|
|
@@ -33,7 +32,7 @@ is_updating = threading.Lock()
|
|
| 33 |
@asynccontextmanager
|
| 34 |
async def lifespan(app: FastAPI):
|
| 35 |
# update news periodically
|
| 36 |
-
scheduler.add_job(update_news, 'interval',
|
| 37 |
scheduler.start()
|
| 38 |
yield
|
| 39 |
# stop the scheduler when closing the server
|
|
@@ -72,8 +71,7 @@ async def read_root(request: Request, background_tasks: BackgroundTasks):
|
|
| 72 |
{
|
| 73 |
'request': request,
|
| 74 |
'articles': articles,
|
| 75 |
-
'
|
| 76 |
-
' Please check back in a few minutes.'
|
| 77 |
}
|
| 78 |
)
|
| 79 |
|
|
@@ -82,6 +80,6 @@ async def read_root(request: Request, background_tasks: BackgroundTasks):
|
|
| 82 |
{
|
| 83 |
'request': request,
|
| 84 |
'articles': articles,
|
| 85 |
-
'
|
| 86 |
}
|
| 87 |
)
|
|
|
|
| 8 |
import os
|
| 9 |
|
| 10 |
|
| 11 |
+
DEFAULT_UPDATE_INTERVAL = 12 # in hours
|
| 12 |
|
|
|
|
| 13 |
|
| 14 |
# set update interval from environment variable if available
|
| 15 |
try:
|
| 16 |
+
update_interval_str = os.getenv('UPDATE_INTERVAL')
|
| 17 |
if update_interval_str is not None:
|
| 18 |
update_interval = int(update_interval_str)
|
| 19 |
else:
|
|
|
|
| 32 |
@asynccontextmanager
|
| 33 |
async def lifespan(app: FastAPI):
|
| 34 |
# update news periodically
|
| 35 |
+
scheduler.add_job(update_news, 'interval', hours=update_interval)
|
| 36 |
scheduler.start()
|
| 37 |
yield
|
| 38 |
# stop the scheduler when closing the server
|
|
|
|
| 71 |
{
|
| 72 |
'request': request,
|
| 73 |
'articles': articles,
|
| 74 |
+
'updateInProgress': True
|
|
|
|
| 75 |
}
|
| 76 |
)
|
| 77 |
|
|
|
|
| 80 |
{
|
| 81 |
'request': request,
|
| 82 |
'articles': articles,
|
| 83 |
+
'updateInProgress': is_updating.locked()
|
| 84 |
}
|
| 85 |
)
|
app/templates/index.html
CHANGED
|
@@ -48,15 +48,20 @@
|
|
| 48 |
<svg class="h-8 w-8 mr-2" viewBox="0 0 32 32" fill="black" xmlns="http://www.w3.org/2000/svg">
|
| 49 |
<circle cx="16" cy="16" r="10" />
|
| 50 |
</svg>
|
| 51 |
-
<a href="#" class="font-display text-2xl
|
| 52 |
</div>
|
| 53 |
</header>
|
| 54 |
|
| 55 |
<main class="container mx-auto px-6 py-12 max-w-4xl">
|
| 56 |
|
| 57 |
-
{% if
|
| 58 |
-
<div class="text-center
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
</div>
|
| 61 |
{% endif %}
|
| 62 |
|
|
|
|
| 48 |
<svg class="h-8 w-8 mr-2" viewBox="0 0 32 32" fill="black" xmlns="http://www.w3.org/2000/svg">
|
| 49 |
<circle cx="16" cy="16" r="10" />
|
| 50 |
</svg>
|
| 51 |
+
<a href="#" class="font-display text-2xl">Focal</a>
|
| 52 |
</div>
|
| 53 |
</header>
|
| 54 |
|
| 55 |
<main class="container mx-auto px-6 py-12 max-w-4xl">
|
| 56 |
|
| 57 |
+
{% if updateInProgress %}
|
| 58 |
+
<div class="text-center my-24">
|
| 59 |
+
<svg class="animate-spin h-8 w-8 text-primary mx-auto mb-4" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
|
| 60 |
+
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
| 61 |
+
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
| 62 |
+
</svg>
|
| 63 |
+
<h3 class="text-xl font-medium text-gray-900">Updating news articles now...</h3>
|
| 64 |
+
<p class="text-gray-500 mt-1">Please check back in a few minutes.</p>
|
| 65 |
</div>
|
| 66 |
{% endif %}
|
| 67 |
|
app/update_news.py
CHANGED
|
@@ -31,19 +31,27 @@ def update_news():
|
|
| 31 |
# rss feeds for current headlines
|
| 32 |
rss_feed_urls = read_rss_feed_urls()
|
| 33 |
|
| 34 |
-
#
|
| 35 |
with get_session() as session:
|
|
|
|
| 36 |
clear_articles(session)
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
for source in
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
if __name__ == '__main__':
|
|
|
|
| 31 |
# rss feeds for current headlines
|
| 32 |
rss_feed_urls = read_rss_feed_urls()
|
| 33 |
|
| 34 |
+
# update database
|
| 35 |
with get_session() as session:
|
| 36 |
+
# clear old articles
|
| 37 |
clear_articles(session)
|
| 38 |
|
| 39 |
+
# summarize and collect all articles
|
| 40 |
+
articles = []
|
| 41 |
+
sources = []
|
| 42 |
+
for article, article_sources in news_summary(summarizer, rss_feed_urls):
|
| 43 |
+
articles.append(article)
|
| 44 |
+
sources.extend((source, article) for source in article_sources)
|
| 45 |
+
|
| 46 |
+
# add all articles to database and commit to generate IDs
|
| 47 |
+
session.add_all(articles)
|
| 48 |
+
session.commit()
|
| 49 |
+
|
| 50 |
+
# add sources associated with their articles
|
| 51 |
+
for source, article in sources:
|
| 52 |
+
source.article_id = article.id
|
| 53 |
+
session.add_all([source for source, _ in sources])
|
| 54 |
+
session.commit()
|
| 55 |
|
| 56 |
|
| 57 |
if __name__ == '__main__':
|