michaelkri committed
Commit 18cd44b · 1 Parent(s): ca9a164

Improved logging

- app/database.py +4 -3
- app/news_fetcher.py +3 -2
- app/scraper.py +5 -2
- app/summarizer.py +8 -7
- app/update_news.py +2 -1
- test.py +0 -4
app/database.py CHANGED

@@ -4,6 +4,7 @@ from sqlalchemy.sql import func
 from contextlib import contextmanager
 import os
 import datetime
+import logging
 
 
 class Base(DeclarativeBase):
@@ -41,15 +42,15 @@ TURSO_AUTH_TOKEN = os.getenv('TURSO_AUTH_TOKEN')
 
 # create an engine
 if not TURSO_DATABASE_URL or not TURSO_AUTH_TOKEN:
-
-    engine = create_engine('sqlite:///news.db', echo=
+    logging.info('Using local SQLite database')
+    engine = create_engine('sqlite:///news.db', echo=False)
 else:
     engine = create_engine(
         f'sqlite+{TURSO_DATABASE_URL}?secure=true',
         connect_args={
             'auth_token': TURSO_AUTH_TOKEN
         },
-        echo=
+        echo=False,
     )
 
 # create tables if needed
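Note on `echo=False`: SQLAlchemy's `echo` flag prints every SQL statement to stdout, so disabling it on both engine paths leaves the new `logging` calls as the single output channel. If statement logging is ever wanted back, a minimal sketch using SQLAlchemy's documented logger name rather than `echo` (not part of this commit):

    import logging

    logging.basicConfig()
    # 'sqlalchemy.engine' at INFO logs each statement;
    # DEBUG additionally logs result rows.
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)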
app/news_fetcher.py CHANGED

@@ -2,12 +2,13 @@ import feedparser
 from .summarizer import Summarizer
 from .scraper import get_articles
 from .database import Article, Source
+import logging
 
 
 def topic_summary(summarizer: Summarizer, query: str, max_results: int = 5, min_cluster_size: int = 2) -> str:
-
+    logging.debug(f'Beginning search for \'{query}\'...')
     articles_and_urls = get_articles(query, max_results=max_results)
-
+    logging.debug(f'Retrieved {len(articles_and_urls)} articles')
     articles = [item[0] for item in articles_and_urls]
     urls = [item[1] for item in articles_and_urls]
     summary = summarizer.combined_summary(articles, min_cluster_size=min_cluster_size)
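These module-level `logging.debug(...)` calls go to the root logger, which emits only WARNING and above until something configures it. A minimal sketch of the one-time setup that would surface them, assuming it runs at application startup (the format string is illustrative, not from this repo):

    import logging

    logging.basicConfig(
        level=logging.DEBUG,  # let the new debug messages through
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
    )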
app/scraper.py CHANGED

@@ -1,5 +1,6 @@
 from ddgs import DDGS
 import trafilatura
+import logging
 
 
 def retrieve_article(url: str) -> str:
@@ -7,7 +8,8 @@ def retrieve_article(url: str) -> str:
         page = trafilatura.fetch_url(url)
         return trafilatura.extract(page)
     except Exception as e:
-
+        logging.debug(e.args)
+        logging.error('Error retrieving article')
         return None
 
 
@@ -17,7 +19,8 @@ def get_articles(query, max_results=5, min_article_length=100) -> list[tuple[str
     try:
         search_results = DDGS().news(query, timelimit='d', max_results=max_results)
    except Exception as e:
-
+        logging.debug(e.args)
+        logging.error('Error searching for articles')
 
     # get article urls
     urls = set([r['url'] for r in search_results])
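Both handlers log `e.args` at DEBUG and a fixed message at ERROR; the standard library can also capture the full traceback in one call. A hedged alternative for the same `except` block, using `logging.exception` (which logs at ERROR and must run inside an exception handler), shown here for `retrieve_article` and not what the commit does:

    try:
        page = trafilatura.fetch_url(url)
        return trafilatura.extract(page)
    except Exception:
        # attaches the active traceback to the ERROR record
        logging.exception('Error retrieving article')
        return None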
app/summarizer.py CHANGED

@@ -3,6 +3,7 @@ from nltk import tokenize
 from sklearn.cluster import HDBSCAN
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
+import logging
 
 
 class Summarizer:
@@ -71,7 +72,7 @@
         # combine key sentences from each cluster
         key_sentences = []
         for i, cluster in enumerate(clusters):
-
+            logging.debug(f'Extracting from cluster {i + 1}...')
             top_sentences = Summarizer.rank_cluster_sentences(cluster)
             content = '\n'.join(top_sentences[:3])
             key_sentences.append(content)
@@ -79,7 +80,7 @@
         combined = ' '.join(key_sentences)
 
         # summarize all key sentences
-
+        logging.debug('Creating response...')
         summary = self.summarize(
             combined,
             min_length=60,
@@ -92,7 +93,7 @@
         if not articles:
             return None
 
-
+        logging.debug('Tokenizing into sentences...')
         # create a list of all sentences from all articles
         sentences = []
         for article in articles:
@@ -100,19 +101,19 @@
 
         # remove duplicate sentences
         sentences = sorted(list(set(sentences)), key=sentences.index)
-
+        logging.debug(f'Found {len(sentences)} unique sentences')
 
         if not sentences:
             return None
 
-
+        logging.debug('Creating sentence embeddings...')
         # create embeddings
         embeddings = self.create_embeddings(sentences)
 
-
+        logging.debug('Grouping sentences into clusters...')
         # group (embeddings of) sentences by similarity
         clusters = self.cluster_sentences(sentences, embeddings, min_cluster_size=min_cluster_size)
-
+        logging.debug(f'Created {len(clusters)} clusters')
 
         # summarize all clusters into a single summary
         summary = self.summarize_clusters(clusters)
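A side note on the f-strings: they are rendered even when DEBUG records are filtered out. The logging module's %-style arguments defer formatting until a record is actually emitted (the `len()` calls still run either way); a sketch of the equivalent calls:

    logging.debug('Found %d unique sentences', len(sentences))
    logging.debug('Created %d clusters', len(clusters))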
app/update_news.py CHANGED

@@ -4,6 +4,7 @@ from .summarizer import Summarizer
 from .news_fetcher import news_summary
 from .database import get_session, clear_articles, add_article, add_sources
 import datetime
+import logging
 
 
 def read_rss_feed_urls(filename : str ='rss_feeds.txt') -> list[str]:
@@ -17,7 +18,7 @@ def read_rss_feed_urls(filename : str ='rss_feeds.txt') -> list[str]:
 
 
 def update_news():
-
+    logging.info(f'Initiating news update: {datetime.datetime.now()}')
 
     # model to create embeddings from sentences
    embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
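The new INFO line timestamps itself with `datetime.datetime.now()`; a handler format containing `%(asctime)s` stamps every record automatically, which would make the manual timestamp redundant. A minimal sketch under that assumption (the format string is illustrative):

    import logging

    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=logging.INFO)
    logging.info('Initiating news update')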
test.py DELETED

@@ -1,4 +0,0 @@
-from database import DatabaseConnection
-db = DatabaseConnection()
-print(db.retrieve_articles())
-db.close()