Spaces:

gauthamnairy
/

Finder

Sleeping

App Files Community

gauthamnairy committed on Feb 23, 2025

Commit

7caeb9f

verified ·

1 Parent(s): 8f28ea1

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -30

app.py CHANGED Viewed

@@ -33,8 +33,6 @@ from pathlib import Path
 from dotenv import load_dotenv
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
-from transformers import pipeline
-import torch
 load_dotenv()  # Load environment variables from .env file
@@ -126,9 +124,6 @@ ENERGY_COMPANIES = [
     "https://www.orano.group/en/"
 ]
-# Initialize local summarization pipeline (using facebook/bart-large-cnn)
-local_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@@ -270,32 +265,14 @@ Remember to:
 def local_summarize(text):
     """
-    Summarizes the given text using a local huggingface model.
-    If the text is too long, it splits the text into manageable chunks.
     """
-    # Maximum number of words (roughly) that the summarizer can handle
-    max_words = 800
-    words = text.split()
-    if len(words) > max_words:
-        # Split text into chunks of ~max_words tokens
-        chunks = []
-        chunk = []
-        for word in words:
-            chunk.append(word)
-            if len(chunk) >= max_words:
-                chunks.append(" ".join(chunk))
-                chunk = []
-        if chunk:
-            chunks.append(" ".join(chunk))
-        summaries = []
-        for chunk in chunks:
-            summary = local_summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
-            summaries.append(summary)
-        return " ".join(summaries)
-    else:
-        return local_summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
 def scrape_company_news(url):
     """

 from dotenv import load_dotenv
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 load_dotenv()  # Load environment variables from .env file
     "https://www.orano.group/en/"
 ]
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 def local_summarize(text):
     """
+    A simple extractive summarization function that doesn't require downloading models.
     """
+    # Simple extractive summarization
+    sentences = text.split('.')
+    # Take first 2-3 sentences as summary if available
+    summary_sentences = sentences[:min(3, len(sentences))]
+    summary = '. '.join(sentence.strip() for sentence in summary_sentences if sentence.strip())
+    return summary + ('.' if not summary.endswith('.') else '')
 def scrape_company_news(url):
     """