Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,8 +33,6 @@ from pathlib import Path
|
|
| 33 |
from dotenv import load_dotenv
|
| 34 |
from requests.adapters import HTTPAdapter
|
| 35 |
from urllib3.util.retry import Retry
|
| 36 |
-
from transformers import pipeline
|
| 37 |
-
import torch
|
| 38 |
|
| 39 |
load_dotenv() # Load environment variables from .env file
|
| 40 |
|
|
@@ -126,9 +124,6 @@ ENERGY_COMPANIES = [
|
|
| 126 |
"https://www.orano.group/en/"
|
| 127 |
]
|
| 128 |
|
| 129 |
-
# Initialize local summarization pipeline (using facebook/bart-large-cnn)
|
| 130 |
-
local_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 131 |
-
|
| 132 |
def allowed_file(filename):
    """
    Tell whether ``filename`` carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot in the name and is
    compared in lowercase. A name without any dot is rejected.
    """
    # Guard clause: no dot means there is no extension to check.
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
| 134 |
|
|
@@ -270,32 +265,14 @@ Remember to:
|
|
| 270 |
|
| 271 |
def local_summarize(text):
|
| 272 |
"""
|
| 273 |
-
|
| 274 |
-
If the text is too long, it splits the text into manageable chunks.
|
| 275 |
"""
|
| 276 |
-
#
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
chunks = []
|
| 283 |
-
chunk = []
|
| 284 |
-
for word in words:
|
| 285 |
-
chunk.append(word)
|
| 286 |
-
if len(chunk) >= max_words:
|
| 287 |
-
chunks.append(" ".join(chunk))
|
| 288 |
-
chunk = []
|
| 289 |
-
if chunk:
|
| 290 |
-
chunks.append(" ".join(chunk))
|
| 291 |
-
|
| 292 |
-
summaries = []
|
| 293 |
-
for chunk in chunks:
|
| 294 |
-
summary = local_summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
| 295 |
-
summaries.append(summary)
|
| 296 |
-
return " ".join(summaries)
|
| 297 |
-
else:
|
| 298 |
-
return local_summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
| 299 |
|
| 300 |
def scrape_company_news(url):
|
| 301 |
"""
|
|
|
|
| 33 |
from dotenv import load_dotenv
|
| 34 |
from requests.adapters import HTTPAdapter
|
| 35 |
from urllib3.util.retry import Retry
|
|
|
|
|
|
|
| 36 |
|
| 37 |
load_dotenv() # Load environment variables from .env file
|
| 38 |
|
|
|
|
| 124 |
"https://www.orano.group/en/"
|
| 125 |
]
|
| 126 |
|
|
|
|
|
|
|
|
|
|
| 127 |
def allowed_file(filename):
    """
    Tell whether ``filename`` carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot in the name and is
    compared in lowercase. A name without any dot is rejected.
    """
    # Guard clause: no dot means there is no extension to check.
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
| 129 |
|
|
|
|
| 265 |
|
| 266 |
def local_summarize(text, max_sentences=3):
    """
    Build a simple extractive summary from the leading sentences of ``text``.

    No model download is required: the text is split into sentences and the
    first ``max_sentences`` non-empty ones are joined with single spaces.

    Args:
        text: The text to summarize. ``None`` or blank input is accepted.
        max_sentences: Upper bound on the number of leading sentences kept
            (defaults to 3, matching the previous hard-coded behavior).

    Returns:
        The summary string, ending in terminal punctuation, or an empty
        string when there is nothing to summarize.
    """
    # Imported locally so this function does not depend on a module-level
    # ``re`` import being present in the file.
    import re

    if not text or not text.strip():
        return ""

    # Split only where '.', '!' or '?' is followed by whitespace, so that
    # decimals ("3.5") and dotted tokens (URLs, "e.g.") stay in one piece.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    selected = [piece.strip() for piece in pieces if piece.strip()]
    selected = selected[:max(max_sentences, 0)]
    if not selected:
        return ""

    summary = ' '.join(selected)
    # Only add a closing period when the text does not already end a sentence.
    if summary.endswith(('.', '!', '?')):
        return summary
    return summary + '.'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
def scrape_company_news(url):
|
| 278 |
"""
|