gauthamnairy committed on
Commit
7caeb9f
·
verified ·
1 Parent(s): 8f28ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -30
app.py CHANGED
@@ -33,8 +33,6 @@ from pathlib import Path
33
  from dotenv import load_dotenv
34
  from requests.adapters import HTTPAdapter
35
  from urllib3.util.retry import Retry
36
- from transformers import pipeline
37
- import torch
38
 
39
  load_dotenv() # Load environment variables from .env file
40
 
@@ -126,9 +124,6 @@ ENERGY_COMPANIES = [
126
  "https://www.orano.group/en/"
127
  ]
128
 
129
- # Initialize local summarization pipeline (using facebook/bart-large-cnn)
130
- local_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
131
-
132
  def allowed_file(filename):
133
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
134
 
@@ -270,32 +265,14 @@ Remember to:
270
 
271
  def local_summarize(text):
272
  """
273
- Summarizes the given text using a local huggingface model.
274
- If the text is too long, it splits the text into manageable chunks.
275
  """
276
- # Maximum number of words (roughly) that the summarizer can handle
277
- max_words = 800
278
- words = text.split()
279
-
280
- if len(words) > max_words:
281
- # Split text into chunks of ~max_words tokens
282
- chunks = []
283
- chunk = []
284
- for word in words:
285
- chunk.append(word)
286
- if len(chunk) >= max_words:
287
- chunks.append(" ".join(chunk))
288
- chunk = []
289
- if chunk:
290
- chunks.append(" ".join(chunk))
291
-
292
- summaries = []
293
- for chunk in chunks:
294
- summary = local_summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
295
- summaries.append(summary)
296
- return " ".join(summaries)
297
- else:
298
- return local_summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
299
 
300
  def scrape_company_news(url):
301
  """
 
33
  from dotenv import load_dotenv
34
  from requests.adapters import HTTPAdapter
35
  from urllib3.util.retry import Retry
 
 
36
 
37
  load_dotenv() # Load environment variables from .env file
38
 
 
124
  "https://www.orano.group/en/"
125
  ]
126
 
 
 
 
127
  def allowed_file(filename):
128
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
129
 
 
265
 
266
  def local_summarize(text):
267
  """
268
+ A simple extractive summarization function that doesn't require downloading models.
 
269
  """
270
+ # Simple extractive summarization
271
+ sentences = text.split('.')
272
+ # Take first 2-3 sentences as summary if available
273
+ summary_sentences = sentences[:min(3, len(sentences))]
274
+ summary = '. '.join(sentence.strip() for sentence in summary_sentences if sentence.strip())
275
+ return summary + ('.' if not summary.endswith('.') else '')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
  def scrape_company_news(url):
278
  """