clementBE committed on
Commit
49df268
·
verified ·
1 Parent(s): eb1aa3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -11,6 +11,13 @@ from pydub import AudioSegment
11
  from sumy.parsers.plaintext import PlaintextParser
12
  from sumy.nlp.tokenizers import Tokenizer
13
  from sumy.summarizers.lex_rank import LexRankSummarizer
 
 
 
 
 
 
 
14
 
15
  # --- Model definitions ---
16
  MODEL_SIZES = {
@@ -40,6 +47,9 @@ def get_model_pipeline(model_name, progress):
40
 
41
  # --- Extractive summary ---
42
  def extractive_summary(text, sentences_count=7):
 
 
 
43
  parser = PlaintextParser.from_string(text, Tokenizer("french"))
44
  summarizer = LexRankSummarizer()
45
  summary = summarizer(parser.document, sentences_count)
 
11
  from sumy.parsers.plaintext import PlaintextParser
12
  from sumy.nlp.tokenizers import Tokenizer
13
  from sumy.summarizers.lex_rank import LexRankSummarizer
14
+ import nltk
15
+
16
+ # --- Ensure NLTK punkt tokenizer is downloaded ---
17
+ try:
18
+ nltk.data.find("tokenizers/punkt")
19
+ except LookupError:
20
+ nltk.download("punkt")
21
 
22
  # --- Model definitions ---
23
  MODEL_SIZES = {
 
47
 
48
  # --- Extractive summary ---
49
  def extractive_summary(text, sentences_count=7):
50
+ """
51
+ Summarize the text using LexRank (extractive summarization)
52
+ """
53
  parser = PlaintextParser.from_string(text, Tokenizer("french"))
54
  summarizer = LexRankSummarizer()
55
  summary = summarizer(parser.document, sentences_count)