Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 from transformers import pipeline, AutoTokenizer
 import nltk
 from nltk.tokenize import sent_tokenize
+import time
 
 # Download NLTK data
 nltk.download('punkt')
@@ -23,8 +24,9 @@ summarization_models = {
 # Initialize tokenizer
 tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
 
-#
-
+# Helper function to initialize summarization pipeline
+def get_summarizer(model_name):
+    return pipeline("summarization", model=model_name)
 
 # Initialize translation pipeline
 def get_translator(language):
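Note on the new helper: get_summarizer builds a fresh pipeline("summarization", model=model_name) on every call, so each summarization request reloads the model weights from scratch. A minimal sketch of one possible refinement, not part of this commit, is to memoize the pipeline per model name (get_summarizer_cached is a hypothetical name):

    # Hypothetical variant: cache one pipeline per model name so repeated
    # requests reuse the already-loaded weights instead of reloading them.
    from functools import lru_cache
    from transformers import pipeline

    @lru_cache(maxsize=4)
    def get_summarizer_cached(model_name):
        return pipeline("summarization", model=model_name)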
@@ -60,11 +62,12 @@ def summarize_text(text, model_name):
     if len(text) < 200:  # Adjust the threshold as needed
         print("Input text is too short for summarization. Please provide longer text.")
         return ""
+    summarizer = get_summarizer(model_name)
     chunks = split_text(text)
     summaries = []
     for chunk in chunks:
         try:
-            summary =
+            summary = summarizer(chunk, max_length=150, min_length=20, do_sample=False)[0]['summary_text']
             summaries.append(summary)
         except Exception as e:
             print(f"Error summarizing chunk: {chunk}\nError: {e}")
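For reference, the new summarizer call indexes [0]['summary_text'] because a transformers summarization pipeline returns a list with one dict per input, each holding the generated text under 'summary_text'. A small standalone sketch of that call shape, using the same facebook/bart-large-cnn checkpoint referenced in the tokenizer setup above (the sample text is a placeholder):

    from transformers import pipeline

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    result = summarizer(
        "Long input text to be summarized goes here ...",
        max_length=150,   # upper bound on generated summary length (tokens)
        min_length=20,    # lower bound on generated summary length (tokens)
        do_sample=False,  # deterministic decoding instead of sampling
    )
    # result has the form: [{'summary_text': '...generated summary...'}]
    print(result[0]["summary_text"])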
@@ -83,6 +86,7 @@ def translate_text(text, language):
     return text
 
 def process_text(input_text, model, language):
+    start_time = time.time()
     print(f"Input text: {input_text[:500]}...")  # Show only the first 500 characters for brevity
     summary = summarize_text(input_text, model)
     if not summary:
@@ -96,6 +100,8 @@ def process_text(input_text, model, language):
     print(f"Bullet Points: {bullet_points}")
     translated_text = translate_text(bullet_points, language)
     print(f"Translated Text: {translated_text}")
+    end_time = time.time()
+    print(f"Processing time: {end_time - start_time} seconds")
     return bullet_points, translated_text
 
 def generate_bullet_points(summary):
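The timing added to process_text uses time.time(), which reports wall-clock time and is fine for a rough log line. For measuring elapsed durations, time.perf_counter() is the usual choice because it is monotonic and higher resolution; a small sketch of the same instrumentation as a reusable wrapper (timed_call is a hypothetical helper, not part of this change):

    import time

    def timed_call(fn, *args, **kwargs):
        # Run any callable and print how long it took, using a monotonic clock.
        start = time.perf_counter()
        result = fn(*args, **kwargs)
        print(f"Processing time: {time.perf_counter() - start:.2f} seconds")
        return result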
@@ -148,4 +154,5 @@ iface.launch()
 
 
 
+
 