Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,45 +18,62 @@ def fetch_text_from_url(url):
|
|
| 18 |
except Exception as e:
|
| 19 |
return None, f"Error fetching URL: {e}"
|
| 20 |
|
|
|
|
| 21 |
# Function to summarize text using T5
|
| 22 |
def summarize_t5(text, size):
|
| 23 |
-
model_name = "
|
| 24 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
| 25 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
| 26 |
|
| 27 |
input_text = f"summarize: {text}"
|
| 28 |
inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
|
| 29 |
|
|
|
|
| 30 |
if size == "Short":
|
| 31 |
-
max_len = 50
|
| 32 |
elif size == "Medium":
|
| 33 |
-
max_len = 100
|
| 34 |
else: # Long
|
| 35 |
-
max_len = 200
|
| 36 |
|
| 37 |
-
summary_ids = model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 39 |
return summary
|
| 40 |
|
| 41 |
# Function to summarize text using BART
|
| 42 |
def summarize_bart(text, size):
|
| 43 |
-
model_name = "
|
| 44 |
tokenizer = BartTokenizer.from_pretrained(model_name)
|
| 45 |
model = BartForConditionalGeneration.from_pretrained(model_name)
|
| 46 |
|
| 47 |
inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
|
| 48 |
|
|
|
|
| 49 |
if size == "Short":
|
| 50 |
-
max_len = 50
|
| 51 |
elif size == "Medium":
|
| 52 |
-
max_len = 100
|
| 53 |
else: # Long
|
| 54 |
-
max_len = 200
|
| 55 |
|
| 56 |
-
summary_ids = model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 58 |
return summary
|
| 59 |
-
|
| 60 |
# Function to convert text to speech and save as a file
|
| 61 |
def text_to_speech(text):
|
| 62 |
tts = gtts.gTTS(text)
|
|
|
|
| 18 |
except Exception as e:
|
| 19 |
return None, f"Error fetching URL: {e}"
|
| 20 |
|
| 21 |
+
# Function to summarize text using T5
# The tokenizer/model pair is memoized so repeated calls (e.g. per Gradio
# request) don't reload the weights from disk every time.
_T5_CACHE = {}


def _get_t5(model_name):
    """Load and cache the T5 tokenizer/model pair for *model_name*."""
    if model_name not in _T5_CACHE:
        _T5_CACHE[model_name] = (
            T5Tokenizer.from_pretrained(model_name),
            T5ForConditionalGeneration.from_pretrained(model_name),
        )
    return _T5_CACHE[model_name]


def summarize_t5(text, size):
    """Summarize *text* with the fine-tuned T5 model.

    Parameters
    ----------
    text : str
        Document to summarize (truncated to 512 input tokens).
    size : str
        "Short", "Medium" or "Long"; any other value falls through to the
        "Long" bounds, matching the original else-branch.

    Returns
    -------
    str
        The decoded summary string.
    """
    # NOTE(review): hard-coded absolute Windows path — this cannot resolve on
    # the hosted Space (likely the cause of its runtime error); it should point
    # at a bundled model directory or a Hub model id instead.
    model_name = "C:\\Users\\zurin\\Desktop\\text summarization\\fine_tuned_t52"
    tokenizer, model = _get_t5(model_name)

    # T5 needs an explicit task prefix for summarization.
    input_text = f"summarize: {text}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

    # Map the requested size to (min_length, max_length) in generated tokens.
    bounds = {"Short": (30, 50), "Medium": (50, 100)}
    min_len, max_len = bounds.get(size, (100, 200))  # default: Long

    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),  # was dropped before
        max_length=max_len,
        min_length=min_len,   # use the specified min_length instead of fixed 10
        length_penalty=1.0,   # reduced from 2.0 to allow more length variation
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 49 |
|
| 50 |
# Function to summarize text using BART
# The tokenizer/model pair is memoized so repeated calls (e.g. per Gradio
# request) don't reload the weights from disk every time.
_BART_CACHE = {}


def _get_bart(model_name):
    """Load and cache the BART tokenizer/model pair for *model_name*."""
    if model_name not in _BART_CACHE:
        _BART_CACHE[model_name] = (
            BartTokenizer.from_pretrained(model_name),
            BartForConditionalGeneration.from_pretrained(model_name),
        )
    return _BART_CACHE[model_name]


def summarize_bart(text, size):
    """Summarize *text* with the fine-tuned BART model.

    Parameters
    ----------
    text : str
        Document to summarize (truncated to 1024 input tokens).
    size : str
        "Short", "Medium" or "Long"; any other value falls through to the
        "Long" bounds, matching the original else-branch.

    Returns
    -------
    str
        The decoded summary string.
    """
    # NOTE(review): hard-coded absolute Windows path — this cannot resolve on
    # the hosted Space (likely the cause of its runtime error); it should point
    # at a bundled model directory or a Hub model id instead.
    model_name = "C:\\Users\\zurin\\Desktop\\text summarization\\fine_tuned_bart"
    tokenizer, model = _get_bart(model_name)

    # BART takes the raw document — no task prefix needed.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    # Map the requested size to (min_length, max_length) in generated tokens.
    bounds = {"Short": (30, 50), "Medium": (50, 100)}
    min_len, max_len = bounds.get(size, (100, 200))  # default: Long

    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),  # was dropped before
        max_length=max_len,
        min_length=min_len,
        length_penalty=0.8,      # reduced from 1.0 to encourage length variation
        num_beams=6,
        no_repeat_ngram_size=2,  # added to prevent repetition
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
|
|
|
| 77 |
# Function to convert text to speech and save as a file
|
| 78 |
def text_to_speech(text):
|
| 79 |
tts = gtts.gTTS(text)
|