Spaces:
Runtime error
Runtime error
Add application file
Browse files
app.py
CHANGED
|
@@ -13,6 +13,8 @@ from IPython.display import Audio
|
|
| 13 |
import numpy as np
|
| 14 |
from datasets import load_dataset
|
| 15 |
import sentencepiece as spm
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
|
|
@@ -145,32 +147,43 @@ def extract_abstract(text_per_pagy):
|
|
| 145 |
return abstract_text
|
| 146 |
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
#
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
# Restituisci testo e audio
|
| 176 |
return summary, audio_file_path
|
|
|
|
| 13 |
import numpy as np
|
| 14 |
from datasets import load_dataset
|
| 15 |
import sentencepiece as spm
|
| 16 |
+
import os
|
| 17 |
+
import tempfile
|
| 18 |
|
| 19 |
|
| 20 |
|
|
|
|
| 147 |
return abstract_text
|
| 148 |
|
| 149 |
|
| 150 |
+
def main_function(uploaded_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    The upload is copied to a temporary ``.pdf`` file, parsed with
    ``read_pdf``, cleaned page by page with ``clean_text``, and passed to
    ``extract_abstract``. The abstract is summarized with a
    ``summarization`` pipeline, and the summary is turned into audio with a
    ``text-to-speech`` pipeline and written to a ``.wav`` file.

    Args:
        uploaded_file: The uploaded PDF. May be ``None`` (nothing uploaded),
            a binary file-like object exposing ``read()``, raw ``bytes``, or
            a filesystem path (``str`` / ``os.PathLike``).

    Returns:
        A ``(summary, audio_file_path)`` tuple. When ``uploaded_file`` is
        ``None`` the tuple is ``("No file loaded", None)``.

    Raises:
        Any exception raised while reading the upload, parsing the PDF, or
        running the models is propagated; the temporary PDF is removed
        first.
    """
    # Nothing was uploaded: report it instead of failing further down.
    if uploaded_file is None:
        return "No file loaded", None

    # Read the upload BEFORE creating the temporary file, so a failing read
    # cannot leave an orphaned file behind.
    pdf_bytes = _read_uploaded_bytes(uploaded_file)

    # delete=False because read_pdf reopens the file by path; removal is
    # handled by the finally clause below.
    temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    temp_pdf_path = temp_pdf.name

    try:
        # Writing happens inside the try so a failed write is cleaned up too.
        with temp_pdf:
            temp_pdf.write(pdf_bytes)

        text_per_pagy = read_pdf(temp_pdf_path)

        # Clean each page's text, then extract the abstract.
        # NOTE(review): assumes value[0] is an iterable of strings for each
        # page, as produced by read_pdf -- confirm against read_pdf.
        cleaned_pages = {
            key: clean_text(' '.join(value[0]))
            for key, value in text_per_pagy.items()
        }
        abstract_text = extract_abstract(cleaned_pages)

        # Summarize the abstract.
        # NOTE(review): both pipelines and the embeddings dataset are rebuilt
        # on every call; consider loading them once at module level.
        summarizer = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify-elife")
        summary = summarizer(abstract_text, max_length=50, min_length=30, do_sample=False)[0]['summary_text']

        # Generate speech from the summary.
        synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        # Index 7306 selects one fixed speaker x-vector from the dataset.
        speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
        speech = synthesiser(summary, forward_params={"speaker_embeddings": speaker_embedding})

        # Reserve a unique temporary .wav path so overlapping calls do not
        # overwrite each other's audio. The file is kept (delete=False)
        # because the caller consumes it through the returned path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
            audio_file_path = temp_wav.name
        sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])

    finally:
        # Always remove the temporary PDF, on success or failure.
        os.remove(temp_pdf_path)

    # Return the text and the audio path.
    return summary, audio_file_path


def _read_uploaded_bytes(uploaded_file):
    """Return the raw bytes of an upload given as bytes, a path, or a file-like object."""
    if isinstance(uploaded_file, (bytes, bytearray)):
        return bytes(uploaded_file)
    if isinstance(uploaded_file, (str, os.PathLike)):
        with open(uploaded_file, "rb") as source:
            return source.read()
    return uploaded_file.read()
|