Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import soundfile as sf | |
| from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan | |
| import pdfplumber | |
| import tqdm | |
| import soundfile as sf | |
# Build the summarization pipeline a single time, at import, so that every
# request handled by this module reuses the same loaded "t5-large" model.
summarizer = pipeline(task="summarization", model="t5-large")
def _slice_abstract(text):
    """Return the text between the "Abstract" heading and the next section.

    The section is taken to end at the first "Introduction" found after the
    abstract start, or failing that at the first "Contents". Missing markers
    fall back to the document boundaries instead of doing arithmetic on the
    ``-1`` that ``str.find`` returns.
    """
    heading = "Abstract"
    heading_pos = text.find(heading)
    # No heading: start at the top of the document rather than at index 7
    # (which is what ``-1 + len("Abstract")`` used to produce).
    start = heading_pos + len(heading) if heading_pos != -1 else 0

    # No end marker: keep everything to the end of the text rather than
    # slicing to -1, which silently dropped the final character.
    end = len(text)
    for marker in ("Introduction", "Contents"):
        # Search only after the abstract start so an earlier occurrence of the
        # marker (e.g. in a title) cannot yield an empty slice.
        marker_pos = text.find(marker, start)
        if marker_pos != -1:
            end = marker_pos
            break

    return text[start:end].strip()


def extract_abstract(path):
    """Extract the abstract section from a PDF file.

    Args:
        path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The stripped text found between the "Abstract" heading and the
        following "Introduction" (or "Contents") heading. If the "Abstract"
        heading is absent the slice starts at the beginning of the document;
        if no end marker follows, it runs to the end of the document.
    """
    page_texts = []
    with pdfplumber.open(path) as pdf:
        for page in tqdm.tqdm(pdf.pages):
            page_text = page.extract_text(x_tolerance=2)
            # Pages that yield no text (falsy result) are skipped.
            if page_text:
                page_texts.append(page_text)

    # Join with a newline so the last word of one page is not glued to the
    # first word of the next, and to avoid repeated string concatenation.
    return _slice_abstract("\n".join(page_texts))
def summarize_text(text):
    """Summarize ``text`` into a single comma-joined sentence.

    Args:
        text: The passage to summarize (here, the extracted abstract).

    Returns:
        The summary produced by the module-level ``summarizer`` pipeline, with
        every " ." sentence break turned into "," and exactly one trailing
        period.
    """
    # Add prompt to encourage single sentence summary
    prompt = "Summarize the following text in one sentence: " + text
    result = summarizer(
        prompt,
        max_length=68,
        min_length=40,
        do_sample=False,
        num_beams=1,
        early_stopping=True,
    )
    raw_summary = result[0]["summary_text"].strip()

    # Remove the trailing terminator before rewriting sentence breaks.
    # Previously a final " ." was turned into "," and a "." was then appended,
    # so summaries ended in ",.".
    body = raw_summary.rstrip(" .")

    # Presumably the model emits sentence breaks with a space before the
    # period ("foo . bar"); a single replace of " ." covers " . " as well,
    # which is why the old second replace never matched anything.
    body = body.replace(" .", ",")

    return body + "."
# Lazily populated (processor, model, vocoder) tuple shared by all requests.
_TTS_COMPONENTS = None


def _load_tts_components():
    """Return the SpeechT5 processor, model and vocoder, loading them once."""
    global _TTS_COMPONENTS
    if _TTS_COMPONENTS is None:
        _TTS_COMPONENTS = (
            SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
            SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
            SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
        )
    return _TTS_COMPONENTS


def pdf_to_speech(file):
    """Turn an uploaded PDF into a spoken summary of its abstract.

    Args:
        file: The value Gradio passes for the file input. Either an object
            exposing the local path as ``.name`` or the path string itself.

    Returns:
        Path of a WAV file containing the synthesized summary.

    Raises:
        gr.Error: If no file was supplied.
    """
    import tempfile

    if file is None:
        # Submitting with nothing uploaded used to fail with an opaque
        # AttributeError on ``file.name``.
        raise gr.Error("Please upload a PDF file first.")

    # Accept both file-like objects (``.name``) and plain path strings.
    pdf_path = getattr(file, "name", file)

    abstract = extract_abstract(pdf_path)
    summary = summarize_text(abstract)

    # Reuse the already-loaded models instead of reloading them per request.
    processor, model, vocoder = _load_tts_components()

    inputs = processor(text=summary, return_tensors="pt")
    # Neutral (all-zero) speaker embedding.
    # NOTE(review): SpeechT5 is normally driven by a real x-vector speaker
    # embedding; confirm the zero vector gives acceptable audio.
    speaker_embeddings = torch.zeros(1, 512)
    speech = model.generate_speech(
        inputs["input_ids"], speaker_embeddings, vocoder=vocoder
    )
    speech_np = speech.squeeze().cpu().numpy()

    # Write to a unique file per call so requests cannot overwrite each
    # other's audio (the old fixed "output.wav" was shared by everyone).
    # The file is deliberately kept (delete=False) because its path is returned.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_output_path = tmp.name
    sf.write(wav_output_path, speech_np, 16000)

    # Return path of wav file for Gradio audio output and download
    return wav_output_path
# Assemble the web UI: a PDF-only upload widget feeding pdf_to_speech, whose
# returned file path is shown in an audio component.
_pdf_upload = gr.File(file_types=[".pdf"])
_audio_player = gr.Audio(type="filepath", label="Generated Audio")

demo = gr.Interface(
    fn=pdf_to_speech,
    inputs=_pdf_upload,
    outputs=_audio_player,
    title="PDF to Speech summary",
    description="Upload a PDF and get a spoken summary audio output.",
)

# Start serving with the launch flags this app has always used.
demo.launch(debug=True, prevent_thread_lock=True, share=True)