Spaces:
Sleeping
Sleeping
| import os | |
| import warnings | |
| import torch | |
| import gradio as gr | |
| from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor | |
| import pdfplumber | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
# Suppress warnings globally (model loading emits many noisy deprecation
# warnings from transformers/torch).
warnings.filterwarnings("ignore")

# Setup models: prefer the first CUDA device when available, else CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"

# Load Whisper model and processor for speech-to-text.
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(whisper_model_id)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

# Create Whisper pipeline; passing device= moves the model onto that device.
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    device=device
)

# Setup FLAN-T5 model and tokenizer for question answering over transcripts.
# NOTE(review): FLAN-T5 is left on CPU (no .to(device)) — confirm intentional.
flan_t5_model_id = "google/flan-t5-large"
flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
def transcribe_audio(file):
    """Transcribe one audio file with the Whisper pipeline.

    Args:
        file: Path to an audio file accepted by the ASR pipeline.

    Returns:
        str: The transcribed text.
    """
    transcription = whisper_pipe(file)
    return transcription["text"]
def extract_text_from_pdf(pdf_file):
    """Extract the full text and the numbered question lines from a PDF.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        tuple[str, list[str]]: The concatenated page text, and every stripped
        line that begins with a digit (assumed to be a numbered question).
    """
    page_texts = []
    questions = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if not page_text:
                # Pages with no extractable text (e.g. scans) yield None.
                continue
            page_texts.append(page_text)
            # Extract questions based on numbering: any line whose first
            # non-space character is a digit is treated as a question.
            for line in page_text.split("\n"):
                stripped = line.strip()
                if stripped and stripped[0].isdigit():
                    questions.append(stripped)
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next (the original used plain +=).
    return "\n".join(page_texts), questions
def generate_form_data(text, questions):
    """Answer each question against a transcript using FLAN-T5.

    Args:
        text: The transcript to answer from.
        questions: Iterable of question strings.

    Returns:
        str: "Question: ...\nAnswer: ..." pairs joined by blank lines.
    """
    def _answer(prompt):
        # Run one prompt through FLAN-T5 and return the decoded answer.
        inputs = flan_t5_tokenizer(prompt, return_tensors='pt', max_length=1024, truncation=True)
        # Inference only — no_grad avoids building an autograd graph.
        # (The original skipped no_grad on the retry path; fixed here.)
        with torch.no_grad():
            outputs = flan_t5_model.generate(**inputs, max_length=100)
        return flan_t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

    responses = []
    for question in questions:
        prompt = f"""The following text is a transcript from an audio recording. Read the text and answer the following question in a complete sentence.\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"""
        generated_text = _answer(prompt)
        # Handle incomplete or missing answers.
        if not generated_text.strip():
            generated_text = "The answer to this question is not present in the script."
        elif len(generated_text.strip()) < 10:  # Arbitrary threshold for short/incomplete answers
            retry_prompt = f"""Based on the following transcript, provide a more detailed answer to the question.\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"""
            generated_text = _answer(retry_prompt)
        # Append question and response.
        responses.append(f"Question: {question}\nAnswer: {generated_text.strip()}")
    return "\n\n".join(responses)
def save_responses_to_pdf(responses, output_pdf_path):
    """Write each response as a numbered "File N" section in a PDF.

    Args:
        responses: List of response strings (newlines become <br/> breaks).
        output_pdf_path: Destination path for the generated PDF.
    """
    doc = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    stylesheet = getSampleStyleSheet()

    # Custom paragraph style for the response bodies.
    answer_style = ParagraphStyle(
        name='ResponseStyle',
        parent=stylesheet['BodyText'],
        fontSize=10,
        spaceAfter=12
    )

    story = []
    for number, response in enumerate(responses, start=1):
        # Section heading, a small gap, the response body, then a larger gap.
        story.append(Paragraph(f"<b>File {number}:</b>", stylesheet['Heading2']))
        story.append(Spacer(1, 6))
        story.append(Paragraph(response.replace("\n", "<br/>"), answer_style))
        story.append(Spacer(1, 18))

    doc.build(story)
def process_files(audio_files, pdf_file):
    """Gradio handler: transcribe each audio file and fill in the PDF's questions.

    Args:
        audio_files: Uploaded audio files (objects with a .name path).
        pdf_file: Uploaded PDF form containing numbered questions.

    Returns:
        tuple[str, str]: Path of the generated output PDF, and all generated
        responses joined by blank lines.
    """
    # Extract the questions once — the PDF is the same for every audio file
    # (the original re-parsed it inside the loop on each iteration).
    _, pdf_questions = extract_text_from_pdf(pdf_file.name)

    responses = []
    for audio_file in audio_files:
        # Transcribe audio, then generate form data from the transcript.
        transcribed_text = transcribe_audio(audio_file.name)
        responses.append(generate_form_data(transcribed_text, pdf_questions))

    # Save all responses to a single PDF.
    output_pdf_path = "output.pdf"
    save_responses_to_pdf(responses, output_pdf_path)

    # Return the PDF path and the generated responses.
    return output_pdf_path, "\n\n".join(responses)
# Gradio interface definition: multiple audio uploads + one PDF form in,
# a downloadable PDF and the raw response text out.
interface = gr.Interface(
    fn=process_files,
    inputs=[
        gr.Files(label="Upload Audio Dataset"),
        gr.File(label="Upload PDF File with Questions")
    ],
    outputs=[
        gr.File(label="Download Output PDF"),
        gr.Textbox(label="Generated Responses", lines=20, placeholder="The responses will be shown here...")
    ],
    title="FillUp by Umar Majeed",
    description="""This is a beta version of FillUp, an application designed to auto-fill predefined forms using call data.
Upload the audio files from which you want to extract text and a PDF form that contains the questions to be answered.
At the end, you will receive a PDF file with the responses.
For reference, you can download a sample form from [https://drive.google.com/drive/folders/13LolIqxufzysqNoGMfuCAvpA9AkbRfL7?usp=drive_link]. Use this dummy data to understand how the model works."""
)

# Launch the interface (blocks until the server stops).
interface.launch()