"""Audio/video-to-SOAP-note generator.

Transcribes audio with Whisper, splits the transcript into sentences with spaCy,
routes each sentence to a SOAP section via sentence-embedding similarity, and
summarizes long sections with a medical DistilBART model. Served through Gradio.
"""
import os
import tempfile
from subprocess import Popen, PIPE  # only needed by the disabled LLaMA helpers at the end of the file

import numpy as np
import torch
import gradio as gr
from pydub import AudioSegment
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers.pipelines.audio_utils import ffmpeg_read
from sentence_transformers import SentenceTransformer, util
import spacy
import spacy.cli

# Download the spaCy English model only if it is not already installed
try:
    spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
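# NOTE: pydub and transformers' ffmpeg_read both shell out to the external ffmpeg
# binary to decode MP4/MP3, so ffmpeg is assumed to be installed and on PATH.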
# Constants
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
device = 0 if torch.cuda.is_available() else "cpu"
# Whisper pipeline
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# NLP model and other helpers
nlp = spacy.load("en_core_web_sm")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Summarization model
summarizer_model_name = "Mahalingam/DistilBart-Med-Summary"
tokenizer = AutoTokenizer.from_pretrained(summarizer_model_name)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model_name)
summarizer = pipeline("summarization", model=summarizer_model, tokenizer=tokenizer)
# SOAP section descriptions and their embeddings
soap_prompts = {
    "subjective": "Personal reports, symptoms described by patients, or personal health concerns. Details reflecting individual symptoms or health descriptions.",
    "objective": "Observable facts, clinical findings, professional observations, specific medical specialties, and diagnoses.",
    "assessment": "Clinical assessments, expertise-based opinions on conditions, and significance of medical interventions. Focused on medical evaluations or patient condition summaries.",
    "plan": "Future steps, recommendations for treatment, follow-up instructions, and healthcare management plans.",
}
soap_embeddings = {section: embedder.encode(prompt, convert_to_tensor=True) for section, prompt in soap_prompts.items()}
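# Each transcript sentence is later assigned (zero-shot) to whichever SOAP section's
# description embedding above is most cosine-similar to the sentence's embedding.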
# Convert MP4 to MP3
def convert_mp4_to_mp3(mp4_path, mp3_path):
    try:
        audio = AudioSegment.from_file(mp4_path, format="mp4")
        audio.export(mp3_path, format="mp3")
    except Exception as e:
        raise RuntimeError(f"Error converting MP4 to MP3: {e}") from e
# Transcribe audio
def transcribe_audio(audio_path):
    try:
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        # Read the raw bytes and decode them with ffmpeg at Whisper's sampling rate
        # (ffmpeg_read expects the file's bytes, not a path)
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        audio_array = ffmpeg_read(audio_bytes, whisper_pipeline.feature_extractor.sampling_rate)
        # Ensure audio data is a numpy array of type float32
        if not isinstance(audio_array, np.ndarray):
            raise TypeError("Audio data should be a numpy array.")
        audio_array = audio_array.astype(np.float32)
        # Create input dictionary for Whisper
        inputs = {
            "array": audio_array,
            "sampling_rate": whisper_pipeline.feature_extractor.sampling_rate,
        }
        # Perform transcription
        result = whisper_pipeline(inputs, batch_size=BATCH_SIZE, return_timestamps=False)
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"
# Classify the sentence into the most similar SOAP section
def classify_sentence(sentence):
    sentence_embedding = embedder.encode(sentence, convert_to_tensor=True)
    similarities = {
        section: util.pytorch_cos_sim(sentence_embedding, soap_embeddings[section]).item()
        for section in soap_prompts
    }
    return max(similarities, key=similarities.get)
# Summarize the section if it's too long
def summarize_section(section_text):
    if len(section_text.split()) < 50:
        return section_text
    # Target roughly half the original word count (used here as the generation token budget)
    target_length = int(len(section_text.split()) * 0.50)
    inputs = tokenizer.encode(section_text, return_tensors="pt", truncation=True, max_length=1024)
    summary_ids = summarizer_model.generate(
        inputs,
        max_length=target_length,
        min_length=int(target_length * 0.45),
        length_penalty=1.0,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Analyze the text and divide it into SOAP sections
def soap_analysis(text):
    doc = nlp(text)
    soap_note = {section: "" for section in soap_prompts.keys()}
    for sentence in doc.sents:
        section = classify_sentence(sentence.text)
        soap_note[section] += sentence.text + " "
    # Summarize each section of the SOAP note
    for section in soap_note:
        soap_note[section] = summarize_section(soap_note[section].strip())
    return format_soap_output(soap_note)
# Format the SOAP note output
def format_soap_output(soap_note):
    return (
        f"Subjective:\n{soap_note['subjective']}\n\n"
        f"Objective:\n{soap_note['objective']}\n\n"
        f"Assessment:\n{soap_note['assessment']}\n\n"
        f"Plan:\n{soap_note['plan']}\n"
    )
# Process an uploaded audio/video file into a SOAP note
def process_file(file, user_prompt):
    # Gradio may hand over a path string or a tempfile wrapper, depending on version
    file_path = file if isinstance(file, str) else file.name
    temp_mp3_path = None
    # Determine file type and convert if necessary
    if file_path.endswith(".mp4"):
        temp_mp3_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
        try:
            convert_mp4_to_mp3(file_path, temp_mp3_path)
            audio_path = temp_mp3_path
        except Exception as e:
            return f"Error during MP4 to MP3 conversion: {e}"
    else:
        audio_path = file_path
    # Transcribe audio
    transcription = transcribe_audio(audio_path)
    print("Transcribed Text: ", transcription)
    # Perform SOAP analysis
    soap_note = soap_analysis(transcription)
    print("SOAP Notes: ", soap_note)
    # Clean up temporary files
    if temp_mp3_path is not None:
        os.remove(temp_mp3_path)
    return soap_note
# Process plain-text input into a SOAP note
def process_text(text, user_prompt):
    soap_note = soap_analysis(text)
    print(soap_note)
    return soap_note
# Gradio interface
def launch_gradio():
    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# Enhanced Video to SOAP Note Generator")
        with gr.Tab("Audio/Video File to SOAP"):
            gr.Interface(
                fn=process_file,
                inputs=[
                    gr.File(label="Upload Audio/Video File"),
                    gr.Textbox(label="Enter Prompt for Template", placeholder="Enter a detailed prompt...", lines=6),
                ],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                ],
            )
        with gr.Tab("Text Input to SOAP"):
            gr.Interface(
                fn=process_text,
                inputs=[
                    gr.Textbox(label="Enter Text", placeholder="Enter medical notes...", lines=6),
                    gr.Textbox(label="Enter Prompt for Template", placeholder="Enter a detailed prompt...", lines=6),
                ],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                ],
            )
    demo.launch(share=True, debug=True)

# Run the Gradio app
if __name__ == "__main__":
    launch_gradio()
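# Example (untested sketch): the SOAP pipeline can also be exercised without the UI, e.g.
#   print(process_text("Patient reports chest pain for two days. BP is 150/90. "
#                      "Likely stable angina. Start aspirin and schedule a stress test.", ""))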
# Optional LLaMA-based template/JSON generation via a local Ollama install (currently disabled):
# # LLaMA query function
# def llama_query(user_prompt, soap_note, model="llama3.2"):
#     combined_prompt = f"User Instructions:\n{user_prompt}\n\nContext:\n{soap_note}"
#     try:
#         process = Popen(['ollama', 'run', model], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True, encoding='utf-8')
#         stdout, stderr = process.communicate(input=combined_prompt)
#         if process.returncode != 0:
#             return f"Error: {stderr.strip()}"
#         return stdout.strip()
#     except Exception as e:
#         return f"Unexpected error: {str(e)}"
#
# # Convert the response to JSON format
# def llama_convert_to_json(template_output, model="llama3.2"):
#     json_prompt = f"Convert the following template into a structured JSON format:\n\n{template_output}"
#     try:
#         process = Popen(['ollama', 'run', model], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True, encoding='utf-8')
#         stdout, stderr = process.communicate(input=json_prompt)
#         if process.returncode != 0:
#             return f"Error: {stderr.strip()}"
#         return stdout.strip()  # Assuming the model outputs a valid JSON string
#     except Exception as e:
#         return f"Unexpected error: {str(e)}"