Spaces:
Sleeping
Sleeping
| import time | |
| import torch | |
| import requests | |
| import gradio as gr | |
| from transformers import pipeline | |
| from usellm import Message, Options, UseLLM | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
# Whisper processor and model, loaded at import time as module-level globals.
_WHISPER_CHECKPOINT = "openai/whisper-large-v2"
processor1 = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model1 = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)
# BioGPT is not loaded here; biogpt_large_infer builds its own tokenizer and model.
def text_to_speech(text_input):
    """Synthesize ``text_input`` with the ElevenLabs API and save it as an MP3.

    Args:
        text_input: Text to be spoken.

    Returns:
        Relative path of the MP3 file that was written.

    Raises:
        requests.HTTPError: If the API answers with an error status. Without
            this check the JSON error body would be saved as a ``.mp3``.
        requests.RequestException: On connection failures or timeouts.
    """
    import os
    import uuid

    CHUNK_SIZE = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/TxGEqnHWrfWFTfGW9XjX"
    # NOTE(security): the fallback key below is committed to source control and
    # should be rotated and removed; set ELEVENLABS_API_KEY in the environment.
    api_key = os.environ.get(
        "ELEVENLABS_API_KEY", "7f91dfdd5390bbfd9d44148c59644039"
    )
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }
    data = {
        "text": text_input,
        "model_id": "eleven_monolingual_v1",
    }
    # A timestamp alone has one-second resolution, so two calls made in the
    # same second would overwrite each other's file; the random suffix makes
    # every output path unique.
    audio_write_path = f"output_{int(time.time())}_{uuid.uuid4().hex}.mp3"
    # stream=True lets iter_content write the audio chunk by chunk instead of
    # buffering the whole body; the timeout keeps a stalled request from
    # hanging the UI forever.
    with requests.post(
        url, json=data, headers=headers, stream=True, timeout=(10, 120)
    ) as response:
        response.raise_for_status()
        with open(audio_write_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)
    return audio_write_path
def whisper_inference(input_audio):
    """Run Whisper's "translate" task on an audio clip and return the text.

    Uses the module-level ``processor1`` / ``model1`` instead of reloading the
    checkpoint from disk on every call.

    Args:
        input_audio: Either a path to an audio file (the Gradio audio input in
            this file is configured with ``type="filepath"``) or an already
            decoded waveform sampled at 16 kHz.

    Returns:
        The list of decoded strings produced by ``batch_decode``.
    """
    if isinstance(input_audio, str):
        # The feature extractor needs raw samples, not a file name. Decode the
        # file to a 16 kHz mono waveform the same way the transformers ASR
        # pipeline does (requires the ffmpeg binary to be installed).
        from transformers.pipelines.audio_utils import ffmpeg_read

        with open(input_audio, "rb") as audio_file:
            input_audio = ffmpeg_read(audio_file.read(), 16000)

    forced_decoder_ids = processor1.get_decoder_prompt_ids(task="translate")
    input_features = processor1(
        input_audio, sampling_rate=16000, return_tensors="pt"
    ).input_features
    predicted_ids = model1.generate(
        input_features, forced_decoder_ids=forced_decoder_ids
    )
    transcription = processor1.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription
_BIOGPT_CHECKPOINT = "microsoft/BioGPT-Large-PubMedQA"
# Text-generation pipeline, built on first use and then reused across calls.
_biogpt_generator = None


def _get_biogpt_generator():
    """Return the BioGPT text-generation pipeline, loading it only once."""
    global _biogpt_generator
    if _biogpt_generator is None:
        tokenizer1 = AutoTokenizer.from_pretrained(
            _BIOGPT_CHECKPOINT, add_special_tokens=False
        )
        model = AutoModelForCausalLM.from_pretrained(_BIOGPT_CHECKPOINT)
        _biogpt_generator = pipeline(
            "text-generation", model=model, tokenizer=tokenizer1
        )
    return _biogpt_generator


def biogpt_large_infer(input_text):
    """Generate a BioGPT continuation of ``input_text`` and strip markup tokens.

    The tokenizer and model are loaded on the first call and cached, instead
    of being re-read from disk for every request.

    Args:
        input_text: Prompt to continue.

    Returns:
        The generated text with the markers listed below removed and
        surrounding whitespace stripped.
    """
    generator = _get_biogpt_generator()
    output = generator(
        input_text,
        min_length=100,
        max_length=1024,
        num_beams=5,
        early_stopping=True,
        num_return_sequences=1,
        do_sample=True,
    )
    text = output[0]["generated_text"]
    # Removed one after another, in this order, exactly as the chained
    # .replace() calls did before.
    for marker in ("▃", "FREETEXT", "TITLE", "PARAGRAPH", "ABSTRACT", "<", ">", "/"):
        text = text.replace(marker, "")
    return text.strip()
def chatgpt_infer(input_text):
    """Ask the usellm chat service about a disease and its treatment.

    Args:
        input_text: Disease name or phrase inserted into the user prompt.

    Returns:
        The ``content`` attribute of the service's chat response.
    """
    client = UseLLM(service_url="https://usellm.org/api/llm")

    # Conversation: a fixed system instruction followed by the user's question.
    system_message = Message(
        role="system",
        content="You are a medical assistant, which answers the query based on factual medical information only.",
    )
    user_message = Message(
        role="user",
        content=f"Give me few points on the disease {input_text} and its treatment.",
    )
    chat_options = Options(messages=[system_message, user_message])

    reply = client.chat(chat_options)
    return reply.content
def audio_interface_demo(input_audio):
    """Run the full voice pipeline: speech -> text -> two answers -> speech.

    Args:
        input_audio: Audio input forwarded unchanged to ``whisper_inference``.

    Returns:
        Tuple ``(biogpt_text, chatgpt_text, biogpt_audio_path,
        chatgpt_audio_path)``.
    """
    en_prompt = whisper_inference(input_audio)
    # whisper_inference returns the list produced by batch_decode. The two
    # text models expect a single prompt string, so collapse the list here
    # (a plain string is passed through untouched).
    if isinstance(en_prompt, (list, tuple)):
        en_prompt = " ".join(en_prompt).strip()

    biogpt_output = biogpt_large_infer(en_prompt)
    chatgpt_output = chatgpt_infer(en_prompt)
    bio_audio_output = text_to_speech(biogpt_output)
    chat_audio_output = text_to_speech(chatgpt_output)
    return biogpt_output, chatgpt_output, bio_audio_output, chat_audio_output
def text_interface_demo(input_text):
    """Answer a typed prompt with both text models.

    Args:
        input_text: Prompt passed to BioGPT first and then to the chat service.

    Returns:
        Tuple ``(biogpt_text, chatgpt_text)``.
    """
    return biogpt_large_infer(input_text), chatgpt_infer(input_text)
# Example prompts, each wrapped in its own single-item row.
examples = [[prompt] for prompt in ("Meningitis is", "Brain Tumour is")]
# Gradio UI: one input panel (text / audio tabs) and one output panel
# (text-only / text-plus-audio tabs).
app = gr.Blocks()
with app:
    gr.Markdown("# **<h4 align='center'>Voice based Medical Informational Bot - Won't Work due to Resource Constraints</h4>**")
    with gr.Row():
        with gr.Column():
            with gr.Tab("Text"):
                input_text = gr.Textbox(lines=3, value="Brain Tumour is", label="Text")
                text_button = gr.Button(value="Predict")
            with gr.Tab("Audio"):
                input_audio = gr.Audio(value="input.mp3", source="upload", type="filepath", label='Audio')
                audio_button = gr.Button(value="Predict")
    with gr.Row():
        with gr.Column():
            # Each tab gets its own uniquely named components. Previously both
            # tabs assigned to the same two variable names, so the textboxes
            # in "Output Text" were overwritten and never received any result.
            with gr.Tab("Output Text"):
                text_biogpt_output = gr.Textbox(lines=3, label="BioGpt Output")
                text_chatgpt_output = gr.Textbox(lines=3, label="ChatGPT Output")
            with gr.Tab("Output Audio"):
                audio_biogpt_output = gr.Textbox(lines=3, label="BioGpt Output")
                audio_chatgpt_output = gr.Textbox(lines=3, label="ChatGPT Output")
                audio_output1 = gr.Audio(value=None, label="ChatGPT Audio Output")
                audio_output2 = gr.Audio(value=None, label="BioGpt Audio Output")
    # TODO: wire the module-level `examples` list to a gr.Examples component
    # for input_text.
    # Text predictions fill the "Output Text" tab.
    text_button.click(
        text_interface_demo,
        inputs=[input_text],
        outputs=[text_biogpt_output, text_chatgpt_output],
    )
    # Audio predictions fill the "Output Audio" tab. audio_interface_demo
    # returns the BioGPT audio before the ChatGPT audio, hence
    # audio_output2 (BioGpt) is listed before audio_output1 (ChatGPT).
    audio_button.click(
        audio_interface_demo,
        inputs=[input_audio],
        outputs=[audio_biogpt_output, audio_chatgpt_output, audio_output2, audio_output1],
    )
app.launch(debug=True)