MultiMedTulu

Runtime error

App Files Community

PayPeer committed on Nov 19, 2023

Commit

f877950

1 Parent(s): be77a46

Refactor / Tidy

Browse files

Files changed (1) hide show

app.py +55 -54

app.py CHANGED Viewed

@@ -2,28 +2,15 @@
 from gradio_client import Client
 import numpy as np
-import base64
 import gradio as gr
-import tempfile
 import requests
 import json
 import dotenv
-from scipy.io.wavfile import write
 import soundfile as sf
-from openai import OpenAI
 import time
-import PIL
 from PIL import Image
-import io
-import hashlib
-import datetime
-from utils import build_logger
-from transformers import AutoTokenizer, MistralForCausalLM
-import torch
-import random
-from textwrap import wrap
-import transformers
-from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
 from peft import PeftModel, PeftConfig
 import torch
 import os
@@ -40,28 +27,34 @@ base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id')
 model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory')
 device = "cuda" if torch.cuda.is_available() else "cpu"
-def check_hallucination(assertion,citation):
-    API_URL = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
-    headers = {"Authorization": f"Bearer {HuggingFace_Token}"}
-    payload = {"inputs" : f"{assertion} [SEP] {citation}"}
-    response = requests.post(API_URL, headers=headers, json=payload,timeout=120)
     output = response.json()
     output = output[0][0]["score"]
     return f"**hallucination score:** {output}"
 # Define the API parameters
-VAPI_URL = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
-headers = {"Authorization": f"Bearer {HuggingFace_Token}"}
 # Function to query the API
 def query(payload):
-    response = requests.post(VAPI_URL, headers=headers, json=payload)
     return response.json()
 # Function to evaluate hallucination
 def evaluate_hallucination(input1, input2):
     # Combine the inputs
@@ -81,6 +74,7 @@ def evaluate_hallucination(input1, input2):
     return label
 def save_audio(audio_input, output_dir="saved_audio"):
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
@@ -97,13 +91,14 @@ def save_audio(audio_input, output_dir="saved_audio"):
     return file_path
 def save_image(image_input, output_dir="saved_images"):
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     # Assuming image_input is a NumPy array
     if isinstance(image_input, np.ndarray):
-        # Convert NumPy array to PIL Image
         image = Image.fromarray(image_input)
         # Generate a unique file name
@@ -118,12 +113,11 @@ def save_image(image_input, output_dir="saved_images"):
         raise ValueError("Invalid image input type")
 def process_speech(input_language, audio_input):
     """
     processing sound using seamless_m4t
     """
-    if audio_input is None :
         return "no audio or audio did not save yet \nplease try again ! "
     print(f"audio : {audio_input}")
     print(f"audio type : {type(audio_input)}")
@@ -131,16 +125,16 @@ def process_speech(input_language, audio_input):
         "S2TT",
         "file",
         None,
-        audio_input, #audio_name
         "",
-        input_language,# source language
-        "English",# target language
         api_name="/run",
     )
-    out = out[1] # get the text
-    try :
         return f"{out}"
-    except Exception as e :
         return f"{e}"
@@ -165,7 +159,8 @@ def convert_text_to_speech(input_text, source_language, target_language):
         # Initialize variables
         original_audio_file = None
         translated_text = ""
         # Check if result contains files
         if isinstance(result, list) and len(result) > 1:
             downloaded_files = []
@@ -197,7 +192,8 @@ def convert_text_to_speech(input_text, source_language, target_language):
         # Return a concise error message
         return f"Error in text-to-speech conversion: {str(e)}", ""
-    return "Unexpected result format or insufficient data received.", ""
 def process_image(image_input):
     # Initialize the Gradio client with the URL of the Gradio server
@@ -220,14 +216,14 @@ def query_vectara(text):
     user_message = text
     # Read authentication parameters from the .env file
-    CUSTOMER_ID = os.getenv('CUSTOMER_ID')
-    CORPUS_ID = os.getenv('CORPUS_ID')
-    API_KEY = os.getenv('API_KEY')
     # Define the headers
     api_key_header = {
-        "customer-id": CUSTOMER_ID,
-        "x-api-key": API_KEY
     }
     # Define the request body in the structure provided in the example
@@ -254,8 +250,8 @@ def query_vectara(text):
                 },
                 "corpusKey": [
                     {
-                        "customerId": CUSTOMER_ID,
-                        "corpusId": CORPUS_ID,
                         "semantics": 0,
                         "metadataFilter": "",
                         "lexicalInterpolationConfig": {
@@ -327,6 +323,8 @@ def wrap_text(text, width=90):
     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
     wrapped_text = '\n'.join(wrapped_lines)
     return wrapped_text
 def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
     # Combine user input and system prompt
@@ -336,15 +334,15 @@ def multimodal_prompt(user_input, system_prompt="You are an expert medical analy
     encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
     model_inputs = encodeds.to(device)
-    # Generate a response using the model
-    output = model.generate(
         **model_inputs,
-        max_length=max_length,
         use_cache=True,
         early_stopping=True,
-        bos_token_id=model.config.bos_token_id,
-        eos_token_id=model.config.eos_token_id,
-        pad_token_id=model.config.eos_token_id,
         temperature=0.1,
         do_sample=True
     )
@@ -354,6 +352,7 @@ def multimodal_prompt(user_input, system_prompt="You are an expert medical analy
     return response_text
 # Instantiate the Tokenizer
 tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
 # tokenizer = AutoTokenizer.from_pretrained("Tonic/stablemed", trust_remote_code=True, padding_side="left")
@@ -370,18 +369,20 @@ class ChatBot:
     def __init__(self):
         self.history = []
-    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
         formatted_input = f"{system_prompt}{user_input}"
         user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
         response = peft_model.generate(input_ids=user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
         return response_text
 bot = ChatBot()
 def process_summary_with_stablemed(summary):
     system_prompt = "You are a medical instructor . Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
-    response_text = bot.predict(summary, system_prompt)
     return response_text
@@ -391,11 +392,9 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
     try:
         combined_text = ""
-        image_description = ""
         markdown_output = ""
         image_text = ""
-        translated_text = ""
-        audio_output = ""
         # Debugging print statement
         print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
@@ -463,6 +462,7 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
     except Exception as e:
         return f"Error occurred during processing: {e}. No hallucination evaluation.", None
 welcome_message = """
 # 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷
@@ -599,12 +599,12 @@ def create_interface():
         with gr.Accordion("Use Voice", open=False) as voice_accordion:
             audio_input = gr.Audio(label="Speak")
-            audio_output = gr.Markdown(label="Output text")  # Markdown component for audio
             gr.Examples([["audio1.wav"],["audio2.wav"],],inputs=[audio_input])
         with gr.Accordion("Use a Picture", open=False) as picture_accordion:
             image_input = gr.Image(label="Upload image")
-            image_output = gr.Markdown(label="Output text")  # Markdown component for image
             gr.Examples([["image1.png"], ["image2.jpeg"], ["image3.jpeg"],],inputs=[image_input])
         with gr.Accordion("MultiMed", open=False) as multimend_accordion:
@@ -632,5 +632,6 @@ def create_interface():
     return iface
 iface = create_interface()
 iface.launch(show_error=True, debug=True)

 from gradio_client import Client
 import numpy as np
 import gradio as gr
 import requests
 import json
 import dotenv
 import soundfile as sf
 import time
+import textwrap
 from PIL import Image
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
 import torch
 import os
 model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory')
 device = "cuda" if torch.cuda.is_available() else "cpu"
+image_description = ""
+# audio_output = ""
+def check_hallucination(assertion, citation):
+    api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
+    header = {"Authorization": f"Bearer {hf_token}"}
+    payload = {"inputs": f"{assertion} [SEP] {citation}"}
+    response = requests.post(api_url, headers=header, json=payload, timeout=120)
     output = response.json()
     output = output[0][0]["score"]
     return f"**hallucination score:** {output}"
 # Define the API parameters
+vapi_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
+headers = {"Authorization": f"Bearer {hf_token}"}
 # Function to query the API
 def query(payload):
+    response = requests.post(vapi_url, headers=headers, json=payload)
     return response.json()
 # Function to evaluate hallucination
 def evaluate_hallucination(input1, input2):
     # Combine the inputs
     return label
 def save_audio(audio_input, output_dir="saved_audio"):
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     return file_path
 def save_image(image_input, output_dir="saved_images"):
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     # Assuming image_input is a NumPy array
     if isinstance(image_input, np.ndarray):
+        # Convert NumPy arrays to PIL Image
         image = Image.fromarray(image_input)
         # Generate a unique file name
         raise ValueError("Invalid image input type")
 def process_speech(input_language, audio_input):
     """
     processing sound using seamless_m4t
     """
+    if audio_input is None:
         return "no audio or audio did not save yet \nplease try again ! "
     print(f"audio : {audio_input}")
     print(f"audio type : {type(audio_input)}")
         "S2TT",
         "file",
         None,
+        audio_input,
         "",
+        input_language,
+        "English",
         api_name="/run",
     )
+    out = out[1]  # get the text
+    try:
         return f"{out}"
+    except Exception as e:
         return f"{e}"
         # Initialize variables
         original_audio_file = None
         translated_text = ""
+        new_file_path = ""
         # Check if result contains files
         if isinstance(result, list) and len(result) > 1:
             downloaded_files = []
         # Return a concise error message
         return f"Error in text-to-speech conversion: {str(e)}", ""
+    # return "Unexpected result format or insufficient data received.", "" //UNREACHABLE CODE
 def process_image(image_input):
     # Initialize the Gradio client with the URL of the Gradio server
     user_message = text
     # Read authentication parameters from the .env file
+    customer_id = os.getenv('CUSTOMER_ID')
+    corpus_id = os.getenv('CORPUS_ID')
+    api_key = os.getenv('API_KEY')
     # Define the headers
     api_key_header = {
+        "customer-id": customer_id,
+        "x-api-key": api_key
     }
     # Define the request body in the structure provided in the example
                 },
                 "corpusKey": [
                     {
+                        "customerId": customer_id,
+                        "corpusId": corpus_id,
                         "semantics": 0,
                         "metadataFilter": "",
                         "lexicalInterpolationConfig": {
     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
     wrapped_text = '\n'.join(wrapped_lines)
     return wrapped_text
 def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
     # Combine user input and system prompt
     encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
     model_inputs = encodeds.to(device)
+    # Generate a response using the model //MODEL UNDEFINED, using peft_model instead.
+    output = peft_model.generate(
         **model_inputs,
+        max_length=512,
         use_cache=True,
         early_stopping=True,
+        bos_token_id=peft_model.config.bos_token_id,
+        eos_token_id=peft_model.config.eos_token_id,
+        pad_token_id=peft_model.config.eos_token_id,
         temperature=0.1,
         do_sample=True
     )
     return response_text
 # Instantiate the Tokenizer
 tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
 # tokenizer = AutoTokenizer.from_pretrained("Tonic/stablemed", trust_remote_code=True, padding_side="left")
     def __init__(self):
         self.history = []
+    def doctor(self, user_input, system_prompt="You are an expert medical analyst:"):
         formatted_input = f"{system_prompt}{user_input}"
         user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
         response = peft_model.generate(input_ids=user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
         return response_text
 bot = ChatBot()
 def process_summary_with_stablemed(summary):
     system_prompt = "You are a medical instructor . Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
+    response_text = bot.doctor(summary, system_prompt)
     return response_text
     try:
         combined_text = ""
         markdown_output = ""
         image_text = ""
         # Debugging print statement
         print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
     except Exception as e:
         return f"Error occurred during processing: {e}. No hallucination evaluation.", None
 welcome_message = """
 # 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷
         with gr.Accordion("Use Voice", open=False) as voice_accordion:
             audio_input = gr.Audio(label="Speak")
+            # audio_output = gr.Markdown(label="Output text")  # Markdown component for audio
             gr.Examples([["audio1.wav"],["audio2.wav"],],inputs=[audio_input])
         with gr.Accordion("Use a Picture", open=False) as picture_accordion:
             image_input = gr.Image(label="Upload image")
+            # image_output = gr.Markdown(label="Output text")  # Markdown component for image
             gr.Examples([["image1.png"], ["image2.jpeg"], ["image3.jpeg"],],inputs=[image_input])
         with gr.Accordion("MultiMed", open=False) as multimend_accordion:
     return iface
 iface = create_interface()
 iface.launch(show_error=True, debug=True)