from transformers import pipeline
from openai import OpenAI
import requests
from PIL import Image
from pydub import AudioSegment
import gradio as gr
import os
import json
import numpy as np

# OpenAI client. The API key must come from the OPEN_AI_KEY environment
# variable — never hard-code credentials in source.
client = OpenAI(
    api_key=os.environ.get('OPEN_AI_KEY')
)

# Speech-to-text pipeline (a fine-tuned Whisper-style model on the HF Hub),
# pinned to an exact revision for reproducibility.
pipe = pipeline(model="Siphh/wablab2", revision="85f5ec9bd0813eec8b403a7fb510d067a4b49397")

# Avatar image shown in the UI, fetched once at startup.
psychologist_url = "https://raw.githubusercontent.com/rogoran/id2223kth.github.io/master/src/serverless-ml-intro/serverless-ml-lab/wine_images/360_F_169387942_tUJn9ADX31sRDFCuK9nlBuRR4gO6a1TK.jpg"
IMG = Image.open(requests.get(psychologist_url, stream=True).raw)

# Running conversation history, seeded with the (Swedish) system prompt.
# NOTE(review): this is module-global, shared by every visitor of the app,
# and grows without bound — acceptable for a single-user demo only.
latest_messages = [{"role": "system", "content": "Du är en svensktalande psykolog."}]

def transcribe(audio):
    """Transcribe a recorded audio file and get the assistant's reply.

    Parameters:
        audio: filepath to the recorded clip (Gradio `type="filepath"`),
            or None/"" when nothing was recorded.

    Returns:
        (reply, text): the assistant's answer and the transcribed question;
        both empty strings when no audio was provided.
    """
    if not audio:  # covers both None and "" from the Gradio component
        print("NO AUDIO FOUND")
        return "", ""
    text = pipe(audio)["text"]
    return chat(text), text

def chat(prompt):
    """Send `prompt` to the chat model and return its reply.

    Appends both the user prompt and the assistant reply to the shared
    `latest_messages` history so the conversation has context.
    """
    latest_messages.append({"role": "user", "content": prompt})
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=latest_messages,
    )
    # Hoist the reply once instead of digging into the response twice.
    reply = response.choices[0].message.content
    latest_messages.append({"role": "assistant", "content": reply})
    return reply

# --- UI: microphone input on the left, avatar + answer on the right. ---
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            inp = gr.Audio(sources="microphone", type="filepath")
            btn = gr.Button("Send transcribed msg")
            transcr_text = gr.Textbox(label="Din senaste fråga")
        with gr.Column():
            gr.Image(value=IMG, label="Doctor Amanda")
            out = gr.Textbox(label="Doctor Amandas answers")
    btn.click(fn=transcribe, inputs=inp, outputs=[out, transcr_text])

demo.launch()