"""lab2_scalable / app.py

Hugo Lindgren
Model version 2 (commit 29526da)
"""
import os

import gradio as gr
import requests
from openai import OpenAI
from PIL import Image
from transformers import pipeline
# The OpenAI client reads its API key from the OPEN_AI_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPEN_AI_KEY"))

# Automatic speech recognition pipeline: the fine-tuned Hub model, pinned to a
# specific revision for reproducibility.
pipe = pipeline(model="Siphh/wablab2", revision="85f5ec9bd0813eec8b403a7fb510d067a4b49397")
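# A minimal sanity check of the pipeline (hypothetical file name; assumes a short
# audio recording exists at that path and that ffmpeg is available):
#   print(pipe("test_recording.wav")["text"])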

# Avatar image for the "psychologist", fetched once at startup.
psychologist_url = "https://raw.githubusercontent.com/rogoran/id2223kth.github.io/master/src/serverless-ml-intro/serverless-ml-lab/wine_images/360_F_169387942_tUJn9ADX31sRDFCuK9nlBuRR4gO6a1TK.jpg"
IMG = Image.open(requests.get(psychologist_url, stream=True).raw)

# Running conversation history. The system prompt ("You are a Swedish-speaking
# psychologist") keeps the assistant in character and answering in Swedish.
latest_messages = [{"role": "system", "content": "Du är en svensktalande psykolog."}]


def transcribe(audio):
    """Transcribe the recorded audio, then return (chat answer, transcription)."""
    if audio is None or audio == "":
        print("NO AUDIO FOUND")
        return "", ""
    # The ASR pipeline returns a dict of the form {"text": "..."}.
    text = pipe(audio)["text"]
    return chat(text), text


def chat(prompt):
    """Send the user's prompt to the chat model and return the assistant's reply."""
    latest_messages.append({"role": "user", "content": prompt})
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=latest_messages,
    )
    # Store the reply as well, so later turns keep the full conversation context.
    answer = response.choices[0].message.content
    latest_messages.append({"role": "assistant", "content": answer})
    return answer
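
# After one exchange, latest_messages has the shape:
#   [{"role": "system", "content": "Du är en svensktalande psykolog."},
#    {"role": "user", "content": "<transcribed question>"},
#    {"role": "assistant", "content": "<model reply>"}]
# Note that the list grows without bound; a long session could eventually
# exceed the model's context window.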


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # gr.Audio expects a list of input sources in current Gradio versions.
            inp = gr.Audio(sources=["microphone"], type="filepath")
            btn = gr.Button("Send transcribed message")
            transcr_text = gr.Textbox(label="Din senaste fråga")  # "Your latest question"
        with gr.Column():
            gr.Image(value=IMG, label="Doctor Amanda")
            out = gr.Textbox(label="Doctor Amanda's answers")
    btn.click(fn=transcribe, inputs=inp, outputs=[out, transcr_text])

demo.launch()
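
# To run locally (assuming the dependencies above and ffmpeg are installed):
#   export OPEN_AI_KEY=<your OpenAI API key>
#   python app.py
# Gradio then serves the app on a local URL (http://127.0.0.1:7860 by default).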