Upload 12 files
Browse files- .gitattributes +36 -35
- .gitignore +1 -0
- Gpt4oDemo.py +402 -0
- Helper.py +79 -0
- README.md +8 -13
- TEST.mp3 +0 -0
- TEST.mp4 +3 -0
- avatar.webp +0 -0
- hardCodedData.py +98 -0
- newDemo.py +311 -0
- requirements.txt +166 -1
- style.css +50 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,36 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
TEST.mp4 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
Gpt4oDemo.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import plotly.graph_objs as go
|
| 3 |
+
import numpy as np
|
| 4 |
+
import time
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
import os
|
| 7 |
+
from hardCodedData import *
|
| 8 |
+
from Helper import *
|
| 9 |
+
import cv2
|
| 10 |
+
from moviepy.editor import VideoFileClip
|
| 11 |
+
import time
|
| 12 |
+
import base64
|
| 13 |
+
import whisperx
|
| 14 |
+
import gc
|
| 15 |
+
from moviepy.editor import VideoFileClip
|
| 16 |
+
from dotenv import load_dotenv
|
| 17 |
+
|
| 18 |
+
load_dotenv()
|
| 19 |
+
|
| 20 |
+
'''
|
| 21 |
+
Model Information
|
| 22 |
+
GPT4o
|
| 23 |
+
'''
|
| 24 |
+
|
| 25 |
+
import openai
# API key comes from .env (loaded by load_dotenv() above); requests are
# routed through the lab's OpenAI gateway rather than api.openai.com.
api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI(
    api_key=api_key,
    base_url="https://openai.gateway.salt-lab.org/v1",
)
MODEL="gpt-4o"

# Whisperx config
device = "cpu"
batch_size = 16 # reduce if low on GPU mem
compute_type = "int8" # change to "int8" if low on GPU mem (may reduce accuracy)
# NOTE: the large-v2 model is loaded once at import time; this is slow on CPU
# and delays app startup.
model = whisperx.load_model("large-v2", device, compute_type=compute_type)

'''
Video
'''
# Module-level state shared by the Gradio callbacks below.
video_file = None       # path of the currently loaded video
audio_path=None         # path of the audio extracted by process_video()
base64Frames = []       # base64-encoded JPEG frames sampled from the video
transcript=""           # diarized transcript built by transcribe_video()
|
| 46 |
+
|
| 47 |
+
def process_video(video_path, seconds_per_frame=2):
    """Sample frames from a video and extract its audio track.

    Frames are taken roughly every ``seconds_per_frame`` seconds, JPEG- and
    base64-encoded into the module-level ``base64Frames`` list; the audio is
    written to ``./TEST.mp3`` and its path stored in the module-level
    ``audio_path``.

    Args:
        video_path: Path to the video file to process.
        seconds_per_frame: Sampling interval in seconds between kept frames.

    Returns:
        Tuple ``(base64Frames, audio_path)``.
    """
    global base64Frames, audio_path
    # Fix: reset the accumulator so processing a second video does not mix
    # its frames with the previous one's (the original only ever appended).
    base64Frames = []

    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Fix: clamp the step to at least 1 frame. If fps * seconds_per_frame
        # truncated to 0 (low fps or fractional interval), the original loop
        # never advanced and spun forever.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0

        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        # Release the capture even if decoding raises.
        video.release()

    audio_path = "./TEST.mp3"
    clip = VideoFileClip(video_path)
    clip.audio.write_audiofile(audio_path, bitrate="32k")
    clip.audio.close()
    clip.close()
    # transcribe_video(audio_path)
    print(f"Extracted {len(base64Frames)} frames")
    print(f"Extracted audio to {audio_path}")
    return base64Frames, audio_path
|
| 76 |
+
|
| 77 |
+
# Conversation state sent to the chat completions API; grows across turns and
# is primed with video frames by handle_video().
chat_history = []
# Disabled system prompt establishing the SLP-assistant persona; re-enable by
# uncommenting (it must precede the first user turn).
# chat_history.append({
#     "role": "system",
#     "content": (
#         """
#         You are an assistant chatbot for a Speech Language Pathologist (SLP).
#         Your task is to help analyze a provided video of a therapy session and answer questions accurately.
#         Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.
#
#         Follow these steps:
#
#         1. Suggest to the user to ask, "To get started, you can try asking me how many people there are in the video."
#         2. Detect how many people are in the video.
#         2. Suggest to the user to tell you the names of the people in the video, starting from left to right.
#         3. After receiving the names, respond with, "Ok thank you! Now you can ask me any questions about this video."
#         4. If the user asks about a behavior, respond with, "My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice."
#         5. If you receive names, confirm that these are the names of the people from left to right.
#         """
#     )
# })
|
| 97 |
+
|
| 98 |
+
def transcribe_video(filename):
    """Transcribe and diarize an audio file with WhisperX.

    Rebuilds the module-level ``transcript`` string as repeated
    ``Dialogue:/start:/end:`` records, one blank-line-separated block per
    diarized segment.

    Args:
        filename: Path to the audio file to transcribe. Falls back to the
            module-level ``audio_path`` when falsy (the original ignored this
            parameter entirely and always read the global).

    Returns:
        The transcript string.

    Raises:
        ValueError: If neither ``filename`` nor ``audio_path`` is set.
    """
    global transcript
    # Fix: honour the filename argument; the original accepted it but only
    # ever used the global audio_path.
    source = filename or audio_path
    if not source:
        raise ValueError("Audio path is None")
    print(source)
    # Fix: rebuild from scratch so repeated calls don't duplicate segments
    # (transcript was only ever appended to).
    transcript = ""

    audio = whisperx.load_audio(source)
    result = model.transcribe(audio, batch_size=batch_size)

    # Word-level alignment for accurate start/end timestamps.
    model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
    result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)

    # Speaker diarization (requires a Hugging Face auth token).
    hf_auth_token = os.getenv("HF_AUTH_TOKEN")
    diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_auth_token, device=device)
    diarize_segments = diarize_model(audio)
    dia_result = whisperx.assign_word_speakers(diarize_segments, result)

    for res in dia_result["segments"]:
        # transcript += "Speaker: " + str(res.get("speaker", None)) + "\n"
        transcript += "Dialogue: " + str(res["text"].lstrip()) + "\n"
        transcript += "start: " + str(int(res["start"])) + "\n"
        transcript += "end: " + str(int(res["end"])) + "\n"
        transcript += "\n"

    return transcript
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def handle_video(video=None):
    """Process an uploaded (or the example) video and prime the chat context.

    Samples frames via process_video(), then appends them to the module-level
    chat_history as a user message of image_url parts; if a transcript has
    been built, it is attached to that same message.

    Args:
        video: Path to the uploaded video file; None loads the bundled
            example clip ./TEST.mp4.

    Returns:
        The video path, echoed back so the Gradio Video component displays it.
    """
    global video_file, base64Frames, audio_path, chat_history, transcript

    if video is None:
        # Load example video
        video = "./TEST.mp4"

    # NOTE(review): seconds_per_frame=100 keeps the frame count (and token
    # cost) very low — confirm this sampling rate is intended.
    base64Frames, audio_path = process_video(video_path=video, seconds_per_frame=100)
    # NOTE(review): calling this repeatedly appends a new frames message each
    # time without clearing earlier ones.
    chat_history.append({
        "role": "user",
        "content": [
            {"type": "text", "text": "These are the frames from the video."},
            *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
        ]
    })

    if transcript:
        # Describe the transcript's record format so the model can map
        # dialogue lines back to timestamps.
        chat_history[-1]['content'].append({
            "type": "text",
            "text": f"Also, below is the template of transcript from the video:\n"
            "Speaker: <the speaker of the dialogue>\n"
            "Dialogue: <the text of the dialogue>\n"
            "start: <the starting timestamp of the dialogue in the video in second>\n"
            "end: <the ending timestamp of the dialogue in the video in second>\n"
            f"Transcription: {transcript}"
        })

    video_file = video
    return video_file
|
| 156 |
+
|
| 157 |
+
'''
|
| 158 |
+
Chatbot
|
| 159 |
+
'''
|
| 160 |
+
|
| 161 |
+
def new_prompt(prompt):
    """Send the user's prompt (plus accumulated context) to GPT-4o.

    Appends the prompt to the module-level ``chat_history``, calls the chat
    completions endpoint, and appends the assistant's reply — or a friendly
    error message — to the history.

    Args:
        prompt: The user's message text.

    Returns:
        The updated ``chat_history`` list.
    """
    global chat_history, video_file
    chat_history.append({"role": "user", "content": prompt})
    print(transcript)
    try:
        # The request is identical whether or not a video was uploaded: any
        # video context already lives in chat_history, so the original's
        # duplicated if/else branches are collapsed into one call.
        response = client.chat.completions.create(
            model=MODEL,
            messages=chat_history,
            temperature=0,
        )
        assistant_message = response.choices[0].message.content
        # Fix: use the OpenAI-recognised "assistant" role. The original used
        # "model" (a Gemini convention), which the OpenAI API rejects once
        # the history is sent back on the next turn.
        chat_history.append({'role': 'assistant', 'content': assistant_message})
        print(assistant_message)
    except Exception as e:
        print("Error: ", e)
        assistant_message = "API rate limit has been reached. Please wait a moment and try again."
        chat_history.append({'role': 'assistant', 'content': assistant_message})

    return chat_history
|
| 192 |
+
|
| 193 |
+
def user_input(user_message, history):
    """Stage the user's message in the chat display and clear the textbox.

    Returns an empty string (to blank the input box) and the history with a
    new [message, pending-reply] pair appended.
    """
    staged = history + [[user_message, None]]
    return "", staged
|
| 195 |
+
|
| 196 |
+
def bot_response(history):
    """Fill in the assistant reply for the chat display's latest turn.

    Generator form, as Gradio streaming callbacks expect: yields the history
    with the last pair's reply slot populated.
    """
    latest_prompt = history[-1][0]
    full_history = new_prompt(latest_prompt)
    reply = full_history[-1]['content']
    history[-1][1] = reply
    yield history
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
'''
|
| 205 |
+
Behavior box
|
| 206 |
+
'''
|
| 207 |
+
# Seed behavior definitions shown in the Behavior Manager dropdown.
# Shape: (name, (definition, example "MM:SS" timestamps)).
initial_behaviors = [
    ("Initiating Behavioral Request (IBR)",
     ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
      ["00:10", "00:45", "01:30"])),

    ("Initiating Joint Attention (IJA)",
     ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
      ["00:15", "00:50", "01:40"])),

    ("Responding to Joint Attention (RJA)",
     ("The child's skill in following the examiner’s line of regard and pointing gestures.",
      ["00:20", "01:00", "02:00"])),

    ("Initiating Social Interaction (ISI)",
     ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
      ["00:20", "00:50", "02:00"])),

    ("Responding to Social Interaction (RSI)",
     ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
      ["00:20", "01:00", "02:00"]))
]

# NOTE: this aliases (does not copy) initial_behaviors, so in-place edits via
# add_or_update_behavior() also mutate initial_behaviors.
behaviors = initial_behaviors
# Names the user has added to the Behavior Bank checkbox group.
behavior_bank = []
|
| 231 |
+
|
| 232 |
+
def add_or_update_behavior(name, definition, timestamps, selected_behavior):
    """Create a behavior, or overwrite the one selected in the dropdown.

    Returns refreshed dropdown and bank components plus three empty strings
    that clear the name/definition/timestamps textboxes.
    """
    global behaviors, behavior_bank
    if selected_behavior:
        # Overwrite the matching entry in the collection...
        for idx, (existing_name, _) in enumerate(behaviors):
            if existing_name == selected_behavior:
                behaviors[idx] = (name, (definition, timestamps))
                break
        # ...and rename it in the bank, if it had been added there.
        behavior_bank = [name if entry == selected_behavior else entry
                         for entry in behavior_bank]
    else:
        behaviors.append((name, (definition, timestamps)))

    dropdown_choices = [entry[0] for entry in behaviors]
    refreshed_dropdown = gr.Dropdown(choices=dropdown_choices, value=None, interactive=True)
    refreshed_bank = gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True)
    return refreshed_dropdown, refreshed_bank, "", "", ""
|
| 246 |
+
|
| 247 |
+
def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
    """Add the dropdown-selected behavior to the bank, skipping duplicates.

    Returns the refreshed bank checkbox group and a dropdown reset to no
    selection.
    """
    global behavior_bank
    already_present = selected_behavior in checkbox_group_values
    if selected_behavior and not already_present:
        checkbox_group_values.append(selected_behavior)
    behavior_bank = checkbox_group_values
    refreshed_bank = gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True)
    cleared_dropdown = gr.Dropdown(value=None, interactive=True)
    return refreshed_bank, cleared_dropdown
|
| 253 |
+
|
| 254 |
+
def delete_behavior(selected_behavior, checkbox_group_values):
    """Remove a behavior from the collection, the bank, and the checkboxes.

    Returns the refreshed dropdown and bank checkbox group.
    """
    global behaviors, behavior_bank
    behaviors = [entry for entry in behaviors if entry[0] != selected_behavior]
    behavior_bank = [entry for entry in behavior_bank if entry != selected_behavior]
    remaining_names = [entry[0] for entry in behaviors]
    remaining_checked = [item for item in checkbox_group_values if item != selected_behavior]
    refreshed_dropdown = gr.Dropdown(choices=remaining_names, value=None, interactive=True)
    refreshed_bank = gr.CheckboxGroup(choices=remaining_checked, value=remaining_checked, interactive=True)
    return refreshed_dropdown, refreshed_bank
|
| 261 |
+
|
| 262 |
+
def edit_behavior(selected_behavior):
    """Populate the editor textboxes with the selected behavior's fields.

    Returns (name, definition, timestamps) for the matching behavior, or
    three empty strings to clear the textboxes when nothing matches.
    """
    for behavior_name, (behavior_definition, behavior_timestamps) in behaviors:
        if behavior_name == selected_behavior:
            return behavior_name, behavior_definition, behavior_timestamps
    return "", "", ""
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
# Greeting shown as the chatbot's first message in the UI.
welcome_message = """
Hello! I'm your AI assistant.
I can help you analyze your video sessions following your instructions.
To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
"""
#If you want to tell me about the people in the video, please name them starting from left to right.

# Page-level styling injected via gr.Blocks(css=...).
css="""
body {
    background-color: #edf1fa; /* offwhite */
}
.gradio-container {
    background-color: #edf1fa; /* offwhite */
}
.column-form .wrap {
    flex-direction: column;
}
.sidebar {
    background: #ffffff;
    padding: 10px;
    border-right: 1px solid #dee2e6;
}
.content {
    padding: 10px;
}
"""
|
| 296 |
+
|
| 297 |
+
'''
Gradio Demo
'''
# Top-level UI: left column = video upload/analyze, right column = chatbot;
# below them the Behavior Bank and a collapsible Behavior Manager sidebar.
# NOTE(review): block nesting reconstructed from a whitespace-mangled source —
# verify the layout against the running app.
with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
    gr.Markdown("# 🤖 AI-Supported SOAP Generation")

    with gr.Row():
        with gr.Column():
            video = gr.Video(label="Video", visible=True, height=360, container=True)
            with gr.Row():
                with gr.Column(min_width=1, scale=1):
                    video_upload_button = gr.Button("Analyze Video", variant="primary")
                with gr.Column(min_width=1, scale=1):
                    example_video_button = gr.Button("Load Example Video")

            # Both buttons run handle_video; with no input it falls back to
            # the bundled example clip.
            video_upload_button.click(handle_video, inputs=video, outputs=video)
            example_video_button.click(handle_video, None, outputs=video)

        with gr.Column():
            chat_section = gr.Group(visible=True)
            with chat_section:
                chatbot = gr.Chatbot(elem_id="chatbot",
                                     container=True,
                                     likeable=True,
                                     value=[[None, welcome_message]],
                                     avatar_images=(None, "./avatar.webp"))
                with gr.Row():
                    txt = gr.Textbox(show_label=False, placeholder="Type here!")
                with gr.Row():
                    send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
                    clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")

    with gr.Row():
        behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
                                         choices=[],
                                         interactive=True,
                                         info="A space to store all the behaviors you want to analyze.")
        open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
        close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)

    # Send flow: stage the user message first, then generate the reply.
    txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot_response, chatbot, chatbot)
    send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot_response, chatbot, chatbot)
    # NOTE(review): clearing only resets the display, not the module-level
    # chat_history sent to the API.
    clear_btn.click(lambda: None, None, chatbot, queue=False)

    # Define a sidebar column that is initially hidden
    with gr.Column(visible=False, min_width=200, scale=0.5, elem_classes="sidebar") as sidebar:
        behavior_dropdown = gr.Dropdown(label="Behavior Collection",
                                        choices=behaviors,
                                        interactive=True,
                                        container=True,
                                        elem_classes="column-form",
                                        info="Choose a behavior to add to the bank, edit or remove.")
        with gr.Row():
            add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
            edit_button = gr.Button("Edit Behavior")
            delete_button = gr.Button("Remove Behavior")

        with gr.Row():
            name_input = gr.Textbox(label="Behavior Name",
                                    placeholder="(e.g., IBR)",
                                    info="The name you give to the specific behavior you're tracking or analyzing.")
            timestamps_input = gr.Textbox(label="Timestamps MM:SS",
                                          placeholder="(e.g., (01:15,01:35) )",
                                          info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
            definition_input = gr.Textbox(lines=3,
                                          label="Behavior Definition",
                                          placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
                                          info="Provide a clear definition of the behavior.")

        with gr.Row():
            submit_button = gr.Button("Save Behavior", variant="primary")

    # Wire the Behavior Manager buttons to their handlers.
    submit_button.click(fn=add_or_update_behavior,
                        inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
                        outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])

    add_toBank_button.click(fn=add_to_behaivor_bank,
                            inputs=[behavior_dropdown, behaivor_bank],
                            outputs=[behaivor_bank, behavior_dropdown])

    delete_button.click(fn=delete_behavior,
                        inputs=[behavior_dropdown, behaivor_bank],
                        outputs=[behavior_dropdown, behaivor_bank])

    edit_button.click(fn=edit_behavior,
                      inputs=[behavior_dropdown],
                      outputs=[name_input, definition_input, timestamps_input])

    # Function to open the sidebar
    open_sidebar_btn.click(lambda: {
        open_sidebar_btn: gr.Button(visible=False),
        close_sidebar_btn: gr.Button(visible=True),
        sidebar: gr.Column(visible=True)
    }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])

    # Function to close the sidebar
    close_sidebar_btn.click(lambda: {
        open_sidebar_btn: gr.Button(visible=True),
        close_sidebar_btn: gr.Button(visible=False),
        sidebar: gr.Column(visible=False)
    }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])

# Launch the demo
demo.launch(share=True)
|
Helper.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
# table
|
| 4 |
+
css = """
|
| 5 |
+
.scrollable-table {
|
| 6 |
+
max-height: 300px;
|
| 7 |
+
overflow-y: auto;
|
| 8 |
+
border: 1px solid #ccc;
|
| 9 |
+
padding: 10px;
|
| 10 |
+
|
| 11 |
+
}
|
| 12 |
+
.scrollable-table table {
|
| 13 |
+
width: 100%;
|
| 14 |
+
border-collapse: collapse;
|
| 15 |
+
background-color: #FFFFFF;
|
| 16 |
+
|
| 17 |
+
}
|
| 18 |
+
.scrollable-table th, .scrollable-table td {
|
| 19 |
+
border: 1px solid #ddd;
|
| 20 |
+
padding: 8px;
|
| 21 |
+
text-align: left;
|
| 22 |
+
color: #000000
|
| 23 |
+
|
| 24 |
+
}
|
| 25 |
+
.scrollable-table th {
|
| 26 |
+
background-color: #EAF2FF;
|
| 27 |
+
color: #3366CC;
|
| 28 |
+
}
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def parse_transcript(transcript):
    """Split an "MM:SS text" transcript into parallel timestamp/text lists.

    Each entry is an MM:SS stamp followed by everything up to the next stamp
    (or end of string); DOTALL lets an entry's text span newlines.

    Returns:
        A (timestamps, texts) pair of equal-length lists.
    """
    entry_pattern = re.compile(r'(\d{2}:\d{2})\s+(.+?)(?=\d{2}:\d{2}|$)', re.DOTALL)
    timestamps = []
    texts = []
    for stamp, body in entry_pattern.findall(transcript):
        timestamps.append(stamp)
        texts.append(body.strip())
    return timestamps, texts
|
| 44 |
+
|
| 45 |
+
def create_transcript_table(timestamps, transcript_text):
    """Render paired timestamps and dialogue lines as an HTML table.

    Args:
        timestamps: Sequence of timestamp strings (one per row).
        transcript_text: Sequence of dialogue strings, parallel to
            ``timestamps`` (extra items in either are ignored by ``zip``).

    Returns:
        HTML for a two-column table wrapped in a ``scrollable-table`` div
        (styled by the ``css`` constant above).
    """
    # Build the pieces in a list and join once, instead of quadratic +=.
    parts = [
        '<div class="scrollable-table">\n',
        '<table>\n',
        ' <thead>\n',
        ' <tr>\n',
        ' <th>Timestamp</th>\n',
        ' <th>Transcript</th>\n',
        ' </tr>\n',
        ' </thead>\n',
        ' <tbody>\n',
    ]
    for ts, text in zip(timestamps, transcript_text):
        parts.append(' <tr>\n')
        parts.append(f' <td>{ts}</td>\n')
        parts.append(f' <td>{text}</td>\n')
        parts.append(' </tr>\n')
    parts.append(' </tbody>\n')
    parts.append('</table>\n')
    # Fix: close the wrapper div; the original emitted an unclosed <div>,
    # producing invalid HTML that could swallow subsequent page content.
    parts.append('</div>\n')
    return ''.join(parts)
|
| 63 |
+
|
| 64 |
+
def filter_transcript():
    """Return the hard-coded demo timestamps and their transcript lines.

    Returns:
        A (timestamps, transcript_text) pair of parallel lists.
    """
    demo_timestamps = [
        "15.0 - 17.0",
        "38.08 - 39.50",
    ]
    demo_lines = [
        "Sad (prompt; 1st)",
        "Because he fell (no prompt; 2nd)",
    ]
    return demo_timestamps, demo_lines
|
| 74 |
+
|
| 75 |
+
# Guidance Generation Function
def generate_guidance():
    """Return the hard-coded guidance notes shown in the demo."""
    guidance_text = """ Engagement: Student may display behaviors such as rocking when showing engagement.
    Impact factors: Weather (e.g., raining) can impact student’s performance."""
    return guidance_text
|
README.md
CHANGED
|
@@ -1,13 +1,8 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: SOAP
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
pinned: false
|
| 10 |
-
license: mit
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SOAP
|
| 3 |
+
app_file: newDemo.py
|
| 4 |
+
sdk: gradio
|
| 5 |
+
sdk_version: 4.31.0
|
| 6 |
+
---
|
| 7 |
+
# SOAPdemo
|
| 8 |
+
SOAP demo by Qingxiao/Parisa/Aditya
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEST.mp3
ADDED
|
Binary file (494 kB). View file
|
|
|
TEST.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6278941748127fc61cee67d4e7477b842f6890a3df50afcbece9960137352898
|
| 3 |
+
size 22321588
|
avatar.webp
ADDED
|
|
hardCodedData.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Define chatbot and checkbox options
# Canned chatbot responses used by the scripted demo flow.
First_response = """
Four people identified in the video. Please select the people you want to analyse:
"""
Second_response="""Let’s confirm the learning goals:"""

# Checkbox choices: non-verbal behaviors an SLP may track.
non_verbal_options = [
    "Following directions",
    "On-task percentage",
    "Elopement attempts",
    "Falling response",
    "Overwhelm indicators",
    "Avoidance behaviors",
    "Harm attempts",
    "Eye contact percentage",
    "Eye contact duration",
    "Body language",
    "Stimming behaviors",
    "Attention tracking",
    "Whole-body listening",
    "Gaze tracking",
    "Inappropriate touching",
    "Listening behaviors",
    "Noises or gestures"
]

# Checkbox choices: verbal/speech behaviors.
verbal_options = [
    "Target sounds",
    "Word position",
    "Sound substitutions",
    "Articulation frequency",
    "Mean Length of Utterance (MLU)",
    "Disfluencies",
    "Questions asked",
    "Student answers",
    "Response time",
    "Peer responses",
    "On-topic responses",
    "Correct response ratio",
    "Cues needed",
    "Pragmatic skills",
    "Conversation volume",
    "Initiating conversation",
    "Interruptions",
    "Following directions",
    "Answering questions",
    "Idioms/sarcasm",
    "Grammar and syntax",
    "Vocabulary use",
    "Answering wh-questions"
]

# Checkbox choices: environmental factors that may affect a session.
environmental_options = [
    "External noises",
    "Room activity",
    "Room temperature",
    "Fluorescent lights",
    "School vs home",
    "Session location",
    "Furniture size",
]


# Names offered when selecting which person in the video to analyze.
client_options=[
    "David",
    "John",
    "Sam",
    "Mike",
]

# Canned analytics text for the demo's insights panel.
data_insights = """
- On 9/17, Mike seemed to be distracted by the lighting in the room, which could be the reason he had more approximate words than correct words.
- On 9/18, there was a level of outside noise present during the session due to weather. This was also where Mike’s incorrect words were at their highest.
- On 9/19, Mike performed the best he has yet!
- Between 9/20 - 9/22, Mike's approximate words have tapered out.
"""


# Pre-written SOAP note sections (Subjective / Objective / Assessment / Plan).
subjective_report = """
Mike exhibited signs of stress at the start of the session due to the rainy weather, but he maintained engagement and participation throughout.
"""

objective_report = """
- Number of Answers Given: This child named one of the characters in the story as 'Bob.'
- Cues Required: Responded directly to the SLP’s prompt without requiring additional cues.
- Observed Behavior: Participation in the naming process indicates engagement, though the child's involvement appears less proactive compared to the middle child.
'Wh' Questions Addressed: Answered a 'who' question by providing a name for a character.
- Grammar and Complexity: The response was simple but appropriate for the task at hand.
- Visual Cues Used: It's not clear from the transcript if the child used visual cues, but the direct response to the SLP's question suggests attentiveness to verbal cues.
"""

assessment_report = """
His ability to produce /er/ words when presented with a phonemic cue has also improved from only making 20% of words in yesterday’s session.
"""

# Intentionally blank: the Plan section is filled in during the session.
plan_report = """

"""
|
newDemo.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Standard library
import os
import time

# Third-party
import google
import google.generativeai as genai
import gradio as gr
import numpy as np
import plotly.graph_objs as go

# Local
from hardCodedData import *
from Helper import *
|
| 9 |
+
|
| 10 |
+
'''
Model Information
Gemini 1.5 pro
'''
# SECURITY FIX: a live Google API key was hard-coded here and committed to the
# repository. The key is now read from the GOOGLE_API_KEY environment variable
# (set it in the Space/host secrets); the leaked key must be revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
genai.configure(api_key=GOOGLE_API_KEY)
# Moderate temperature: keep answers grounded while allowing some variation.
generation_config = genai.GenerationConfig(temperature=0.5)
|
| 17 |
+
|
| 18 |
+
# Model configuration
|
| 19 |
+
# Gemini model used for all chat turns. The system instruction scripts the
# assistant's onboarding flow for the SLP user.
# FIX: the step list previously numbered two different steps "2." — renumbered 1-6.
model = genai.GenerativeModel(
    model_name='gemini-1.5-pro-latest',
    system_instruction="""
    You are an assistant chatbot for a Speech Language Pathologist (SLP).
    Your task is to help analyze a provided video of a therapy session and answer questions accurately.
    Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.

    Follow these steps:

    1. Suggest to the user to ask, “To get started, you can try asking me how many people there are in the video.”
    2. Detect how many people are in the video.
    3. Suggest to the user to tell you the names of the people in the video, starting from left to right.
    4. After receiving the names, respond with, “Ok thank you! Now you can ask me any questions about this video.”
    5. If the user asks about a behavior, respond with, “My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice.”
    6. If you receive names, confirm that these are the names of the people from left to right.
    """
)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
'''
Video
'''
# Handle to the most recently uploaded Gemini file object (None until upload).
video_file = None

def handle_video(video=None):
    """Upload a therapy-session video to Gemini and wait for processing.

    Args:
        video: Local path to the video. When None, the bundled example
            video ``./TEST.mp4`` is used instead.

    Returns:
        The local path for the example video (so the player can show it),
        otherwise the processed Gemini file object.

    Raises:
        ValueError: If Gemini reports the upload as FAILED.
    """
    global video_file
    # BUG FIX: is_example (originally ``isTest``) was only assigned inside the
    # ``video is None`` branch, so any user-supplied video raised NameError at
    # the final check. It is now initialized unconditionally.
    is_example = False
    if video is None:
        # Fall back to the bundled example clip.
        video = "./TEST.mp4"
        is_example = True

    video_file = genai.upload_file(path=video)

    # Poll until Gemini finishes server-side processing of the upload.
    while video_file.state.name == "PROCESSING":
        print(".", end="")
        time.sleep(10)
        video_file = genai.get_file(video_file.name)

    if video_file.state.name == "FAILED":
        raise ValueError(video_file.state.name)

    if is_example:
        return video
    else:
        return video_file
|
| 65 |
+
|
| 66 |
+
'''
Chatbot
'''
# Full conversation so far, in the genai "contents" format
# ({'role': ..., 'parts': [...]}).
chat_history = []

def new_prompt(prompt):
    """Send *prompt* (plus the processed video, when available) to Gemini.

    Appends both the user turn and the model's reply (or an error message)
    to the module-level ``chat_history`` and returns it.
    """
    global chat_history, video_file

    # Record the user's turn first so errors still leave the history coherent.
    chat_history.append({'role': 'user', 'parts': [prompt]})
    try:
        if video_file:
            # A processed video exists: attach it to this turn and allow a
            # long timeout, since video grounding can be slow.
            chat_history[-1]['parts'].extend([" from video: ", video_file])
            response = model.generate_content(chat_history, request_options={"timeout": 600})
        else:
            # Text-only conversation so far.
            response = model.generate_content(chat_history)
        reply = response.candidates[0].content.parts[0].text
    except google.api_core.exceptions.ResourceExhausted:
        reply = "API rate limit has been reached. Please wait a moment and try again."
    except Exception as e:
        reply = f"An error occurred: {str(e)}"

    chat_history.append({'role': 'model', 'parts': [reply]})
    return chat_history
|
| 97 |
+
|
| 98 |
+
def user_input(user_message, history):
    """Append the user's message (reply pending) and clear the textbox."""
    updated_history = history + [[user_message, None]]
    return "", updated_history
|
| 100 |
+
|
| 101 |
+
def bot_response(history):
    """Generate the assistant's reply and stream it into the chat UI.

    Takes the Gradio chatbot ``history`` (list of [user, assistant] pairs
    whose last entry has a pending ``None`` reply), asks the model via
    ``new_prompt``, and yields the history repeatedly so the reply appears
    character by character.
    """
    # FIX: removed leftover debug print() calls that dumped the entire chat
    # history (including the Gemini file object) to stdout on every turn.
    user_message = history[-1][0]
    updated_history = new_prompt(user_message)
    assistant_message = updated_history[-1]['parts'][0]
    # Typing effect: reveal one more character every 50 ms.
    for i in range(len(assistant_message)):
        time.sleep(0.05)
        history[-1][1] = assistant_message[:i + 1]
        yield history
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
'''
Behavior box
'''
# Seed behaviors shown in the Behavior Collection dropdown. Each entry is
# (name, (definition, example timestamps as MM:SS strings)).
initial_behaviors = [
    ("Initiating Behavioral Request (IBR)",
     ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
      ["00:10", "00:45", "01:30"])),

    ("Initiating Joint Attention (IJA)",
     ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
      ["00:15", "00:50", "01:40"])),

    ("Responding to Joint Attention (RJA)",
     ("The child's skill in following the examiner’s line of regard and pointing gestures.",
      ["00:20", "01:00", "02:00"])),

    ("Initiating Social Interaction (ISI)",
     ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
      ["00:20", "00:50", "02:00"])),

    ("Responding to Social Interaction (RSI)",
     ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
      ["00:20", "01:00", "02:00"]))
]

# BUG FIX: ``behaviors = initial_behaviors`` aliased the seed list, so every
# add/edit/delete also mutated initial_behaviors and the defaults could never
# be recovered. Work on a shallow copy instead.
behaviors = list(initial_behaviors)
# Names the user has checked into the Behavior Bank (subset of behaviors).
behavior_bank = []
|
| 140 |
+
|
| 141 |
+
def add_or_update_behavior(name, definition, timestamps, selected_behavior):
    """Save a behavior: overwrite the selected one, or append a new entry.

    Returns updated (dropdown, bank checkbox group, name, definition,
    timestamps) components; the three textboxes are cleared.
    """
    global behaviors, behavior_bank
    if selected_behavior:
        # Overwrite the matching entry in the collection.
        for idx, (existing_name, _) in enumerate(behaviors):
            if existing_name == selected_behavior:
                behaviors[idx] = (name, (definition, timestamps))
                break
        # Keep the bank in sync when the renamed behavior was already banked.
        behavior_bank = [name if entry == selected_behavior else entry
                         for entry in behavior_bank]
    else:
        # No selection: treat the form contents as a brand-new behavior.
        behaviors.append((name, (definition, timestamps)))
    names = [entry[0] for entry in behaviors]
    return (gr.Dropdown(choices=names, value=None, interactive=True),
            gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True),
            "", "", "")
|
| 155 |
+
|
| 156 |
+
def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
    """Copy the dropdown selection into the Behavior Bank (no duplicates).

    Returns the refreshed checkbox group and a dropdown reset to no selection.
    """
    global behavior_bank
    if selected_behavior and selected_behavior not in checkbox_group_values:
        checkbox_group_values.append(selected_behavior)
        behavior_bank = checkbox_group_values
    return (gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True),
            gr.Dropdown(value=None, interactive=True))
|
| 162 |
+
|
| 163 |
+
def delete_behavior(selected_behavior, checkbox_group_values):
    """Remove a behavior everywhere: collection, bank, and checkbox group."""
    global behaviors, behavior_bank
    behaviors = [entry for entry in behaviors if entry[0] != selected_behavior]
    behavior_bank = [entry for entry in behavior_bank if entry != selected_behavior]
    remaining_names = [entry[0] for entry in behaviors]
    remaining_checked = [item for item in checkbox_group_values if item != selected_behavior]
    return (gr.Dropdown(choices=remaining_names, value=None, interactive=True),
            gr.CheckboxGroup(choices=remaining_checked, value=remaining_checked, interactive=True))
|
| 170 |
+
|
| 171 |
+
def edit_behavior(selected_behavior):
    """Load a behavior into the form textboxes for editing.

    Returns (name, definition, timestamps) for the selected behavior, or
    three empty strings when it is not found.
    """
    match = next(
        ((name, definition, timestamps)
         for name, (definition, timestamps) in behaviors
         if name == selected_behavior),
        None,
    )
    return match if match is not None else ("", "", "")
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# Greeting pre-populated as the assistant's first chat bubble.
welcome_message = """
Hello! I'm your AI assistant.
I can help you analyze your video sessions following your instructions.
To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
"""
#If you want to tell me about the people in the video, please name them starting from left to right.

# Page-level CSS injected into the Gradio app (overrides the 'base' theme).
css="""
body {
    background-color: #edf1fa; /* offwhite */
}
.gradio-container {
    background-color: #edf1fa; /* offwhite */
}
.column-form .wrap {
    flex-direction: column;
}
.sidebar {
    background: #ffffff;
    padding: 10px;
    border-right: 1px solid #dee2e6;
}
.content {
    padding: 10px;
}
"""
|
| 205 |
+
|
| 206 |
+
'''
Gradio Demo
'''
with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
    gr.Markdown("# 🤖 AI-Supported SOAP Generation")

    with gr.Row():
        with gr.Column():
            # Left column: video player plus upload / example controls.
            video = gr.Video(label="Video", visible=True, height=360, container=True)
            with gr.Row():
                with gr.Column(min_width=1, scale=1):
                    video_upload_button = gr.Button("Analyze Video", variant="primary")
                with gr.Column(min_width=1, scale=1):
                    example_video_button = gr.Button("Load Example Video")

            video_upload_button.click(handle_video, inputs=video, outputs=video)
            example_video_button.click(handle_video, None, outputs=video)

        with gr.Column():
            # Right column: chatbot panel with input box and controls.
            chat_section = gr.Group(visible=True)
            with chat_section:
                chatbot = gr.Chatbot(elem_id="chatbot",
                                     container=True,
                                     likeable=True,
                                     value=[[None, welcome_message]],
                                     avatar_images=(None, "./avatar.webp"))
                with gr.Row():
                    txt = gr.Textbox(show_label=False, placeholder="Type here!")
                with gr.Row():
                    send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
                    clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")

    with gr.Row():
        behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
                                         choices=[],
                                         interactive=True,
                                         info="A space to store all the behaviors you want to analyze.")
        open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
        close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)

    # Chat wiring: add the user turn, then stream the bot reply.
    txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot_response, chatbot, chatbot)
    send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot_response, chatbot, chatbot)
    clear_btn.click(lambda: None, None, chatbot, queue=False)

    # Behavior-manager sidebar, hidden until opened.
    # FIX: gr.Column's ``scale`` must be an int — 0.5 is rejected by Gradio.
    with gr.Column(visible=False, min_width=200, scale=1, elem_classes="sidebar") as sidebar:
        # FIX: seed the dropdown with behavior *names* only. It previously
        # received the raw (name, (definition, timestamps)) tuples, while all
        # callbacks repopulate it with [b[0] for b in behaviors].
        behavior_dropdown = gr.Dropdown(label="Behavior Collection",
                                        choices=[b[0] for b in behaviors],
                                        interactive=True,
                                        container=True,
                                        elem_classes="column-form",
                                        info="Choose a behavior to add to the bank, edit or remove.")
        with gr.Row():
            add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
            edit_button = gr.Button("Edit Behavior")
            delete_button = gr.Button("Remove Behavior")

        with gr.Row():
            name_input = gr.Textbox(label="Behavior Name",
                                    placeholder="(e.g., IBR)",
                                    info="The name you give to the specific behavior you're tracking or analyzing.")
            timestamps_input = gr.Textbox(label="Timestamps MM:SS",
                                          placeholder="(e.g., (01:15,01:35) )",
                                          info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
            definition_input = gr.Textbox(lines=3,
                                          label="Behavior Definition",
                                          placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
                                          info="Provide a clear definition of the behavior.")

        with gr.Row():
            submit_button = gr.Button("Save Behavior", variant="primary")

        submit_button.click(fn=add_or_update_behavior,
                            inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
                            outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])

        add_toBank_button.click(fn=add_to_behaivor_bank,
                                inputs=[behavior_dropdown, behaivor_bank],
                                outputs=[behaivor_bank, behavior_dropdown])

        delete_button.click(fn=delete_behavior,
                            inputs=[behavior_dropdown, behaivor_bank],
                            outputs=[behavior_dropdown, behaivor_bank])

        edit_button.click(fn=edit_behavior,
                          inputs=[behavior_dropdown],
                          outputs=[name_input, definition_input, timestamps_input])

    # Show the sidebar and swap the toggle buttons.
    open_sidebar_btn.click(lambda: {
        open_sidebar_btn: gr.Button(visible=False),
        close_sidebar_btn: gr.Button(visible=True),
        sidebar: gr.Column(visible=True)
    }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])

    # Hide the sidebar and swap the toggle buttons back.
    close_sidebar_btn.click(lambda: {
        open_sidebar_btn: gr.Button(visible=True),
        close_sidebar_btn: gr.Button(visible=False),
        sidebar: gr.Column(visible=False)
    }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])

# Launch the demo
demo.launch(share=True)
|
requirements.txt
CHANGED
|
@@ -1 +1,166 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==23.2.1
|
| 2 |
+
aiohttp==3.9.5
|
| 3 |
+
aiosignal==1.3.1
|
| 4 |
+
alembic==1.13.2
|
| 5 |
+
altair==5.3.0
|
| 6 |
+
annotated-types==0.7.0
|
| 7 |
+
antlr4-python3-runtime==4.9.3
|
| 8 |
+
anyio==4.4.0
|
| 9 |
+
asteroid-filterbanks==0.4.0
|
| 10 |
+
async-timeout==4.0.3
|
| 11 |
+
attrs==23.2.0
|
| 12 |
+
audioread==3.0.1
|
| 13 |
+
av==11.0.0
|
| 14 |
+
beautifulsoup4==4.12.3
|
| 15 |
+
certifi==2024.7.4
|
| 16 |
+
cffi==1.16.0
|
| 17 |
+
charset-normalizer==3.3.2
|
| 18 |
+
click==8.1.7
|
| 19 |
+
colorama==0.4.6
|
| 20 |
+
coloredlogs==15.0.1
|
| 21 |
+
colorlog==6.8.2
|
| 22 |
+
contourpy==1.2.1
|
| 23 |
+
ctranslate2==4.3.1
|
| 24 |
+
cycler==0.12.1
|
| 25 |
+
decorator==4.4.2
|
| 26 |
+
distro==1.9.0
|
| 27 |
+
dnspython==2.6.1
|
| 28 |
+
docopt==0.6.2
|
| 29 |
+
einops==0.8.0
|
| 30 |
+
email_validator==2.2.0
|
| 31 |
+
exceptiongroup==1.2.1
|
| 32 |
+
fastapi==0.111.0
|
| 33 |
+
fastapi-cli==0.0.4
|
| 34 |
+
faster-whisper==1.0.0
|
| 35 |
+
ffmpy==0.3.2
|
| 36 |
+
filelock==3.15.4
|
| 37 |
+
flatbuffers==24.3.25
|
| 38 |
+
fonttools==4.53.1
|
| 39 |
+
frozenlist==1.4.1
|
| 40 |
+
fsspec==2024.6.1
|
| 41 |
+
google==3.0.0
|
| 42 |
+
gradio==4.37.2
|
| 43 |
+
gradio_client==1.0.2
|
| 44 |
+
greenlet==3.0.3
|
| 45 |
+
h11==0.14.0
|
| 46 |
+
httpcore==1.0.5
|
| 47 |
+
httptools==0.6.1
|
| 48 |
+
httpx==0.27.0
|
| 49 |
+
huggingface-hub==0.23.4
|
| 50 |
+
humanfriendly==10.0
|
| 51 |
+
HyperPyYAML==1.2.2
|
| 52 |
+
idna==3.7
|
| 53 |
+
imageio==2.34.2
|
| 54 |
+
imageio-ffmpeg==0.5.1
|
| 55 |
+
importlib_resources==6.4.0
|
| 56 |
+
intel-openmp==2021.4.0
|
| 57 |
+
Jinja2==3.1.4
|
| 58 |
+
joblib==1.4.2
|
| 59 |
+
jsonschema==4.23.0
|
| 60 |
+
jsonschema-specifications==2023.12.1
|
| 61 |
+
julius==0.2.7
|
| 62 |
+
kiwisolver==1.4.5
|
| 63 |
+
lazy_loader==0.4
|
| 64 |
+
librosa==0.10.2.post1
|
| 65 |
+
lightning==2.3.3
|
| 66 |
+
lightning-utilities==0.11.3.post0
|
| 67 |
+
llvmlite==0.43.0
|
| 68 |
+
Mako==1.3.5
|
| 69 |
+
markdown-it-py==3.0.0
|
| 70 |
+
MarkupSafe==2.1.5
|
| 71 |
+
matplotlib==3.9.1
|
| 72 |
+
mdurl==0.1.2
|
| 73 |
+
mkl==2021.4.0
|
| 74 |
+
moviepy==1.0.3
|
| 75 |
+
mpmath==1.3.0
|
| 76 |
+
msgpack==1.0.8
|
| 77 |
+
multidict==6.0.5
|
| 78 |
+
networkx==3.2.1
|
| 79 |
+
nltk==3.8.1
|
| 80 |
+
numba==0.60.0
|
| 81 |
+
numpy==1.26.4
|
| 82 |
+
omegaconf==2.3.0
|
| 83 |
+
onnxruntime==1.18.1
|
| 84 |
+
openai==1.35.12
|
| 85 |
+
opencv-python==4.10.0.84
|
| 86 |
+
optuna==3.6.1
|
| 87 |
+
orjson==3.10.6
|
| 88 |
+
packaging==24.1
|
| 89 |
+
pandas==2.2.2
|
| 90 |
+
pillow==10.4.0
|
| 91 |
+
platformdirs==4.2.2
|
| 92 |
+
plotly==5.22.0
|
| 93 |
+
pooch==1.8.2
|
| 94 |
+
primePy==1.3
|
| 95 |
+
proglog==0.1.10
|
| 96 |
+
protobuf==5.27.2
|
| 97 |
+
pyannote.audio==3.1.1
|
| 98 |
+
pyannote.core==5.0.0
|
| 99 |
+
pyannote.database==5.1.0
|
| 100 |
+
pyannote.metrics==3.2.1
|
| 101 |
+
pyannote.pipeline==3.0.1
|
| 102 |
+
pycparser==2.22
|
| 103 |
+
pydantic==2.8.2
|
| 104 |
+
pydantic_core==2.20.1
|
| 105 |
+
pydub==0.25.1
|
| 106 |
+
Pygments==2.18.0
|
| 107 |
+
pyparsing==3.1.2
|
| 108 |
+
pyreadline3==3.4.1
|
| 109 |
+
python-dateutil==2.9.0.post0
|
| 110 |
+
python-dotenv==1.0.1
|
| 111 |
+
python-multipart==0.0.9
|
| 112 |
+
pytorch-lightning==2.3.3
|
| 113 |
+
pytorch-metric-learning==2.5.0
|
| 114 |
+
pytz==2024.1
|
| 115 |
+
PyYAML==6.0.1
|
| 116 |
+
referencing==0.35.1
|
| 117 |
+
regex==2024.5.15
|
| 118 |
+
requests==2.32.3
|
| 119 |
+
rich==13.7.1
|
| 120 |
+
rpds-py==0.19.0
|
| 121 |
+
ruamel.yaml==0.18.6
|
| 122 |
+
ruamel.yaml.clib==0.2.8
|
| 123 |
+
ruff==0.5.1
|
| 124 |
+
safetensors==0.4.3
|
| 125 |
+
scikit-learn==1.5.1
|
| 126 |
+
scipy==1.13.1
|
| 127 |
+
semantic-version==2.10.0
|
| 128 |
+
semver==3.0.2
|
| 129 |
+
sentencepiece==0.2.0
|
| 130 |
+
shellingham==1.5.4
|
| 131 |
+
six==1.16.0
|
| 132 |
+
sniffio==1.3.1
|
| 133 |
+
sortedcontainers==2.4.0
|
| 134 |
+
soundfile==0.12.1
|
| 135 |
+
soupsieve==2.5
|
| 136 |
+
soxr==0.3.7
|
| 137 |
+
speechbrain==1.0.0
|
| 138 |
+
SQLAlchemy==2.0.31
|
| 139 |
+
starlette==0.37.2
|
| 140 |
+
sympy==1.13.0
|
| 141 |
+
tabulate==0.9.0
|
| 142 |
+
tbb==2021.13.0
|
| 143 |
+
tenacity==8.5.0
|
| 144 |
+
tensorboardX==2.6.2.2
|
| 145 |
+
threadpoolctl==3.5.0
|
| 146 |
+
tokenizers==0.15.2
|
| 147 |
+
tomlkit==0.12.0
|
| 148 |
+
toolz==0.12.1
|
| 149 |
+
torch==2.3.1
|
| 150 |
+
torch-audiomentations==0.11.1
|
| 151 |
+
torch-pitch-shift==1.2.4
|
| 152 |
+
torchaudio==2.3.1
|
| 153 |
+
torchmetrics==1.4.0.post0
|
| 154 |
+
tqdm==4.66.4
|
| 155 |
+
transformers==4.39.3
|
| 156 |
+
typer==0.12.3
|
| 157 |
+
typing_extensions==4.12.2
|
| 158 |
+
tzdata==2024.1
|
| 159 |
+
ujson==5.10.0
|
| 160 |
+
urllib3==2.2.2
|
| 161 |
+
uvicorn==0.30.1
|
| 162 |
+
watchfiles==0.22.0
|
| 163 |
+
websockets==11.0.3
|
| 164 |
+
whisperx==3.1.1
|
| 165 |
+
yarl==1.9.4
|
| 166 |
+
zipp==3.19.2
|
style.css
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
body {
|
| 2 |
+
font-family: Arial, sans-serif;
|
| 3 |
+
color: #000; /* Set default text color to black */
|
| 4 |
+
}
|
| 5 |
+
|
| 6 |
+
.gradio-container {
|
| 7 |
+
background-color: #1a3e63; /* Background color similar to the image */
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
.gradio-block, .gradio-column, .gradio-row {
|
| 11 |
+
background-color: #fff; /* White background for blocks */
|
| 12 |
+
border-radius: 10px; /* Rounded corners */
|
| 13 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
|
| 14 |
+
margin: 10px;
|
| 15 |
+
padding: 20px;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
.gradio-markdown {
|
| 19 |
+
color: #000; /* Text color set to black */
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
.gradio-button {
|
| 23 |
+
background-color: #4CAF50; /* Button color */
|
| 24 |
+
color: white; /* Button text color */
|
| 25 |
+
border: none;
|
| 26 |
+
padding: 10px 20px;
|
| 27 |
+
text-align: center;
|
| 28 |
+
text-decoration: none;
|
| 29 |
+
display: inline-block;
|
| 30 |
+
font-size: 16px;
|
| 31 |
+
margin: 4px 2px;
|
| 32 |
+
cursor: pointer;
|
| 33 |
+
border-radius: 5px; /* Rounded corners */
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
.gradio-checkbox-group, .gradio-checkbox {
|
| 37 |
+
color: #000; /* Text color set to black */
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.gradio-textbox {
|
| 41 |
+
color: #000; /* Text color set to black */
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
.gradio-header {
|
| 45 |
+
color: #000; /* Text color set to black */
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.gradio-container p, .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4, .gradio-container h5, .gradio-container h6 {
|
| 49 |
+
color: #000; /* Ensure all header and paragraph texts are black */
|
| 50 |
+
}
|