import openai
import whisper
from io import BytesIO
import os
import sys
import pytube
from moviepy.editor import VideoFileClip
import moviepy.editor as movpy
from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from pytube import YouTube
import gradio as gr
import collections
from gtts import gTTS
##############################################################################
# SECURITY: API keys must never be committed to source control. The key that
# was previously hard-coded on this line should be treated as leaked and
# revoked. The key is now read from the OPENAI_API_KEY environment variable
# (None when the variable is unset, in which case API calls will fail with an
# authentication error).
openai.api_key = os.environ.get("OPENAI_API_KEY")
#create gradio app
class NamedBytesIO(BytesIO):
    """In-memory bytes buffer that additionally exposes a read-only ``name``.

    The optional ``name`` keyword argument (default ``'unnamed.mp3'``) is
    stored on the instance; every other argument is forwarded unchanged to
    ``BytesIO``.
    """

    def __init__(self, *args, name='unnamed.mp3', **kwargs):
        self._name = name
        super().__init__(*args, **kwargs)

    @property
    def name(self):
        """Return the file name given at construction time."""
        return self._name
##############################################################################
def transcribe_audio(audio_file_path):
    """Transcribe an MP3 file with the OpenAI Whisper API and return the text.

    The file is loaded with pydub and cut into consecutive chunks of at most
    20 minutes. Each chunk is re-encoded to MP3 in memory and sent to
    ``openai.Audio.transcribe``; the per-chunk transcripts are joined with a
    single space. The transcript is also printed, one sentence per line.

    Args:
        audio_file_path: Path of the MP3 file to transcribe.

    Returns:
        The complete transcription as one string. It is empty when the audio
        has zero length.
    """
    model_id = 'whisper-1'
    audio = AudioSegment.from_file(audio_file_path, format="mp3")
    print(f"Duration of the audio is {int(audio.duration_seconds)} seconds")

    # pydub reports len() and slices AudioSegment objects in MILLISECONDS, so
    # both the total length and the step must use that unit. The previous code
    # stepped a seconds-based range by a milliseconds value, which transcribed
    # only the first 20 minutes and skipped clips shorter than one second.
    total_ms = len(audio)
    chunk_ms = 60 * 1000 * 20  # 20 minutes in milliseconds

    # The prompt is identical for every chunk, so build it once.
    transcription_prompt = """
Dont remove any filler words in transcribe. eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
This is human voices with various tone and accents.
1) Transcibe every words and all signals, dont skip any
2) Dont correct the grammar
3) Dont correct the spelling
4) Dont remove any redundant words or punctuations
5) if there is a pause in the audio, please add a comma(,) in the transcribe
6) if there is a long pause in the audio, please add a period(.) in the transcribe
"""

    transcriptions = []
    for start_ms in range(0, total_ms, chunk_ms):
        chunk = audio[start_ms:start_ms + chunk_ms]
        # The buffer carries a ".mp3" name; presumably the OpenAI client uses
        # it to identify the upload format -- TODO confirm.
        chunk_buffer = NamedBytesIO(name='chunk.mp3')
        chunk.export(chunk_buffer, format="mp3")
        chunk_buffer.seek(0)  # rewind so the upload starts at byte 0
        response = openai.Audio.transcribe(
            api_key=openai.api_key,
            model=model_id,
            file=chunk_buffer,
            prompt=transcription_prompt,
        )
        transcriptions.append(response['text'])

    final_transcription = ' '.join(transcriptions)
    print(final_transcription.replace('.', '.\n'))
    return final_transcription
#########################################################################################
# "text-davinci-003" has been shut down by OpenAI; "gpt-3.5-turbo-instruct" is
# the documented replacement for the legacy completions endpoint.
_COMPLETION_MODEL = "gpt-3.5-turbo-instruct"


def _complete(prompt):
    """Send *prompt* to the completions endpoint and return the stripped text."""
    api_response = openai.Completion.create(
        model=_COMPLETION_MODEL,
        prompt=prompt,
        temperature=0,
        max_tokens=500,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    return api_response.choices[0].text.strip()


def _build_feedback_prompt(transcribed_text):
    """Return the interview-coach evaluation prompt for *transcribed_text*."""
    return f"""
Pretend that you are an interview coach with 25 years of experience.
Evaluate the following response based on the evaluation criteria:
# please provide feedback on the following response based on the evaluation criteria:
# language, tone & personality, and a 5-point system for clarity, vocal variety, comfort level, interest, and well-supported content.
Evaluation criteria:
A) Language: Score 1 - 10. the more score the better.
The higher usage of filler words, redundant words, jargons will decrease the score
1. use of Filler words: count the filler words and list them: eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
example: um: 3, yeah: 2
2. use of redundant words: list them: example: sum total, joint collaboration, unexpected surprise, future plans, new record...etc
example: sum total: 2, joint collaboration: 1
3. use of jargons: list them
B) Tone & personality: Score 1 - 15: the more score the better.
The better of the tone and personality, the higher score, Relevance, Clarity, Specificity, Confidence, Fit
will increase the score
1. Tone: What is the Tone? Choose top one from the following: and just report the headline
A. Positive/optimistic: Expressing a hopeful or upbeat outlook, such as excitement, joy, or satisfaction. Example: "I'm really looking forward to this vacation!"
B. Negative/pessimistic: Expressing a negative or gloomy outlook, such as sadness, frustration, or disappointment. Example: "I don't think I can handle any more bad news."
C. Neutral/objective: Expressing an unbiased or factual outlook, without any emotion or bias. Example: "The temperature is 75 degrees and the sky is clear."
D. Sarcastic: Expressing a tone of mockery or irony, often with the opposite meaning of what is being said. Example: "Oh great, another Monday morning. Just what I needed."
E. Formal/polite: Using formal language and expressions to show respect and politeness. Example: "I would be most grateful if you could provide me with further information."
F. Informal/casual: Using casual language and expressions to show familiarity and informality. Example: "Hey, what's up? Wanna hang out later?"
G. Authoritative: Expressing a tone of authority or control, such as in instructions or commands. Example: "You need to follow these procedures precisely to ensure safety."
H. Condescending: Expressing a tone of superiority or patronization towards others.
2. Personality traits: Insights into the personality traits? Choose top one from the following: and just report the headline
A. Extraversion: Extraverted individuals tend to be outgoing, sociable, and talkative.
B. Introversion: Introverted individuals tend to be more reserved, reflective, and introspective.
C. Conscientiousness: Conscientious individuals tend to be organized, responsible, and diligent.
D. Agreeableness: Agreeable individuals tend to be friendly, cooperative, and empathetic.
E. Anxious / Sensitive : Individuals tend to be anxious, sensitive, and easily stressed.
3. Relevance: Does the response address the question asked and provide relevant information?
4. Clarity: Is the response clear and concise?
5. Specificity / examples or details: Does the response provide specific examples or details that demonstrate the interviewee's skills, experiences, or qualifications?
6. Confidence: Does the interviewee present themselves confidently and effectively communicate their ideas?
7. Fit: Does the response demonstrate how the interviewee's skills, experiences, or qualifications align with the job requirement?
C) Communication metrics: Score 1 - 25: The user to be evaluated on the following criteria with a 3 point system:
Higher the points better the score
1. Clarity: Spoken language is clear and is easily understood Comment
3 Is an exemplary Interviewee who is always understood.
2 Spoken language is clear and is easily understood
1 Spoken language is unclear or not easily understood
2. Comfort Level
3 Appears completely self-assured with the Interviewer
2 Appears comfortable with the Interviewer
1 Appears highly uncomfortable with the Interviewer
3. Interesting content: Engages Interviewer with interesting, well-constructed content Comment
3 Fully engages Interviewer with exemplary, well constructed content
2 Engages Interviewer with interesting, well constructed content
1 Content is neither interesting nor well-constructed
4. Well Supported: Speech content is well-supported and sources are available if requested
3 Delivers exemplary speech with a topic that is well-supported by content of the speech
2 Speech topic is well-supported by content of speech
1 Speech content is unrelated to the topic of the speech
Assess the response and give your honest feedback.
A) Language
B) Tone & personality
C) 3 point evaluation
D) Overall Score = sum of points scored in A + B + C
Put in top 3 bullet points for feedback + 3 bullet points on how to improve it.
example output:
" Feedback: Overall Score = 5/50
A) Language: Score: 5/10
1. Filler words: um: 3, yeah: 2
2. Redundant words: sum total: 2, joint collaboration: 1
3. Jargons: list them
B) Tone & personality: Score: 2/15
1. Tone: Positive/optimistic
2. Personality traits: Extraversion
3. Relevance: Yes
4. Clarity: Yes
5. Specificity / examples or details: Yes
6. Confidence: Yes
7. Fit: Yes
C) 3 points evaluation: Score: 3/25
1. Clarity: 2
2. Comfort Level: 3
3. Interesting content: 2
4. Well Supported: 3
Suggestions to improve:
1. You used a lot of filler words. Try to avoid them.
2. You used a lot of redundant words. Try to avoid them.
3. You used a lot of jargons. Try to avoid them.
4. Improve your tone. Try to be more positive.
5. Improve your personality. Try to be more extraverted.
6. Improve your clarity. Try to be more clear.
..etc
"
Response:
{transcribed_text}
"""


def _build_summary_prompt(feedback):
    """Return the prompt that asks for a short, friendly summary of *feedback*."""
    return f"""
Summarize the interviewee's response to the question in 3 - 5 bullet points.
start the summary with a positive note.
make it more professional and friendly.
you should give feedback so that the interviewee can improve their response.
Response:
{feedback}
"""


def get_feedback(audio_file_path):
    """Produce written and spoken coaching feedback for an interview recording.

    Steps: transcribe the audio, ask the completion model to score the
    transcript against the coaching rubric, ask it again to condense that
    feedback into a short summary, and render the summary to speech with gTTS.

    Args:
        audio_file_path: Path of the MP3 recording to evaluate.

    Returns:
        A four-item list: the detailed feedback text, the summary text, the
        transcription, and the path of the generated speech file
        (``"Temp.mp3"``).
    """
    transcribed_text = transcribe_audio(audio_file_path)

    # First pass: detailed rubric-based evaluation of the transcript.
    feedback = _complete(_build_feedback_prompt(transcribed_text))

    # Second pass: the summary is generated from the feedback text, not from
    # the raw transcript.
    summary_response_out = _complete(_build_summary_prompt(feedback))

    # Render the summary as English speech.
    # NOTE(review): the output path is fixed, so each call overwrites the
    # previous "Temp.mp3".
    language = "en"
    audioobj = gTTS(text=summary_response_out, lang=language, slow=False)
    audioobj.save("Temp.mp3")

    return [feedback, summary_response_out, transcribed_text, "Temp.mp3"]
# iface = gr.Interface(
# fn=get_feedback,
# inputs=gr.inputs.Textbox(lines=10, label="Interviewee audio file path"),
# outputs=[
# gr.outputs.Textbox(label="Feedback"),
# gr.outputs.Textbox(label="Transcription")
# ],
# title="Interview Feedback",
# examples=[
# "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
# "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3"
# ],
# description="Get feedback on your interview response from an AI interview coach.",
# layout="vertical"
# )
# iface.launch()
# Create the Gradio interface.
# Layout (vertical): input, examples, then the outputs.
# The input is a textbox holding the path to the interviewee's audio file.
# The examples are paths to sample audio files.
# The outputs are textboxes for the feedback, the summary and the transcription, plus an audio player for the spoken summary.
# Components are created in the same order as before (input first, then the
# outputs) and then handed to the Interface by name.
path_input = gr.inputs.Textbox(lines=1, label="Interviewee audio file path")

# Sample recordings offered as one-click examples in the UI.
example_recordings = [
    "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
    "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3",
]

# One component per item returned by get_feedback, in the same order.
result_components = [
    gr.outputs.Textbox(label="Feedback"),
    gr.outputs.Textbox(label="Summary response"),
    gr.outputs.Textbox(label="Transcription"),
    gr.Audio("Temp.mp3", label="Speech Output"),
]

iface = gr.Interface(
    fn=get_feedback,
    inputs=path_input,
    examples=example_recordings,
    outputs=result_components,
    title="Interview Feedback",
    description="Get feedback on your interview response from an AI interview coach.",
    layout="vertical",
)

# Start the web server for the app.
iface.launch()
######################################################################################### |