| | import openai |
| | import whisper |
| | from io import BytesIO |
| | import os |
| | import sys |
| | import pytube |
| | from moviepy.editor import VideoFileClip |
| | import moviepy.editor as movpy |
| | from moviepy.editor import AudioFileClip |
| | from pydub import AudioSegment |
| | from pytube import YouTube |
| | import gradio as gr |
| | import collections |
| | from gtts import gTTS |
| |
|
| | |
# Read the OpenAI credential from the environment rather than embedding it in
# the source file: a key committed to the repository is readable by anyone
# with access to the code and has to be treated as compromised.
openai.api_key = os.environ.get("OPENAI_API_KEY")
| | |
class NamedBytesIO(BytesIO):
    """In-memory byte buffer that also exposes a read-only ``name``.

    A plain ``BytesIO`` has no ``name`` attribute. This subclass stores one so
    the buffer can be handed to code that expects a named file object.
    NOTE(review): presumably the upload client uses the extension to infer the
    audio format -- confirm against the OpenAI client.

    Args:
        *args: Forwarded unchanged to ``BytesIO`` (e.g. the initial bytes).
        name: File name reported by the ``name`` property. Keyword-only;
            defaults to ``'unnamed.mp3'``.
        **kwargs: Forwarded unchanged to ``BytesIO``.
    """

    def __init__(self, *args, name='unnamed.mp3', **kwargs):
        super().__init__(*args, **kwargs)
        self._name = name

    @property
    def name(self):
        """Return the file name supplied at construction time."""
        return self._name
| |
|
| | |
def transcribe_audio(audio_file_path):
    """Transcribe an MP3 file with the OpenAI Whisper API and return the text.

    The audio is loaded with pydub, cut into consecutive chunks of at most
    20 minutes, and each chunk is re-encoded to MP3 in memory and sent to
    ``openai.Audio.transcribe``. The per-chunk texts are joined with single
    spaces. The duration and the final transcription are also printed.

    Args:
        audio_file_path: Path of the MP3 file to transcribe.

    Returns:
        The full transcription as a single string (an empty string when the
        loaded audio has zero length).
    """
    model_id = 'whisper-1'

    # The prompt is identical for every chunk, so it is built once.
    transcription_prompt = """
    Dont remove any filler words in transcribe. eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
    This is human voices with various tone and accents.
    1) Transcibe every words and all signals, dont skip any
    2) Dont correct the grammar
    3) Dont correct the spelling
    4) Dont remove any redundant words or punctuations
    5) if there is a pause in the audio, please add a comma(,) in the transcribe
    6) if there is a long pause in the audio, please add a period(.) in the transcribe
    """

    audio = AudioSegment.from_file(audio_file_path, format="mp3")
    print(f"Duration of the audio is {int(audio.duration_seconds)} seconds")

    # pydub indexes and measures AudioSegment objects in milliseconds, so the
    # loop bound must be len(audio) (ms). Using the duration in seconds as the
    # bound with a millisecond step would stop after the first chunk and drop
    # everything past the first 20 minutes.
    chunk_duration_ms = 20 * 60 * 1000
    total_duration_ms = len(audio)

    transcriptions = []
    for start_ms in range(0, total_duration_ms, chunk_duration_ms):
        chunk = audio[start_ms:start_ms + chunk_duration_ms]

        # Export to a named in-memory buffer instead of a temporary file.
        chunk_buffer = NamedBytesIO(name='chunk.mp3')
        chunk.export(chunk_buffer, format="mp3")
        chunk_buffer.seek(0)

        response = openai.Audio.transcribe(
            api_key=openai.api_key,
            model=model_id,
            file=chunk_buffer,
            prompt=transcription_prompt,
        )
        transcriptions.append(response['text'])

    final_transcription = ' '.join(transcriptions)

    # Print one sentence per line for readability; the returned text is
    # left unmodified.
    print(final_transcription.replace('.', '.\n'))

    return final_transcription
| | |
| |
|
def _request_completion(prompt):
    """Send ``prompt`` to the completion endpoint and return the stripped text.

    Both the scoring request and the summary request use exactly the same
    sampling settings, so they share this helper.
    """
    # NOTE(review): confirm that the "text-davinci-003" engine is still served
    # by the API version in use.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=500,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    return response.choices[0].text.strip()


def get_feedback(audio_file_path):
    """Produce interview-coaching feedback for a recorded answer.

    Steps:
      1. Transcribe the audio file with ``transcribe_audio``.
      2. Ask the completion model to score the transcript against the rubric
         embedded in the prompt below.
      3. Ask the model to summarise that feedback.
      4. Convert the summary to speech with gTTS and save it as ``Temp.mp3``
         in the current working directory (overwritten on every call).

    Args:
        audio_file_path: Path of the interviewee's MP3 recording.

    Returns:
        A four-item list: the detailed feedback, the summary, the
        transcription, and the path of the generated speech file
        (``"Temp.mp3"``).
    """
    transcribed_text = transcribe_audio(audio_file_path)

    feedback_prompt = f"""
    Pretend that you are an interview coach with 25 years of experience.
    Evaluate the following response based on the evaluation criteria:
    # please provide feedback on the following response based on the evaluation criteria:
    # language, tone & personality, and a 5-point system for clarity, vocal variety, comfort level, interest, and well-supported content.

    Evaluation criteria:
    A) Language: Score 1 - 10. the more score the better.
    The higher usage of filler words, redundant words, jargons will decrease the score
    1. use of Filler words: count the filler words and list them: eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
    example: um: 3, yeah: 2
    2. use of redundant words: list them: example: sum total, joint collaboration, unexpected surprise, future plans, new record...etc
    example: sum total: 2, joint collaboration: 1
    3. use of jargons: list them

    B) Tone & personality: Score 1 - 15: the more score the better.
    The better of the tone and personality, the higher score, Relevance, Clarity, Specificity, Confidence, Fit
    will increase the score

    1. Tone: What is the Tone? Choose top one from the following: and just report the headline
    A. Positive/optimistic: Expressing a hopeful or upbeat outlook, such as excitement, joy, or satisfaction. Example: "I'm really looking forward to this vacation!"
    B. Negative/pessimistic: Expressing a negative or gloomy outlook, such as sadness, frustration, or disappointment. Example: "I don't think I can handle any more bad news."
    C. Neutral/objective: Expressing an unbiased or factual outlook, without any emotion or bias. Example: "The temperature is 75 degrees and the sky is clear."
    D. Sarcastic: Expressing a tone of mockery or irony, often with the opposite meaning of what is being said. Example: "Oh great, another Monday morning. Just what I needed."
    E. Formal/polite: Using formal language and expressions to show respect and politeness. Example: "I would be most grateful if you could provide me with further information."
    F. Informal/casual: Using casual language and expressions to show familiarity and informality. Example: "Hey, what's up? Wanna hang out later?"
    G. Authoritative: Expressing a tone of authority or control, such as in instructions or commands. Example: "You need to follow these procedures precisely to ensure safety."
    H. Condescending: Expressing a tone of superiority or patronization towards others.
    2. Personality traits: Insights into the personality traits? Choose top one from the following: and just report the headline
    A. Extraversion: Extraverted individuals tend to be outgoing, sociable, and talkative.
    B. Introversion: Introverted individuals tend to be more reserved, reflective, and introspective.
    C. Conscientiousness: Conscientious individuals tend to be organized, responsible, and diligent.
    D. Agreeableness: Agreeable individuals tend to be friendly, cooperative, and empathetic.
    E. Anxious / Sensitive : Individuals tend to be anxious, sensitive, and easily stressed.
    3. Relevance: Does the response address the question asked and provide relevant information?
    4. Clarity: Is the response clear and concise?
    5. Specificity / examples or details: Does the response provide specific examples or details that demonstrate the interviewee's skills, experiences, or qualifications?
    6. Confidence: Does the interviewee present themselves confidently and effectively communicate their ideas?
    7. Fit: Does the response demonstrate how the interviewee's skills, experiences, or qualifications align with the job requirement?

    C) Communication metrics: Score 1 - 25: The user to be evaluated on the following criteria with a 3 point system:
    Higher the points better the score
    1. Clarity: Spoken language is clear and is easily understood Comment

    3 Is an exemplary Interviewee who is always understood.
    2 Spoken language is clear and is easily understood
    1 Spoken language is unclear or not easily understood

    2. Comfort Level

    3 Appears completely self-assured with the Interviewer
    2 Appears comfortable with the Interviewer
    1 Appears highly uncomfortable with the Interviewer

    3. Interesting content: Engages Interviewer with interesting, well-constructed content Comment

    3 Fully engages Interviewer with exemplary, well constructed content
    2 Engages Interviewer with interesting, well constructed content
    1 Content is neither interesting nor well-constructed

    4. Well Supported: Speech content is well-supported and sources are available if requested

    3 Delivers exemplary speech with a topic that is well-supported by content of the speech
    2 Speech topic is well-supported by content of speech
    1 Speech content is unrelated to the topic of the speech

    Assess the response and give your honest feedback.
    A) Language
    B) Tone & personality
    C) 3 point evaluation
    D) Overall Score = sum of points scored in A + B + C

    Put in top 3 bullet points for feedback + 3 bullet points on how to improve it.

    example output:
    " Feedback: Overall Score = 5/50

    A) Language: Score: 5/10
    1. Filler words: um: 3, yeah: 2
    2. Redundant words: sum total: 2, joint collaboration: 1
    3. Jargons: list them

    B) Tone & personality: Score: 2/15
    1. Tone: Positive/optimistic
    2. Personality traits: Extraversion
    3. Relevance: Yes
    4. Clarity: Yes
    5. Specificity / examples or details: Yes
    6. Confidence: Yes
    7. Fit: Yes

    C) 3 points evaluation: Score: 3/25
    1. Clarity: 2
    2. Comfort Level: 3
    3. Interesting content: 2
    4. Well Supported: 3

    Suggestions to improve:
    1. You used a lot of filler words. Try to avoid them.
    2. You used a lot of redundant words. Try to avoid them.
    3. You used a lot of jargons. Try to avoid them.
    4. Improve your tone. Try to be more positive.
    5. Improve your personality. Try to be more extraverted.
    6. Improve your clarity. Try to be more clear.
    ..etc

    "

    Response:
    {transcribed_text}
    """
    feedback = _request_completion(feedback_prompt)

    # The summary is generated from the model's feedback, not from the raw
    # transcript.
    summary_prompt = f"""
    Summarize the interviewee's response to the question in 3 - 5 bullet points.
    start the summary with a positive note.
    make it more professional and friendly.
    you should give feedback so that the interviewee can improve their response.


    Response:
    {feedback}
    """
    summary_response_out = _request_completion(summary_prompt)

    # Speak the summary; the saved file path is returned as the last item.
    language = "en"
    audioobj = gTTS(text=summary_response_out, lang=language, slow=False)
    audioobj.save("Temp.mp3")

    return [feedback, summary_response_out, transcribed_text, "Temp.mp3"]
| |
|
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
# Gradio front end: a single text box takes the path of the audio file and the
# four outputs receive, in order, the four items returned by get_feedback.
# The text boxes use the top-level gr.Textbox component, consistent with the
# gr.Audio output below; the gr.inputs / gr.outputs namespaces are deprecated.
iface = gr.Interface(
    fn=get_feedback,
    inputs=gr.Textbox(lines=1, label="Interviewee audio file path"),
    examples=[
        "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
        "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3",
    ],
    outputs=[
        gr.Textbox(label="Feedback"),
        gr.Textbox(label="Summary response"),
        gr.Textbox(label="Transcription"),
        # NOTE(review): "Temp.mp3" is passed as the component's initial value;
        # confirm the file exists before the first request is served.
        gr.Audio("Temp.mp3", label="Speech Output"),
    ],
    title="Interview Feedback",
    description="Get feedback on your interview response from an AI interview coach.",
    layout="vertical",
)
iface.launch()
| |
|
| | |