Vlad Bastina committed on
Commit
243586b
·
0 Parent(s):

first commit

Browse files
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+
3
+ gen-lang-client-0065207637-eaf8e92995b6.json
4
+
5
+ *.wav
6
+ prompt.txt
__pycache__/gemini_call.cpython-312.pyc ADDED
Binary file (3.76 kB). View file
 
__pycache__/sentiment_analysis.cpython-312.pyc ADDED
Binary file (667 Bytes). View file
 
__pycache__/translation.cpython-312.pyc ADDED
Binary file (2.93 kB). View file
 
gemini_call.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import google.generativeai as genai
import os

# System instruction for Gemini: score each sentence of a conversation on a
# -10 (furious) .. +10 (peaceful) scale, render an ASCII histogram, and close
# with an overall summary. Plain string — the original used an f-string prefix
# with no placeholders, which was misleading and has been removed.
final_prompt = '''Task:

Input Message:
Analyze the sentiment of each sentence in the provided conversation.
For each sentence, assign a sentiment score ranging from -10 (furious) to +10 (peaceful).
Generate a histogram of the conversation's sentences, where:
Negative numbers represent furious sentences.
Higher numbers represent more peaceful sentences.
At the end of the analysis, summarize the general sentiment of the conversation.

Instructions for Sentiment Analysis:

Sentence-Level Sentiment Analysis:

For each sentence, determine its sentiment using the scale from -10 to +10.
-10: Extremely furious or hostile.
0: Neutral or balanced.
+10: Extremely peaceful or calm.
Example Sentences:

"I am so angry right now!" => Sentiment Score: -9 (furious)
"Everything is going wrong today." => Sentiment Score: -7 (frustrated)
"But, I guess there’s nothing I can do." => Sentiment Score: -3 (resigned but slightly peaceful)
"Maybe tomorrow will be better." => Sentiment Score: +3 (optimistic and calm)

Create a Histogram:

Based on the sentiment scores, plot a histogram where:
The x-axis represents the sentence number.
The y-axis represents the sentiment score.
The values should range from -10 (furious) to +10 (peaceful).
Example Histogram (hypothetical values):

Sentence 1: -9
Sentence 2: -7
Sentence 3: -3
Sentence 4: +3

Histogram:
| Sentence 1 | Sentence 2 | Sentence 3 | Sentence 4 |
|------------|------------|------------|------------|
| -9 | -7 | -3 | +3 |

Conclusion:

After analyzing all sentences, provide a conclusion about the overall sentiment of the conversation.
If the majority of sentences have a negative sentiment (below 0), the conversation is likely to be angry, frustrated, or tense.
If the majority of sentences have a positive sentiment (above 0), the conversation is peaceful or optimistic.
If the sentiments are mixed (both positive and negative), summarize the shift in mood throughout the conversation.

Example of Output:

Sentiment Scores:
"I am so angry right now!" => Sentiment Score: -9 (Furious)
"Everything is going wrong today." => Sentiment Score: -7 (Frustrated)
"But, I guess there’s nothing I can do." => Sentiment Score: -3 (Resigned)
"Maybe tomorrow will be better." => Sentiment Score: +3 (Optimistic)

Histogram:
Sentence 1: -9
Sentence 2: -7
Sentence 3: -3
Sentence 4: +3
(Plot: A simple histogram with y-axis ranging from -10 to +10, showing the corresponding sentiment values.)'''


# Configure the Gemini client at import time. GOOGLE_API_KEY must already be
# present in the environment — callers (e.g. sentiment_analysis.py) are
# responsible for loading .env BEFORE importing this module.
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemini-2.0-flash-exp", system_instruction=final_prompt)
73
+
74
def ask_gemini(prompt: str) -> str:
    """Send *prompt* to the module-level Gemini model and return its text reply."""
    return model.generate_content(prompt).text
79
+
80
+ if __name__=="__main__":
81
+ response = model.generate_content("I hated every minute of my experience with you guys. You are straight garbage. But i love the way you look")
82
+ print(response.text)
sentiment_analysis.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dotenv import load_dotenv

# Load .env FIRST: gemini_call reads GOOGLE_API_KEY at import time, so the
# environment must be populated before that module is imported. The original
# called load_dotenv() after the imports, leaving the API key unset.
load_dotenv()

from translation import get_transcription_from_sound
from gemini_call import ask_gemini
6
+
7
def get_analysis(file_path) -> str:
    """Transcribe the audio file at *file_path* and return Gemini's sentiment analysis."""
    transcript = get_transcription_from_sound(file_path)
    return ask_gemini(transcript)
14
+
15
+ if __name__ == "__main__":
16
+ file_path = "harvard.wav"
17
+ print(f'Analysis result {get_analysis(file_path)}')
streamlit_app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentiment_analysis import get_analysis
2
+
3
+ import streamlit as st
4
+ import speech_recognition as sr
5
+ import pyaudio
6
+ import wave
7
+ import time
8
+
9
+
10
+ # Function to record the user's voice and save it as a .wav file
11
# Function to record the user's voice and save it as a .wav file
def record_voice():
    """Record one utterance from the default microphone and save it as a WAV file.

    Returns:
        A tuple of (wav_file_name, speech_recognition.AudioData).
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone()

    # Record a single utterance. The original listened TWICE: it discarded the
    # first take and silently re-recorded while writing the file, so the user
    # had to speak again after "Recording complete!" was shown.
    with mic as source:
        st.write("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    st.write("Recording complete!")

    # Persist the captured audio so it can be replayed or reprocessed later.
    with open("recorded_audio.wav", "wb") as f:
        f.write(audio.get_wav_data())

    # Return the file name of the recorded audio plus the in-memory audio data
    return "recorded_audio.wav", audio
30
+
31
+ # Function to transcribe the audio file
32
# Function to transcribe the audio file
def transcribe_audio(audio):
    """Transcribe recorded AudioData via Google's free recognizer.

    Shows the recognized text in the UI and returns it; returns None (after
    showing an error) when recognition fails.
    """
    recognizer = sr.Recognizer()

    try:
        # Google's public speech recognition service (no API key required).
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio.")
    except sr.RequestError as e:
        st.error(f"Error with the speech recognition service: {e}")
    else:
        st.write(f"Recognized text: {text}")
        return text
    return None
45
+
46
# --- Streamlit UI -----------------------------------------------------------
st.title("Voice Chat App")
st.sidebar.header("Controls")

if st.sidebar.button("Start Recording"):
    st.write("Clicking this will start recording your voice...")
    time.sleep(2)  # brief pause before the recording begins

    audio_file, audio_data = record_voice()
    st.write(f"Audio saved as: {audio_file}")

    if not audio_data:
        st.write("No audio recorded.")
    else:
        # Transcribe the recorded audio and show it in the chat box.
        transcription = transcribe_audio(audio_data)
        if transcription:
            st.text_area("Chat", value=transcription, height=200)
        else:
            st.write("Sorry, no transcription available.")
translation.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.cloud import speech
2
+ import wave
3
+ from pydub import AudioSegment
4
+
5
def get_audio_properties(file_path):
    """
    Return (sample_rate, channel_count) read from the WAV file's header.
    """
    with wave.open(file_path, "rb") as wav_file:
        return wav_file.getframerate(), wav_file.getnchannels()
13
+
14
def convert_to_mono(input_path, output_path):
    """Downmix the WAV file at *input_path* to one channel and write it to *output_path*."""
    mixed_down = AudioSegment.from_wav(input_path).set_channels(1)
    mixed_down.export(output_path, format="wav")
18
+
19
def transcribe_audio(file_path):
    """Transcribe a local WAV file with the Google Cloud Speech-to-Text API.

    The transcripts of all recognition results are joined with ". " into a
    single string, which is returned.
    """
    client = speech.SpeechClient()

    # Mirror the file's actual sample rate / channel layout in the request.
    sample_rate, channels = get_audio_properties(file_path)

    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()

    recognition_audio = speech.RecognitionAudio(content=raw_bytes)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code="en-US",
        audio_channel_count=channels,
        # Multi-channel files get one transcript per channel.
        enable_separate_recognition_per_channel=(channels > 1),
    )

    response = client.recognize(config=recognition_config, audio=recognition_audio)

    # Take the top alternative of each result and join them into one string.
    top_transcripts = (result.alternatives[0].transcript for result in response.results)
    return ". ".join(top_transcripts)
48
+
49
def get_transcription_from_sound(file_path: str) -> str:
    """Downmix *file_path* to mono, transcribe it, and return the transcript text."""
    mono_path = "audio_mono.wav"
    convert_to_mono(file_path, mono_path)
    return transcribe_audio(mono_path)
56
+
57
+ if __name__=="__main__":
58
+ file_path = "jackhammer.wav"
59
+ output_path = "audio_mono.wav"
60
+ convert_to_mono(file_path,output_path)
61
+ final_transcript = transcribe_audio(output_path)
62
+ print(final_transcript)