Spaces:
Sleeping
Sleeping
Vlad Bastina committed on
Commit ·
243586b
0
Parent(s):
first commit
Browse files- .gitignore +6 -0
- __pycache__/gemini_call.cpython-312.pyc +0 -0
- __pycache__/sentiment_analysis.cpython-312.pyc +0 -0
- __pycache__/translation.cpython-312.pyc +0 -0
- gemini_call.py +82 -0
- sentiment_analysis.py +17 -0
- streamlit_app.py +70 -0
- translation.py +62 -0
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
|
| 3 |
+
gen-lang-client-0065207637-eaf8e92995b6.json
|
| 4 |
+
|
| 5 |
+
*.wav
|
| 6 |
+
prompt.txt
|
__pycache__/gemini_call.cpython-312.pyc
ADDED
|
Binary file (3.76 kB). View file
|
|
|
__pycache__/sentiment_analysis.cpython-312.pyc
ADDED
|
Binary file (667 Bytes). View file
|
|
|
__pycache__/translation.cpython-312.pyc
ADDED
|
Binary file (2.93 kB). View file
|
|
|
gemini_call.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import google.generativeai as genai
import os

# System instruction handed to Gemini: describes the per-sentence sentiment
# scoring task (-10 furious .. +10 peaceful), the histogram format, and the
# expected conclusion/output shape.
# NOTE(review): the f-string prefix is unnecessary — the literal contains no
# placeholders — but it is harmless.
final_prompt = f'''Task:

Input Message:
Analyze the sentiment of each sentence in the provided conversation.
For each sentence, assign a sentiment score ranging from -10 (furious) to +10 (peaceful).
Generate a histogram of the conversation's sentences, where:
Negative numbers represent furious sentences.
Higher numbers represent more peaceful sentences.
At the end of the analysis, summarize the general sentiment of the conversation.

Instructions for Sentiment Analysis:

Sentence-Level Sentiment Analysis:

For each sentence, determine its sentiment using the scale from -10 to +10.
-10: Extremely furious or hostile.
0: Neutral or balanced.
+10: Extremely peaceful or calm.
Example Sentences:

"I am so angry right now!" => Sentiment Score: -9 (furious)
"Everything is going wrong today." => Sentiment Score: -7 (frustrated)
"But, I guess there’s nothing I can do." => Sentiment Score: -3 (resigned but slightly peaceful)
"Maybe tomorrow will be better." => Sentiment Score: +3 (optimistic and calm)

Create a Histogram:

Based on the sentiment scores, plot a histogram where:
The x-axis represents the sentence number.
The y-axis represents the sentiment score.
The values should range from -10 (furious) to +10 (peaceful).
Example Histogram (hypothetical values):

Sentence 1: -9
Sentence 2: -7
Sentence 3: -3
Sentence 4: +3

Histogram:
| Sentence 1 | Sentence 2 | Sentence 3 | Sentence 4 |
|------------|------------|------------|------------|
| -9 | -7 | -3 | +3 |

Conclusion:

After analyzing all sentences, provide a conclusion about the overall sentiment of the conversation.
If the majority of sentences have a negative sentiment (below 0), the conversation is likely to be angry, frustrated, or tense.
If the majority of sentences have a positive sentiment (above 0), the conversation is peaceful or optimistic.
If the sentiments are mixed (both positive and negative), summarize the shift in mood throughout the conversation.

Example of Output:

Sentiment Scores:
"I am so angry right now!" => Sentiment Score: -9 (Furious)
"Everything is going wrong today." => Sentiment Score: -7 (Frustrated)
"But, I guess there’s nothing I can do." => Sentiment Score: -3 (Resigned)
"Maybe tomorrow will be better." => Sentiment Score: +3 (Optimistic)

Histogram:
Sentence 1: -9
Sentence 2: -7
Sentence 3: -3
Sentence 4: +3
(Plot: A simple histogram with y-axis ranging from -10 to +10, showing the corresponding sentiment values.)'''


# The API key is read from the environment AT IMPORT TIME. Any .env loading
# (e.g. dotenv) must therefore happen before this module is imported, or the
# key will be None here. genai.configure does not validate the key eagerly;
# a missing/invalid key only fails later on the first generate_content call.
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)
# Module-level model shared by ask_gemini(); the sentiment-analysis prompt is
# installed as the system instruction.
model = genai.GenerativeModel("gemini-2.0-flash-exp" , system_instruction=final_prompt)
| 73 |
+
|
| 74 |
+
def ask_gemini(prompt:str) ->str:
    """Send *prompt* to the module-level Gemini model and return its text reply."""
    return model.generate_content(prompt).text
|
| 79 |
+
|
| 80 |
+
if __name__=="__main__":
    # Quick manual smoke test with a deliberately mixed-sentiment message.
    demo_message = ("I hated every minute of my experience with you guys. "
                    "You are straight garbage. But i love the way you look")
    demo_response = model.generate_content(demo_message)
    print(demo_response.text)
|
sentiment_analysis.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BUGFIX: load the .env file BEFORE importing gemini_call. gemini_call reads
# GOOGLE_API_KEY at import time, so with the original ordering (project
# imports first, load_dotenv() last) the key from .env was never visible
# to it and the API was configured with api_key=None.
from dotenv import load_dotenv

load_dotenv()

from translation import get_transcription_from_sound
from gemini_call import ask_gemini
|
| 6 |
+
|
| 7 |
+
def get_analysis(file_path)->str:
    """Transcribe the audio file at *file_path* and return Gemini's sentiment analysis of it."""
    # Speech -> text, then text -> sentiment report.
    return ask_gemini(get_transcription_from_sound(file_path))
|
| 14 |
+
|
| 15 |
+
if __name__ == "__main__":
    # Manual smoke test against a sample recording.
    result = get_analysis("harvard.wav")
    print(f'Analysis result {result}')
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentiment_analysis import get_analysis
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import speech_recognition as sr
|
| 5 |
+
import pyaudio
|
| 6 |
+
import wave
|
| 7 |
+
import time
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Function to record the user's voice and save it as a .wav file
def record_voice():
    """Record one utterance from the default microphone and save it to disk.

    Returns:
        tuple: ("recorded_audio.wav", AudioData) — the saved file name and
        the captured audio object for downstream transcription.
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone()

    # Set up the microphone, calibrate for background noise, and capture
    # a single utterance.
    with mic as source:
        st.write("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
        st.write("Recording complete!")

    # BUGFIX: the original reopened the microphone here and listened a
    # SECOND time inside the file-write block, silently discarding the
    # recording captured above and forcing the user to speak twice.
    # Save the audio we already have instead.
    with open("recorded_audio.wav", "wb") as f:
        f.write(audio.get_wav_data())

    # Return the file name of the recorded audio plus the audio data itself
    return "recorded_audio.wav", audio
|
| 30 |
+
|
| 31 |
+
# Function to transcribe the audio file
def transcribe_audio(audio):
    """Recognize speech in *audio* via Google's free recognizer.

    Shows the result (or an error) in the Streamlit UI and returns the
    recognized text, or None when recognition fails.
    """
    recognizer = sr.Recognizer()

    try:
        # Recognize the speech using Google's speech recognition service
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio.")
    except sr.RequestError as e:
        st.error(f"Error with the speech recognition service: {e}")
    else:
        st.write(f"Recognized text: {text}")
        return text
    return None
|
| 45 |
+
|
| 46 |
+
# Streamlit app setup — this top-level script re-runs on every interaction,
# so the recording flow below executes once per button click.
st.title("Voice Chat App")

st.sidebar.header("Controls")
# True only on the rerun triggered by clicking the sidebar button.
start_button = st.sidebar.button("Start Recording")

if start_button:
    st.write("Clicking this will start recording your voice...")
    time.sleep(2)  # Pause for a moment before starting to record
    audio_file, audio_data = record_voice()

    # Saving and displaying the recorded audio
    st.write(f"Audio saved as: {audio_file}")

    # Transcribe the recorded audio
    if audio_data:
        transcription = transcribe_audio(audio_data)
        if transcription:
            st.text_area("Chat", value=transcription, height=200)
        else:
            # transcribe_audio returned None (recognition failed).
            st.write("Sorry, no transcription available.")
    else:
        st.write("No audio recorded.")
translation.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.cloud import speech
|
| 2 |
+
import wave
|
| 3 |
+
from pydub import AudioSegment
|
| 4 |
+
|
| 5 |
+
def get_audio_properties(file_path):
    """
    Get sample rate and number of channels from the WAV file.

    Returns:
        tuple: (sample_rate_hz, channel_count)
    """
    wav_file = wave.open(file_path, "rb")
    try:
        return wav_file.getframerate(), wav_file.getnchannels()
    finally:
        wav_file.close()
|
| 13 |
+
|
| 14 |
+
def convert_to_mono(input_path, output_path):
    """Down-mix the WAV at *input_path* to one channel and write it to *output_path*."""
    AudioSegment.from_wav(input_path).set_channels(1).export(output_path, format="wav")
|
| 18 |
+
|
| 19 |
+
def transcribe_audio(file_path):
    """Transcribe the WAV at *file_path* with the Google Cloud Speech API.

    Returns:
        str: the top transcript of every result, joined with ". ".
    """
    # Initialize the speech client
    client = speech.SpeechClient()

    # Match the recognition config to the file's actual properties.
    sample_rate, channels = get_audio_properties(file_path)

    # Read the raw audio bytes for inline (non-GCS) recognition.
    with open(file_path, "rb") as audio_file:
        audio_content = audio_file.read()

    recognition_audio = speech.RecognitionAudio(content=audio_content)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code="en-US",
        audio_channel_count=channels,
        # Multi-channel input gets one recognition stream per channel.
        enable_separate_recognition_per_channel=(channels > 1)
    )

    # Call the Google Cloud Speech API for recognition
    response = client.recognize(config=recognition_config, audio=recognition_audio)

    # Stitch the best alternative of each result into a single string.
    return ". ".join(result.alternatives[0].transcript for result in response.results)
|
| 48 |
+
|
| 49 |
+
def get_transcription_from_sound(file_path:str, output_path:str="audio_mono.wav")->str:
    """Convert *file_path* to mono and return its Google Cloud transcription.

    Args:
        file_path: source WAV file to transcribe.
        output_path: where the intermediate mono WAV is written. Defaults to
            "audio_mono.wav", the value the original hard-coded; callers can
            now override it to avoid clobbering a shared scratch file.

    Returns:
        The concatenated transcript string.
    """
    # The speech config we use assumes LINEAR16; down-mix first so channel
    # handling stays simple.
    convert_to_mono(file_path, output_path)

    return transcribe_audio(output_path)
|
| 56 |
+
|
| 57 |
+
if __name__=="__main__":
    # Consistency fix: the original duplicated convert_to_mono +
    # transcribe_audio inline; call the public helper instead so the demo
    # and library paths cannot drift apart. Behavior is identical (same
    # intermediate "audio_mono.wav", same transcript printed).
    print(get_transcription_from_sound("jackhammer.wav"))
|