"""
A voice-activated assistant that interacts with Zoho Books and Dataverse using OpenAI's GPT-5 model.
It records audio input, transcribes it, determines the user's intent, fetches data from the relevant API, and responds with synthesized speech.
Author: Dinesh Uthayakumar
Date: 2024-10-15
Website: https://duitconsulting.com/
"""
import os
import requests
import sounddevice as sd
import whisper
from scipy.io.wavfile import write
from openai import OpenAI
from gtts import gTTS
import tempfile
import subprocess
import warnings
import json
# Whisper emits this warning on every CPU-only host; it is expected and harmless here.
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU")
# === CONFIG ===
# All credentials come from the environment; any of these may be None if unset —
# the corresponding API call will then fail at request time, not at import time.
OPENAI_KEY = os.getenv("OPENAI_API_KEY")            # OpenAI API key for chat completions
ZOHO_AUTH_TOKEN = os.getenv("ZOHO_AUTH_TOKEN")      # Zoho Books OAuth token
ZOHO_ORG_ID = os.getenv("ZOHO_ORG_ID")              # Zoho Books organization id
DATAVERSE_ENV = os.getenv("DATAVERSE_ENV_URL")      # Dataverse environment base URL
DATAVERSE_TOKEN = os.getenv("DATAVERSE_BEARER_TOKEN")  # Dataverse bearer token
DURATION = 6 # seconds of voice input
FS = 44100  # audio sample rate in Hz
# Module-level client shared by all OpenAI calls below.
client = OpenAI(api_key=OPENAI_KEY)
# === FUNCTIONS ===
def record_audio(filename="command.wav"):
    """Record DURATION seconds of mono microphone audio into a WAV file.

    Args:
        filename: Path of the WAV file to write (default "command.wav").

    Returns:
        The path the recording was written to.
    """
    print("🎙️ Listening for command...")
    # Fixed-length capture: DURATION seconds at FS Hz, single channel.
    audio = sd.rec(int(DURATION * FS), samplerate=FS, channels=1)
    sd.wait()  # block until the recording buffer is full
    write(filename, FS, audio)
    print("✅ Recording complete.")
    return filename
def transcribe_audio(filename):
    """Transcribe an English audio file to text with Whisper's "base" model.

    Args:
        filename: Path to the audio file to transcribe.

    Returns:
        The transcribed text, stripped of surrounding whitespace.

    Raises:
        Exception: Re-raised if Whisper fails to transcribe the file.
    """
    print("🗣️ Transcribing...")
    print(filename)
    model = whisper.load_model("base")
    try:
        result = model.transcribe(filename, language="en")
    except Exception as e:
        # Must re-raise: swallowing the error would leave `result` undefined
        # and crash with a NameError on the lines below.
        print("❌ Transcription error:", e)
        raise
    print("✅ You said:", result["text"])
    return result["text"].strip()
# The below version bypasses ffmpeg call and directly loads the audio file.
def transcribe_audio2(filename):
    """Transcribe an audio file with Whisper while bypassing the ffmpeg CLI.

    Loads and preprocesses the audio in-process (useful when ffmpeg is not on
    PATH), then decodes with the "base" model.

    Args:
        filename: Path to the audio file to transcribe.

    Returns:
        The transcribed text.
    """
    model = whisper.load_model("base")
    # Directly load audio (bypasses the ffmpeg subprocess call).
    audio = whisper.load_audio(os.path.abspath(filename))
    audio = whisper.pad_or_trim(audio)  # Whisper expects a fixed 30s window
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    options = whisper.DecodingOptions(language="en")
    result = whisper.decode(model, mel, options)
    print("✅ Transcription complete.")
    return result.text
def get_intent(text):
    """Ask the LLM which data source the spoken command refers to.

    Args:
        text: The transcribed user command.

    Returns:
        The raw model reply — expected (but not guaranteed) to be a JSON
        string with 'source' and 'purpose' keys; the caller parses it.
    """
    print("🤖 Understanding command...")
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": "You are a data assistant that decides which API to call."},
            {"role": "user", "content": f"The user said: '{text}'. Decide whether to fetch Zoho Books outstanding invoice total or Dataverse open opportunities revenue. Reply in JSON with 'source' and 'purpose'."}
        ]
    )
    print("✅ Intent identified.")
    return response.choices[0].message.content
def get_llm_response(text):
    """Get a free-form LLM answer for commands that match no known data source.

    Args:
        text: The transcribed user command.

    Returns:
        The model's reply text.
    """
    print("🤖 Thinking...")
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "user", "content": text}
        ]
    )
    # Was "Intent identified." — copy-paste from get_intent(); this function
    # produces an answer, not an intent.
    print("✅ Response ready.")
    return response.choices[0].message.content
def get_zoho_outstanding():
    """Sum the balances of overdue invoices in Zoho Books.

    Returns:
        A human-readable sentence with the total outstanding amount in ₹.

    Raises:
        requests.HTTPError: If the Zoho API returns a non-2xx status.
    """
    print("📊 Fetching outstanding invoices from Zoho Books...")
    url = f"https://www.zohoapis.com/books/v3/invoices?organization_id={ZOHO_ORG_ID}&status=overdue"
    headers = {"content-type":"application/x-www-form-urlencoded;charset=UTF-8", "Authorization": f"Zoho-oauthtoken {ZOHO_AUTH_TOKEN}"}
    # timeout prevents the voice loop from hanging forever on a stalled API.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    data = r.json()
    # "balance" is the unpaid remainder per invoice; missing key counts as 0.
    total_due = sum(float(inv.get("balance", 0)) for inv in data.get("invoices", []))
    return f"Total outstanding invoice amount in Zoho Books is ₹{total_due:,.2f}"
def get_dataverse_open_opportunities():
    """Sum the estimated revenue of open opportunities in Dataverse.

    Returns:
        A human-readable sentence with the total estimated revenue in ₹.

    Raises:
        requests.HTTPError: If the Dataverse API returns a non-2xx status.
    """
    print("💼 Fetching open opportunities from Dataverse...")
    # statecode eq 0 == "Open" opportunities.
    url = f"{DATAVERSE_ENV}/api/data/v9.2/opportunities?$select=name,estimatedvalue,statecode&$filter=statecode eq 0"
    headers = {
        "Authorization": f"Bearer {DATAVERSE_TOKEN}"
    }
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    data = r.json()
    # Bug fix: the old default was {}, so a record without "estimatedvalue"
    # made sum() raise TypeError. Treat missing/None values as 0.
    total_revenue = sum(float(op.get("estimatedvalue") or 0) for op in data.get("value", []))
    return f"Total estimated revenue from open opportunities is ₹{total_revenue:,.2f}"
def speak2(text):
    """Synthesize *text* to speech and open it with the OS default player (via `start`).

    Args:
        text: The sentence to speak.
    """
    print("🗣️ Speaking result...")
    tts = gTTS(text=text, lang='en')
    # delete=False: with delete=True the mp3 was removed when the `with` block
    # exited — before the player spawned by `start` could open it. Also, on
    # Windows a NamedTemporaryFile cannot be reopened by name while the handle
    # is still open, so we save only after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        temp_path = fp.name
    tts.save(temp_path)
    subprocess.run(["start", temp_path], shell=True)
def speak(text):
    """Synthesize *text* to speech and open it with the Windows default player.

    Args:
        text: The sentence to speak.
    """
    print("🗣️ Speaking result...")
    tts = gTTS(text=text, lang='en')
    # On Windows a NamedTemporaryFile cannot be reopened by name while the
    # handle is still open, so grab the path, close the handle, then save.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        temp_path = fp.name
    tts.save(temp_path)
    os.startfile(temp_path)  # Windows-only; hands the mp3 to the default player
def main():
    """Record one voice command, route it to the right data source, speak the answer.

    Pipeline: record -> transcribe -> classify intent (LLM) -> fetch data
    (Zoho / Dataverse) or fall back to a free-form LLM answer -> speak.
    Any failure is caught and printed rather than crashing the assistant.
    """
    try:
        file = record_audio()
        #For Evaluation, comment the above line and uncomment one of the below lines
        #file = "eval1_capital.wav" # For testing with a pre-recorded file
        #file = "eval2_money_customers_owe.wav" # For testing with a pre-recorded file
        #file = "eval3_total_estimated_revenue.wav" # For testing with a pre-recorded file
        if not os.path.exists(file):
            raise FileNotFoundError(f"Audio file '{file}' not found.")
        command = transcribe_audio(file)
        intent_str = get_intent(command)
        # Models routinely wrap JSON replies in markdown fences; strip them
        # before parsing so json.loads does not choke on ```json ... ```.
        intent_str = intent_str.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
        intent = json.loads(intent_str)
        print("Intent Output:", intent)
        intent_source = intent["source"].strip().lower()
        intent_purpose = intent["purpose"].strip().lower()  # fixed typo: was `internt_purpose`
        if "zoho" in intent_source or "invoice" in intent_source:
            result = get_zoho_outstanding()
        elif "dataverse" in intent_source or "opportunity" in intent_source:
            result = get_dataverse_open_opportunities()
        else:
            # Unrecognized source: answer the question directly with the LLM.
            result = get_llm_response(command)
        print("\n💬", result)
        speak(result)
    except Exception as e:
        # Top-level boundary: report and return so the script exits cleanly.
        print("❌ Error:", e)
if __name__ == "__main__":
    main()