# Hugging Face Space: voice-driven expense tracker
# (Gradio microphone input -> Whisper transcription -> OpenAI JSON extraction)
import json
import os
import tempfile

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write
| client = OpenAI() | |
def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as a JSON

    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        dict: A JSON object with 'amount', 'description', and 'category'
              fields, or an empty dict if the request or parsing fails.
    '''
    prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            # JSON mode guarantees the message content is valid JSON text.
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt}
            ]
        )
        # BUG FIX: the API returns the JSON as a *string*; parse it so the
        # function consistently returns a dict on both the success and the
        # error path (the docstring and the `return {}` below promise dict).
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}
def transcribe(audio):
    """Transcribe a Gradio audio recording and extract transaction details.

    Input:
        audio: tuple of (sample_rate, samples ndarray) from gr.Audio, or None.
    Output:
        The result of process_transaction_details() on the transcribed text.
    Raises:
        gr.Error: if no audio was submitted or the recording is silent.
    """
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    # BUG FIX: a completely silent clip has peak == 0; dividing by it would
    # produce NaNs/inf and garbage transcription. Fail loudly instead.
    if peak == 0:
        raise gr.Error("The recording is silent. Please record some speech and try again.")
    y /= peak
    # Load the Whisper model once and reuse it across requests — reloading
    # "base" (or "small"/"medium"/"large") on every call is slow and wasteful.
    model = getattr(transcribe, "_model", None)
    if model is None:
        model = whisper.load_model("base")
        transcribe._model = model
    # Whisper's high-level API takes a file path, so round-trip through a
    # unique temp file (the fixed "temp_audio.wav" name raced between
    # concurrent requests and was never cleaned up).
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        write(tmp_path, sr, (y * 32767).astype(np.int16))
        result = model.transcribe(tmp_path)
    finally:
        os.remove(tmp_path)
    return process_transaction_details(result['text'])
# Wire the transcriber into a one-shot microphone UI; the JSON component
# renders the dict returned by the pipeline.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], max_length=10),
    outputs="json",
)

if __name__ == "__main__":
    demo.launch()