# Hugging Face Space: voice-driven expense tracker
# (Gradio microphone input -> Whisper transcription -> OpenAI JSON extraction)
import json
import os
import tempfile

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write
| client = OpenAI() | |
def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as a JSON

    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        dict: A JSON object with 'amount', 'description', and 'category'
              fields, or an empty dict if the request or parsing fails.
    '''
    prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            # JSON mode guarantees the message content is valid JSON text.
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt}
            ]
        )
        # BUG FIX: the API returns the JSON as a *string*; parse it so the
        # function consistently returns a dict on both the success and the
        # error path (the docstring and the `return {}` below promise dict).
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}
def transcribe(audio):
    """Transcribe a Gradio audio recording and extract transaction details.

    Input:
        audio: tuple of (sample_rate, samples ndarray) from gr.Audio, or None.
    Output:
        The result of process_transaction_details() on the transcribed text.
    Raises:
        gr.Error: if no audio was submitted or the recording is silent.
    """
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    # BUG FIX: a completely silent clip has peak == 0; dividing by it would
    # produce NaNs/inf and garbage transcription. Fail loudly instead.
    if peak == 0:
        raise gr.Error("The recording is silent. Please record some speech and try again.")
    y /= peak
    # Load the Whisper model once and reuse it across requests — reloading
    # "base" (or "small"/"medium"/"large") on every call is slow and wasteful.
    model = getattr(transcribe, "_model", None)
    if model is None:
        model = whisper.load_model("base")
        transcribe._model = model
    # Whisper's high-level API takes a file path, so round-trip through a
    # unique temp file (the fixed "temp_audio.wav" name raced between
    # concurrent requests and was never cleaned up).
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        write(tmp_path, sr, (y * 32767).astype(np.int16))
        result = model.transcribe(tmp_path)
    finally:
        os.remove(tmp_path)
    return process_transaction_details(result['text'])
# Wire the transcriber into a one-shot microphone UI; the JSON component
# renders the dict returned by the pipeline.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], max_length=10),
    outputs="json",
)

if __name__ == "__main__":
    demo.launch()