Spaces:

Maramdahmen
/

audio-to-json

Runtime error

App Files Files Community

audio-to-json / app.py

Maramdahmen

Update app.py

00b17ac verified 9 months ago

raw

history blame contribute delete

2.34 kB

	import gradio as gr
	import whisper
	from pydub import AudioSegment
	import re
	import torch
	import os
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from huggingface_hub import login

	import os
	# Authenticate against the Hugging Face Hub only when a token is configured.
	# Indexing os.environ directly would abort start-up with a bare KeyError when
	# the HF_TOKEN secret is missing.
	hf_token = os.environ.get("HF_TOKEN")
	if hf_token:
	    login(token=hf_token)


	# 📦 Model loading
	model_whisper = whisper.load_model("large")
	model_id = "mistralai/Mistral-7B-Instruct-v0.2"
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# float16 is only requested on GPU. On CPU, bfloat16 keeps the same memory
	# footprint while avoiding half-precision CPU kernels, which are missing or
	# slow in some torch builds.
	model_dtype = torch.float16 if device.type == "cuda" else torch.bfloat16
	model_mistral = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=model_dtype).to(device)

	def convert_to_wav(audio_file):
	    """Return a path to a WAV version of *audio_file*.

	    Files whose name ends in ``.m4a`` (case-insensitive) are converted with
	    pydub into a freshly created temporary ``.wav`` file; any other path is
	    returned unchanged.

	    Args:
	        audio_file: Path of the input audio file, as a string.

	    Returns:
	        The path of the converted temporary file for ``.m4a`` input,
	        otherwise ``audio_file`` itself. The caller owns the temporary file
	        and is responsible for deleting it.
	    """
	    import tempfile

	    if not audio_file.lower().endswith(".m4a"):
	        return audio_file

	    # A unique file per call: a fixed "converted.wav" would be overwritten
	    # when two requests are processed at the same time.
	    fd, wav_path = tempfile.mkstemp(suffix=".wav")
	    os.close(fd)
	    audio = AudioSegment.from_file(audio_file, format="m4a")
	    audio.export(wav_path, format="wav")
	    return wav_path

	def _first_json_object(text):
	    """Return the first syntactically valid JSON object found in *text*, or None."""
	    import json

	    decoder = json.JSONDecoder()
	    start = text.find("{")
	    while start != -1:
	        try:
	            _, end = decoder.raw_decode(text, start)
	        except ValueError:
	            # Not valid JSON from this brace; try the next opening brace.
	            start = text.find("{", start + 1)
	        else:
	            return text[start:end]
	    return None


	def process(audio):
	    """Transcribe an audio file and extract event details as JSON text.

	    Args:
	        audio: Path of the audio file as a string (the ``gr.Audio`` input in
	            this file is declared with ``type="filepath"``), or an object
	            exposing the path as ``.name``. ``None`` or an empty value means
	            nothing was submitted.

	    Returns:
	        A ``(transcription, json_text)`` tuple of strings. ``json_text`` is
	        the first valid JSON object in the model's answer; if none parses,
	        the widest ``{...}`` span of the answer; otherwise ``"Erreur JSON"``.
	    """
	    if not audio:
	        return "", "Erreur : aucun fichier audio fourni"

	    # Accept both a plain path string and a file-like object with `.name`.
	    file_path = getattr(audio, "name", audio)
	    audio_path = convert_to_wav(file_path)
	    try:
	        result = model_whisper.transcribe(audio_path, language="fr", fp16=False)
	    finally:
	        # Best-effort removal of the intermediate file produced by the
	        # conversion step; the uploaded file itself is never deleted.
	        if audio_path != file_path:
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass
	    transcription = result["text"]

	    prompt = f"""
	Tu es un assistant intelligent qui extrait les informations importantes d’un texte décrivant un événement.
	Tu dois retourner un JSON contenant les champs suivants : nom, event, date, heure, capacite, titre, description.
	Si le texte ne donne pas explicitement "titre" ou "description", génère un contenu cohérent et pertinent pour ces champs à partir du contexte.

	La réponse doit être strictement un objet JSON, sans texte additionnel.

	Texte :
	{transcription}

	Résultat JSON :
	"""
	    inputs = tokenizer(prompt, return_tensors="pt").to(model_mistral.device)
	    prompt_len = inputs["input_ids"].shape[-1]
	    # temperature/top_p only take effect when sampling is enabled.
	    outputs = model_mistral.generate(
	        **inputs,
	        max_new_tokens=512,
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.9,
	    )
	    # Decode only the newly generated tokens so braces appearing in the
	    # prompt or the transcription cannot be mistaken for the answer.
	    completion = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

	    json_text = _first_json_object(completion)
	    if json_text is None:
	        # Fall back to the loose match so a nearly-valid answer is still shown.
	        loose = re.search(r"\{.*\}", completion, re.DOTALL)
	        json_text = loose.group(0) if loose else "Erreur JSON"
	    return transcription, json_text

	# Build the UI components first, then wire them to `process`.
	audio_input = gr.Audio(type="filepath", label="Fichier Audio (.m4a, .wav)")
	result_boxes = [
	    gr.Textbox(label="Transcription"),
	    gr.Textbox(label="JSON extrait"),
	]

	demo = gr.Interface(
	    title="🗣️ Audio vers JSON structuré",
	    description="Uploadez un fichier audio contenant la description d’un événement. Obtenez la transcription + un JSON structuré.",
	    fn=process,
	    inputs=audio_input,
	    outputs=result_boxes,
	)

	# Start the web server for the interface.
	demo.launch()