Spaces:

syafiqq02
/

DeploySOAP

Sleeping

App Files Files Community

DeploySOAP / app.py

syafiqq02

gradio commit

618375e 8 months ago

raw

history blame contribute delete

13 kB

	import gradio as gr
	import threading
	import os
	import requests
	import string
	import time
	from pydub import AudioSegment
	from nltk.tokenize import word_tokenize
	import nltk
	from nltk.corpus import words, stopwords
	from dotenv import load_dotenv

	# Download the NLTK resources (only needed once).
	# word_tokenize loads "punkt_tab" on newer NLTK releases and "punkt" on older
	# ones, so both are requested; an unavailable package only logs an error.
	nltk.download('punkt')
	nltk.download('punkt_tab')
	nltk.download('words')
	nltk.download('stopwords')

	# Service endpoints come from the environment (optionally via a .env file).
	load_dotenv()
	API_TRANSCRIBE = os.getenv("API_TRANSCRIBE")
	API_TEXT = os.getenv("API_TEXT")

	# Reference vocabularies taken from the NLTK corpora.
	english_words = set(words.words())
	indonesian_stopwords = set(stopwords.words('indonesian'))

	def load_indonesian_wordlist(filepath='wordlist.lst'):
	    """Read a one-word-per-line file into a set of lowercase words.

	    The file is decoded as UTF-8 first and as Latin-1 when UTF-8 decoding
	    fails. Any other failure (for example a missing file) produces an
	    empty set instead of an exception.

	    Args:
	        filepath: Path of the word list to read.

	    Returns:
	        The set of stripped, lowercased, non-blank lines, or an empty set
	        when the file could not be read.
	    """
	    for codec in ('utf-8', 'latin-1'):
	        try:
	            with open(filepath, encoding=codec) as handle:
	                entries = set()
	                for raw_line in handle:
	                    word = raw_line.strip()
	                    if word:
	                        entries.add(word.lower())
	                return entries
	        except UnicodeDecodeError:
	            # Wrong encoding guess: move on to the next candidate.
	            continue
	        except Exception:
	            return set()
	    return set()

	# Combined vocabulary: the local word list merged with the NLTK English words.
	indonesian_words = load_indonesian_wordlist()
	valid_words = english_words | indonesian_words

	def contains_medical_terms_auto_threshold(text, medical_words):
	    """Tell whether a large enough share of the words in ``text`` is medical.

	    The text is lowercased and tokenized; only purely alphabetic tokens are
	    counted. At least 40% of them must be in ``medical_words`` when there
	    are five tokens or fewer, and at least 10% otherwise.

	    Args:
	        text: The text to inspect.
	        medical_words: Collection of lowercase words treated as medical terms.

	    Returns:
	        True when the share of matching tokens reaches the threshold, False
	        otherwise (including when there is no alphabetic token at all).
	    """
	    candidates = []
	    for token in word_tokenize(text.lower()):
	        if token.isalpha():
	            candidates.append(token.strip(string.punctuation))

	    total = len(candidates)
	    if total == 0:
	        return False

	    hits = 0
	    for word in candidates:
	        if word in medical_words:
	            hits += 1

	    # Short inputs need a much denser share of matches than long ones.
	    required = 0.1 if total > 5 else 0.4
	    return hits / total >= required

	# Vocabulary used by the medical-term check, read from the word list file.
	medical_words = load_indonesian_wordlist(filepath='wordlist.lst')

	# Longest accepted audio duration, in seconds.
	MAX_DURATION_SECONDS = 10 * 60

	def validate_audio_duration(audio_file):
	    """Check that an audio file is no longer than MAX_DURATION_SECONDS.

	    Args:
	        audio_file: The audio file handed to ``AudioSegment.from_file``.

	    Returns:
	        A ``(valid, duration)`` pair. ``duration`` is the segment length
	        divided by 1000, or ``-1`` (with ``valid`` False) when the file
	        could not be loaded.
	    """
	    try:
	        clip = AudioSegment.from_file(audio_file)
	        seconds = len(clip) / 1000.0
	    except Exception:
	        # Loading failed; -1 tells callers the duration is unknown.
	        return False, -1
	    return seconds <= MAX_DURATION_SECONDS, seconds

	def start_recording():
	    """Log that recording has begun and return the matching status label."""
	    status_label = "🔴 Recording..."
	    print("🎙️ Recording started...")
	    return status_label

	def stop_recording(audio):
	    """Return the status label to show once a recording has ended.

	    Args:
	        audio: The recorded audio, or None when nothing was captured.

	    Returns:
	        The "ready to process" label when audio exists, otherwise the
	        "ready to record" label.
	    """
	    if audio is None:
	        print("❌ No audio recorded")
	        return "⚪ Ready to record"

	    print("✅ Recording completed!")
	    return "✅ Ready to process"

	def test_microphone():
	    """Log a microphone test and return the hint shown to the user."""
	    hint = "🔧 Testing microphone... Silakan coba record lagi"
	    print("🔧 Testing microphone...")
	    return hint

	def reset_recording_status():
	    """Return the idle label for the recording status box."""
	    idle_label = "⚪ Ready to record"
	    return idle_label

	def handle_audio(audio_file, timeout=(10, 600)):
	    """Validate an audio file and send it to the transcription endpoint.

	    Args:
	        audio_file: Path of the audio file, or None when nothing was given.
	        timeout: ``(connect, read)`` timeout in seconds passed to
	            ``requests.post`` so a stalled server cannot block the request
	            forever.

	    Returns:
	        A 4-tuple ``(validation_message, transcript, soap, tags)``. On
	        success the message is empty; on any failure the message describes
	        the problem and the three other items are empty strings.
	    """
	    if audio_file is None:
	        return "❌ Tidak ada file audio", "", "", ""

	    valid, duration = validate_audio_duration(audio_file)
	    if not valid:
	        if duration == -1:
	            # -1 is the marker for a file that could not be loaded.
	            msg = "⚠️ Gagal memproses file audio."
	        else:
	            msg = f"⚠️ Durasi rekaman terlalu panjang ({duration:.1f}s). Maksimal {MAX_DURATION_SECONDS}s."
	        return msg, "", "", ""

	    try:
	        with open(audio_file, "rb") as f:
	            response = requests.post(API_TRANSCRIBE, files={"audio": f}, timeout=timeout)

	        # Report HTTP failures as such instead of as a JSON parsing error.
	        response.raise_for_status()
	        result = response.json()
	        if not isinstance(result, dict):
	            # Anything but a JSON object carries none of the expected fields.
	            result = {}

	        # A field that is present but null is treated like a missing field.
	        transcription = result.get("transcription") or ""
	        soap_content = result.get("soap_content") or ""
	        tags_content = result.get("tags_content") or ""

	        if not transcription and not soap_content and not tags_content:
	            return "⚠️ Tidak ada hasil dari proses audio", "", "", ""

	        return "", transcription, soap_content, tags_content

	    except Exception as e:
	        # Boundary for the UI: every failure becomes a visible message.
	        return f"❌ Error processing audio: {str(e)}", "", "", ""

	def handle_text(dialogue, timeout=(10, 600)):
	    """Validate a typed dialogue and send it to the text endpoint.

	    Args:
	        dialogue: The conversation text; None is treated like empty text.
	        timeout: ``(connect, read)`` timeout in seconds passed to
	            ``requests.post`` so a stalled server cannot block the request
	            forever.

	    Returns:
	        A 4-tuple ``(validation_message, transcript, soap, tags)``. On
	        success the message is empty and the transcript is the input text;
	        on any failure the message describes the problem and the three
	        other items are empty strings.
	    """
	    # Guard against None as well as blank text before calling .strip().
	    if not dialogue or not dialogue.strip():
	        return "⚠️ Teks tidak boleh kosong", "", "", ""

	    if not contains_medical_terms_auto_threshold(dialogue, medical_words):
	        return "⚠️ Teks tidak mengandung istilah medis yang cukup untuk diproses.", "", "", ""

	    try:
	        response = requests.post(API_TEXT, json={"dialogue": dialogue}, timeout=timeout)

	        # Report HTTP failures as such instead of as a JSON parsing error.
	        response.raise_for_status()
	        result = response.json()
	        if not isinstance(result, dict):
	            # Anything but a JSON object carries none of the expected fields.
	            result = {}

	        # A field that is present but null is treated like a missing field.
	        soap_content = result.get("soap_content") or ""
	        tags_content = result.get("tags_content") or ""

	        if not soap_content and not tags_content:
	            return "⚠️ Tidak ada hasil dari proses teks", "", "", ""

	        return "", dialogue, soap_content, tags_content

	    except Exception as e:
	        # Boundary for the UI: every failure becomes a visible message.
	        return f"❌ Error processing text: {str(e)}", "", "", ""

	def toggle_inputs_with_refresh(choice):
	    """Build the updates that switch the form to the chosen input method.

	    Only the input and validation box of ``choice`` stay visible; all
	    three inputs and the three result boxes are emptied.

	    Args:
	        choice: The selected input method label.

	    Returns:
	        A 9-tuple of updates: the three inputs (upload, record, text), the
	        three validation boxes in the same order, then transcript, SOAP
	        and tags.
	    """
	    upload_on = choice == "Upload Audio"
	    record_on = choice == "Realtime Recording"
	    text_on = choice == "Input Teks"

	    input_updates = (
	        gr.update(visible=upload_on, value=None),
	        gr.update(visible=record_on, value=None),
	        gr.update(visible=text_on, value=""),
	    )
	    validation_updates = tuple(
	        gr.update(visible=flag) for flag in (upload_on, record_on, text_on)
	    )
	    result_updates = tuple(gr.update(value="") for _ in range(3))

	    return input_updates + validation_updates + result_updates

	def clear_all_data():
	    """Build the updates that reset every input, message and result.

	    Returns:
	        A 10-tuple of updates in this order: audio_upload, audio_record,
	        text_input, validation_upload, validation_realtime,
	        validation_text, recording_status, transcript_output, soap_output,
	        tags_output.
	    """
	    # The two audio inputs are cleared with None.
	    resets = [gr.update(value=None), gr.update(value=None)]
	    # The text input and the three validation boxes are cleared with "".
	    resets += [gr.update(value="") for _ in range(4)]
	    # The recording status goes back to its idle label.
	    resets.append(gr.update(value="⚪ Ready to record"))
	    # Transcript, SOAP and tags are cleared with "".
	    resets += [gr.update(value="") for _ in range(3)]
	    return tuple(resets)

	def process_data(choice, audio_upload, audio_record, text_input):
	    """Run the handler for the chosen input method and lay out its results.

	    Args:
	        choice: The selected input method label.
	        audio_upload: Value of the upload audio input.
	        audio_record: Value of the microphone audio input.
	        text_input: Value of the text input.

	    Returns:
	        A 6-tuple ``(validation_upload, validation_realtime,
	        validation_text, transcript, soap, tags)``. Only the validation
	        slot of the chosen method carries the handler's message; an
	        unknown choice yields six empty strings.
	    """
	    # slot is the position of the chosen method's validation message.
	    if choice == "Upload Audio":
	        slot = 0
	        message, transcript, soap, tags = handle_audio(audio_upload)
	    elif choice == "Realtime Recording":
	        slot = 1
	        message, transcript, soap, tags = handle_audio(audio_record)
	    elif choice == "Input Teks":
	        slot = 2
	        # For text input the transcript is the text itself.
	        message, transcript, soap, tags = handle_text(text_input)
	    else:
	        # Unknown choice: clear everything.
	        return ("",) * 6

	    validations = ["", "", ""]
	    validations[slot] = message
	    return (*validations, transcript, soap, tags)

	# Build the interface with Gradio's default look
	with gr.Blocks(title="🩺 SOAP AI") as app:
	    # Labels of the three input methods, in display order
	    input_modes = ("Upload Audio", "Realtime Recording", "Input Teks")

	    # Header
	    gr.Markdown("# 🎙️ SOAP AI - Medical Transcription & Analysis")
	    gr.Markdown("Aplikasi untuk mengkonversi percakapan dokter-pasien menjadi format SOAP")

	    # Input method selector with the clear button beside it
	    with gr.Row():
	        with gr.Column(scale=8):
	            input_choice = gr.Dropdown(
	                label="🎯 Pilih Metode Input",
	                choices=list(input_modes),
	                value="Realtime Recording",
	            )
	        with gr.Column(scale=2):
	            clear_button = gr.Button("🗑️ Clear", variant="secondary")

	    # Input section - upload audio (hidden until selected)
	    with gr.Group(visible=False) as upload_audio_group:
	        gr.Markdown("### 📁 Upload Audio File")
	        audio_upload = gr.Audio(
	            label="📁 Upload File Audio",
	            sources=["upload"],
	            type="filepath",
	        )

	    # Input section - record audio (shown by default)
	    with gr.Group(visible=True) as record_audio_group:
	        gr.Markdown("### 🎵 Record Your Audio")
	        recording_status = gr.Textbox(
	            value="⚪ Ready to record",
	            lines=1,
	            show_label=False,
	            interactive=False,
	        )
	        audio_record = gr.Audio(
	            label="🎙️ Realtime Recording",
	            sources=["microphone"],
	            type="filepath",
	        )

	    # Input section - text input (hidden until selected)
	    with gr.Group(visible=False) as text_input_group:
	        gr.Markdown("### 📝 Input Teks")
	        text_input = gr.Textbox(
	            label="📝 Masukkan Percakapan Dokter-Pasien",
	            placeholder="Ketik percakapan antara dokter dan pasien di sini...",
	            lines=6,
	        )

	    # Validation section - one read-only message box per input method
	    validation_upload = gr.Textbox(
	        label="⚠️ Validasi Upload Audio",
	        visible=False,
	        interactive=False,
	        lines=1,
	    )
	    validation_realtime = gr.Textbox(
	        label="⚠️ Validasi Realtime Recording",
	        visible=True,
	        interactive=False,
	        lines=1,
	    )
	    validation_text = gr.Textbox(
	        label="⚠️ Validasi Input Teks",
	        visible=False,
	        interactive=False,
	        lines=1,
	    )

	    # Process button
	    process_button = gr.Button("🚀 Proses ke SOAP", variant="primary", size="lg")

	    # Output section
	    gr.Markdown("## 📋 Hasil Analisis")

	    transcript_output = gr.Textbox(label="📝 Hasil Transkripsi", lines=4)
	    soap_output = gr.Textbox(label="📋 Ringkasan SOAP", lines=8)
	    tags_output = gr.Textbox(label="🏷️ Medical Tags", lines=6)

	    # Component lists shared by the event wiring below
	    mode_groups = [upload_audio_group, record_audio_group, text_input_group]
	    validation_boxes = [validation_upload, validation_realtime, validation_text]
	    result_boxes = [transcript_output, soap_output, tags_output]

	    # Switching the input method shows only the matching group and
	    # validation box, and empties the three result boxes.
	    input_choice.change(
	        fn=lambda choice: (
	            *(gr.update(visible=(choice == mode)) for mode in input_modes),
	            *(gr.update(visible=(choice == mode)) for mode in input_modes),
	            *(gr.update(value="") for _ in range(3)),
	        ),
	        inputs=input_choice,
	        outputs=mode_groups + validation_boxes + result_boxes,
	    )

	    # Recording events keep the status box up to date
	    audio_record.start_recording(fn=start_recording, outputs=recording_status)
	    audio_record.stop_recording(
	        fn=stop_recording,
	        inputs=audio_record,
	        outputs=recording_status,
	    )

	    # The clear button resets every input, message and result
	    clear_button.click(
	        fn=clear_all_data,
	        outputs=(
	            [audio_upload, audio_record, text_input]
	            + validation_boxes
	            + [recording_status]
	            + result_boxes
	        ),
	    )

	    # The process button runs the handler of the selected input method
	    process_button.click(
	        fn=process_data,
	        inputs=[input_choice, audio_upload, audio_record, text_input],
	        outputs=validation_boxes + result_boxes,
	        show_progress="minimal",
	    )

	# Startup information
	if __name__ == "__main__":
	    # Banner printed one entry per line; "" entries produce blank lines.
	    startup_lines = (
	        "🚀 Starting SOAP AI Application...",
	        "📋 Setup Instructions:",
	        "1. Install dependencies: pip install gradio pydub nltk requests python-dotenv",
	        "2. Make sure wordlist.lst file is available",
	        "3. Set up your .env file with API_TRANSCRIBE and API_TEXT",
	        "",
	        "\n🌐 Application will start at: http://localhost:7860",
	        "🎙️ Make sure to allow microphone access when using Realtime Recording!",
	        "",
	    )
	    for banner_line in startup_lines:
	        print(banner_line)

	    app.launch()