Spaces:

walaa2022
/

hear

Runtime error

App Files Files Community

hear / app.py

walaa2022

Create app.py

e07cefa verified 9 months ago

raw

history blame contribute delete

2.38 kB

	import gradio as gr
	import torch
	from transformers import AutoFeatureExtractor, AutoModel
	import numpy as np
	from sklearn.linear_model import LogisticRegression

	# Load the HeAR model and its feature extractor once, at import time, so
	# every request handled by the app reuses the same objects.
	MODEL_ID = "google/hear"
	feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
	model = AutoModel.from_pretrained(MODEL_ID)

	# Width of the embedding vectors fed to the classifier below. It is read from
	# the loaded model's config instead of being hard-coded, so the classifier
	# weights always match the model; 768 (the previously hard-coded value) is
	# kept as a fallback when the config does not expose ``hidden_size``.
	# NOTE(review): assumes the last dimension of ``last_hidden_state`` equals
	# ``config.hidden_size`` for this checkpoint -- confirm.
	EMBEDDING_DIM = getattr(model.config, "hidden_size", None) or 768

	# Dummy classifier (replace with your trained classifier).
	# For demonstration, we simulate a trained classifier with random weights:
	# the fitted attributes are assigned by hand rather than learned, so the
	# predictions are meaningless and change on every restart.
	# In real use, train a classifier on HeAR embeddings using your labeled dataset.
	clf = LogisticRegression()
	clf.classes_ = np.array(["Normal", "Abnormal"])
	clf.coef_ = np.random.randn(1, EMBEDDING_DIM)  # one weight row => binary decision
	clf.intercept_ = np.random.randn(1)

	def extract_embedding(audio):
	    """Convert a Gradio audio value into a single embedding from the model.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple as delivered by
	            ``gr.Audio(type="numpy")``, or ``None`` when nothing was provided.

	    Returns:
	        A NumPy array holding the model's ``last_hidden_state`` averaged over
	        dimension 1, or ``None`` when ``audio`` is ``None``.
	    """
	    if audio is None:
	        return None
	    sr, y = audio
	    y = np.asarray(y)

	    # Integer PCM is rescaled to floats in [-1, 1); resampling and the model
	    # both need floating-point samples.
	    if np.issubdtype(y.dtype, np.signedinteger):
	        full_scale = float(np.iinfo(y.dtype).max) + 1.0
	        y = y.astype(np.float32) / full_scale
	    else:
	        y = y.astype(np.float32)

	    # Downmix multi-channel recordings to mono so the 1-D pad/truncate below
	    # is valid.
	    # NOTE(review): assumes a (samples, channels) layout -- confirm against
	    # the Gradio version in use.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    # HeAR expects 2-second clips at 16kHz; resample, then pad/truncate.
	    target_sr = 16000
	    clip_len = target_sr * 2
	    if sr != target_sr:
	        import librosa
	        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
	    if len(y) > clip_len:
	        y = y[:clip_len]
	    else:
	        # Zero-pad the tail up to exactly two seconds (no-op when already full).
	        y = np.pad(y, (0, clip_len - len(y)))

	    inputs = feature_extractor(y, sampling_rate=target_sr, return_tensors="pt")
	    with torch.no_grad():  # inference only, no gradients needed
	        emb = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
	    return emb

	def predict(audio):
	    """Classify a recording and return the result as display text.

	    Args:
	        audio: Value forwarded unchanged to ``extract_embedding``.

	    Returns:
	        A string with the predicted label and the highest class probability,
	        or a prompt asking for a file when no embedding could be extracted.
	    """
	    embedding = extract_embedding(audio)
	    if embedding is not None:
	        # Score the embedding with the dummy classifier.
	        label = clf.predict(embedding)[0]
	        class_probs = clf.predict_proba(embedding)[0]
	        return f"Prediction: {label}\n\nConfidence: {max(class_probs):.2%}"
	    return "Please upload a heart or lung sound file."

	# Markdown text passed to gr.Interface below as its ``description`` argument.
	description = """
	# Heart & Lung Sound Classifier (Demo)
	Upload a heart or lung sound (WAV, MP3, etc.).
	This demo uses the [HeAR model](https://huggingface.co/google/hear) for health acoustic embeddings and a simple classifier for normal/abnormal prediction.
	Note: For best results, use 2-second clips. For real diagnosis, a classifier trained on labeled heart/lung sound data should be used.
	"""

	# Build the input and output components first, then wire them to predict().
	audio_input = gr.Audio(
	    sources=["upload", "microphone"],
	    type="numpy",
	    label="Upload Heart/Lung Sound",
	)
	result_output = gr.Markdown()

	iface = gr.Interface(
	    fn=predict,
	    inputs=audio_input,
	    outputs=result_output,
	    title="Heart & Lung Sound Classifier",
	    description=description,
	    allow_flagging="never",
	)

	# Launch the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()