Upload folder using huggingface_hub

9a199b4 verified 4 months ago

15.3 kB

	# import sounddevice as sd
	# import streamlit as st
	# import os
	# import re
	# import numpy as np
	# import wave
	# import requests
	# from datetime import datetime

	# API_URL = "http://localhost:8000/transcribe"

	# # Record audio
	# def record_audio(duration=15, fs=16000):
	# st.write("🔴 Recording... Speak Arabic now!")
	# recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
	# sd.wait()
	# st.write("✅ Recording finished")
	# return recording, fs

	# # Save recording
	# def save_wav(recording, fs, out_dir="recordings"):
	# os.makedirs(out_dir, exist_ok=True)
	# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	# file_path = os.path.join(out_dir, f"recording_{timestamp}.wav")

	# # Normalize
	# max_val = np.max(np.abs(recording))
	# if max_val > 0:
	# recording = (recording / max_val * 32767).astype(np.int16)

	# with wave.open(file_path, "wb") as wf:
	# wf.setnchannels(1)
	# wf.setsampwidth(2)
	# wf.setframerate(fs)
	# wf.writeframes(recording.tobytes())

	# return file_path

	# # Call API
	# def transcribe_audio(file_path):
	# with open(file_path, "rb") as f:
	# files = {"file": (file_path, f, "audio/wav")}
	# response = requests.post(API_URL, files=files)

	# if response.status_code == 200:
	# return response.json().get("transcription", "")
	# else:
	# st.error(f"❌ API Error {response.status_code}: {response.text}")
	# return ""

	# # Extract phone number
	# def extract_phone_number(text):
	# match = re.search(r"01[0-9]{9}", text)
	# return match.group(0) if match else None

	# # Extract national ID
	# def extract_national_id(text):
	# digits = re.findall(r"\d+", text)
	# candidate = "".join(digits)
	# if len(candidate) == 14:
	# return candidate
	# elif len(candidate) > 14:
	# return candidate[:14]
	# elif 7 <= len(candidate) < 14:
	# return f"⚠️ Incomplete ID: {candidate} ({len(candidate)} digits)"
	# else:
	# return None

	# # ---------------- UI ----------------
	# st.title("📞 Phone & National ID Capture (with Name + Case Name)")

	# # Session state
	# if "phone_number" not in st.session_state:
	# st.session_state.phone_number = None
	# if "national_id" not in st.session_state:
	# st.session_state.national_id = None
	# if "name" not in st.session_state:
	# st.session_state.name = ""
	# if "case_name" not in st.session_state:
	# st.session_state.case_name = ""

	# # Step 1: Phone number
	# st.subheader("Step 1: Provide your phone number")
	# col1, col2 = st.columns(2)

	# with col1:
	# if st.button("🎙️ Record Phone Number"):
	# rec, fs = record_audio()
	# wav_path = save_wav(rec, fs)
	# st.audio(wav_path)
	# text = transcribe_audio(wav_path)
	# st.write("📝 Transcription:", text)
	# phone = extract_phone_number(text)
	# if phone:
	# st.session_state.phone_number = phone
	# st.success(f"📱 Detected Phone Number: {phone}")
	# else:
	# st.error("❌ No valid phone number detected")

	# with col2:
	# phone_upload = st.file_uploader("Or upload phone number audio", type=["wav", "mp3", "m4a"])
	# if phone_upload is not None:
	# temp_path = os.path.join("recordings", f"upload_phone_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
	# os.makedirs("recordings", exist_ok=True)
	# with open(temp_path, "wb") as f:
	# f.write(phone_upload.read())
	# st.audio(temp_path)
	# text = transcribe_audio(temp_path)
	# st.write("📝 Transcription:", text)
	# phone = extract_phone_number(text)
	# if phone:
	# st.session_state.phone_number = phone
	# st.success(f"📱 Detected Phone Number: {phone}")
	# else:
	# st.error("❌ No valid phone number detected")

	# # Step 2: National ID
	# st.subheader("Step 2: Provide your national ID")
	# col3, col4 = st.columns(2)

	# with col3:
	# if st.button("🎙️ Record National ID"):
	# rec, fs = record_audio()
	# wav_path = save_wav(rec, fs)
	# st.audio(wav_path)
	# text = transcribe_audio(wav_path)
	# st.write("📝 Transcription:", text)
	# nid = extract_national_id(text)
	# if nid:
	# st.session_state.national_id = nid
	# st.success(f"🪪 Detected National ID: {nid}")
	# else:
	# st.error("❌ No valid national ID detected")

	# with col4:
	# nid_upload = st.file_uploader("Or upload national ID audio", type=["wav", "mp3", "m4a"])
	# if nid_upload is not None:
	# temp_path = os.path.join("recordings", f"upload_nid_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
	# os.makedirs("recordings", exist_ok=True)
	# with open(temp_path, "wb") as f:
	# f.write(nid_upload.read())
	# st.audio(temp_path)
	# text = transcribe_audio(temp_path)
	# st.write("📝 Transcription:", text)
	# nid = extract_national_id(text)
	# if nid:
	# st.session_state.national_id = nid
	# st.success(f"🪪 Detected National ID: {nid}")
	# else:
	# st.error("❌ No valid national ID detected")

	# # Step 3: Manual fields
	# st.subheader("Step 3: Provide additional info")
	# st.session_state.name = st.text_input("👤 Enter your Name", st.session_state.name)
	# st.session_state.case_name = st.text_input("📂 Enter Case Name", st.session_state.case_name)

	# # Final summary
	# st.subheader("📋 Summary")
	# if st.session_state.phone_number:
	# st.info(f"📱 Phone Number: {st.session_state.phone_number}")
	# if st.session_state.national_id:
	# st.info(f"🪪 National ID: {st.session_state.national_id}")
	# if st.session_state.name:
	# st.info(f"👤 Name: {st.session_state.name}")
	# if st.session_state.case_name:
	# st.info(f"📂 Case Name: {st.session_state.case_name}")

	# if st.session_state.phone_number and st.session_state.national_id and st.session_state.name and st.session_state.case_name:
	# st.success("✅ All details captured successfully!")


	import sounddevice as sd
	import streamlit as st
	import os
	import re
	import numpy as np
	import wave
	import requests
	from datetime import datetime

	API_URL = "http://localhost:8070/transcribe"

	# Record audio
	def record_audio(duration=15, fs=16000):
	    """Record single-channel int16 audio with ``sounddevice``.

	    Args:
	        duration: Length of the recording in seconds.
	        fs: Sampling rate in Hz used for the capture.

	    Returns:
	        A ``(recording, fs)`` tuple: the array produced by ``sd.rec`` and
	        the sampling rate it was captured at.
	    """
	    st.write("🔴 Recording... Speak Arabic now!")
	    # Size and clock the capture from the arguments (not fixed constants)
	    # so the returned `fs` always describes the returned samples.
	    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
	    # Block until the capture has finished before reporting completion.
	    sd.wait()
	    st.write("✅ Recording finished")
	    return recording, fs

	# Save recording
	# def save_wav(recording, fs, out_dir="recordings", prefix="recording"):
	# os.makedirs(out_dir, exist_ok=True)
	# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	# file_path = os.path.join(out_dir, f"{prefix}_{timestamp}.wav")

	# # Normalize
	# max_val = np.max(np.abs(recording))
	# if max_val > 0:
	# recording = (recording / max_val * 32767).astype(np.int16)

	# with wave.open(file_path, "wb") as wf:
	# wf.setnchannels(1)
	# wf.setsampwidth(2)
	# wf.setframerate(fs)
	# wf.writeframes(recording.tobytes())

	# return file_path
	import scipy.io.wavfile as wav

	# def save_wav(recording, fs, out_dir="recordings", prefix="recording"):
	# os.makedirs(out_dir, exist_ok=True)
	# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	# file_path = os.path.join(out_dir, f"{prefix}_{timestamp}.wav")

	# # Save directly using scipy.io.wavfile
	# wav.write(file_path, fs, recording)

	# return file_path
	# Call API
	def transcribe_audio(file_path):
	    """Upload an audio file to the transcription API and return its text.

	    Args:
	        file_path: Path of the audio file to send.

	    Returns:
	        The ``"transcription"`` field of the JSON response. Returns ``""``
	        when the file cannot be read, the request fails or times out, the
	        API answers with a non-200 status, or the response carries no
	        usable transcription. Failures are reported through ``st.error``.
	    """
	    try:
	        with open(file_path, "rb") as f:
	            # Send only the base name; the local directory layout is of no
	            # use to the server.
	            files = {"file": (os.path.basename(file_path), f, "audio/wav")}
	            # (connect, read) timeouts keep an unreachable or hung API from
	            # blocking the Streamlit script indefinitely.
	            response = requests.post(API_URL, files=files, timeout=(5, 300))
	    except (OSError, requests.RequestException) as exc:
	        st.error(f"❌ Could not transcribe audio: {exc}")
	        return ""

	    if response.status_code != 200:
	        st.error(f"❌ API Error {response.status_code}: {response.text}")
	        return ""

	    try:
	        payload = response.json()
	    except ValueError:
	        st.error("❌ API Error: response is not valid JSON")
	        return ""

	    if not isinstance(payload, dict):
	        return ""
	    # `or ""` also covers an explicit null value so callers can safely use
	    # string methods on the result.
	    return payload.get("transcription", "") or ""


	def save_wav(recording, fs, out_dir="recordings", prefix="recording"):
	    """Write a recording to a timestamped WAV file and return its path.

	    Args:
	        recording: NumPy array of samples; when it has more than one
	            dimension only column 0 is written.
	        fs: Sampling rate in Hz stored in the WAV header.
	        out_dir: Directory for the file; created if it does not exist.
	        prefix: File-name prefix placed before the timestamp.

	    Returns:
	        Path of the written file, ``<out_dir>/<prefix>_<YYYYmmdd_HHMMSS>.wav``.
	    """
	    os.makedirs(out_dir, exist_ok=True)
	    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    target = os.path.join(out_dir, "{}_{}.wav".format(prefix, stamp))

	    # Reduce a multi-dimensional array to its first column so a 1-D signal
	    # is written.
	    samples = recording[:, 0] if recording.ndim > 1 else recording
	    wav.write(target, fs, samples.astype(np.int16))
	    return target

	# Extract phone number
	def extract_phone_number(text):
	    """Extract an 11-digit phone number beginning with ``01`` from text.

	    Every decimal digit in *text* is concatenated in order, so a number
	    that is split by spaces or other characters is still recognised.
	    Non-ASCII decimal digits (for example Arabic-Indic digits) are
	    converted to ASCII before validation.

	    Args:
	        text: Transcribed text; may be empty or ``None``.

	    Returns:
	        The 11-digit ASCII number, or ``None`` when the digits in *text* do
	        not form exactly 11 digits starting with ``01``.
	    """
	    if not text:
	        return None
	    # int() maps any Unicode decimal digit matched by \d to its value, which
	    # normalises e.g. Arabic-Indic digits to "0".."9".
	    candidate = "".join(str(int(ch)) for ch in re.findall(r"\d", text))
	    if len(candidate) == 11 and candidate.startswith("01"):
	        return candidate
	    return None

	# Extract national ID
	def extract_national_id(text):
	    """Extract a 14-digit national ID from text.

	    Every decimal digit in *text* is concatenated in order; non-ASCII
	    decimal digits (for example Arabic-Indic digits) are converted to
	    ASCII first.

	    Args:
	        text: Transcribed text; may be empty or ``None``.

	    Returns:
	        * the 14-digit string when exactly 14 digits are found;
	        * the first 14 digits when more than 14 are found;
	        * a warning string describing the partial ID when 7 to 13 digits
	          are found;
	        * ``None`` when fewer than 7 digits are found.
	    """
	    if not text:
	        return None
	    # int() maps any Unicode decimal digit matched by \d to its value, which
	    # normalises e.g. Arabic-Indic digits to "0".."9".
	    candidate = "".join(str(int(ch)) for ch in re.findall(r"\d", text))
	    count = len(candidate)
	    if count >= 14:
	        # Exactly 14 digits is returned whole; longer input is truncated.
	        return candidate[:14]
	    if count >= 7:
	        return f"⚠️ Incomplete ID: {candidate} ({len(candidate)} digits)"
	    return None

	# ---------------- UI ----------------
	st.title("📞 Phone, National ID, Name & Case Name Capture")

	# Session state: make sure every captured field has an entry (None until a
	# value is detected), without overwriting values already stored.
	for key in ("phone_number", "national_id", "name", "case_name"):
	    if key in st.session_state:
	        continue
	    st.session_state[key] = None

	# Step 1: Phone number
	st.subheader("Step 1: Provide your phone number")
	col1, col2 = st.columns(2)


	def _show_phone_result(transcript):
	    """Display a transcript and store/report the phone number found in it."""
	    st.write("📝 Transcription:", transcript)
	    detected = extract_phone_number(transcript)
	    if not detected:
	        st.error("❌ No valid phone number detected")
	        return
	    st.session_state.phone_number = detected
	    st.success(f"📱 Detected Phone Number: {detected}")


	# Left column: record from the microphone.
	with col1:
	    if st.button("🎙️ Record Phone Number"):
	        samples, rate = record_audio()
	        recorded_path = save_wav(samples, rate, prefix="phone")
	        st.audio(recorded_path)
	        _show_phone_result(transcribe_audio(recorded_path))

	# Right column: upload an existing audio file instead.
	with col2:
	    phone_upload = st.file_uploader("Or upload phone number audio", type=["wav", "mp3", "m4a"])
	    if phone_upload is not None:
	        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        uploaded_path = os.path.join("recordings", f"upload_phone_{stamp}.wav")
	        os.makedirs("recordings", exist_ok=True)
	        # Persist the upload so it can be played back and sent to the API.
	        with open(uploaded_path, "wb") as out:
	            out.write(phone_upload.read())
	        st.audio(uploaded_path)
	        _show_phone_result(transcribe_audio(uploaded_path))

	# Step 2: National ID
	st.subheader("Step 2: Provide your national ID")
	col3, col4 = st.columns(2)


	def _show_national_id_result(transcript):
	    """Display a transcript and store the national ID only when complete.

	    extract_national_id returns a 14-digit string for a usable ID, a
	    warning string for a partial one, or None. Only the 14-digit form is
	    stored; a partial result is shown as a warning so it is never treated
	    as a captured ID.
	    """
	    st.write("📝 Transcription:", transcript)
	    nid = extract_national_id(transcript)
	    if not nid:
	        st.error("❌ No valid national ID detected")
	    elif nid.isdigit() and len(nid) == 14:
	        st.session_state.national_id = nid
	        st.success(f"🪪 Detected National ID: {nid}")
	    else:
	        # Partial ID: surface the message but keep any previously stored ID.
	        st.warning(nid)


	# Left column: record from the microphone.
	with col3:
	    if st.button("🎙️ Record National ID"):
	        rec, fs = record_audio()
	        wav_path = save_wav(rec, fs, prefix="nid")
	        st.audio(wav_path)
	        _show_national_id_result(transcribe_audio(wav_path))

	# Right column: upload an existing audio file instead.
	with col4:
	    nid_upload = st.file_uploader("Or upload national ID audio", type=["wav", "mp3", "m4a"])
	    if nid_upload is not None:
	        temp_path = os.path.join("recordings", f"upload_nid_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
	        os.makedirs("recordings", exist_ok=True)
	        # Persist the upload so it can be played back and sent to the API.
	        with open(temp_path, "wb") as f:
	            f.write(nid_upload.read())
	        st.audio(temp_path)
	        _show_national_id_result(transcribe_audio(temp_path))

	# Step 3: Name (audio input)
	st.subheader("Step 3: Provide your Name")
	col5, col6 = st.columns(2)


	def _show_name_result(transcript):
	    """Display a transcript and store it as the name when it is non-empty."""
	    st.write("📝 Transcription:", transcript)
	    # Tolerate a missing transcript and strip once instead of three times.
	    name = (transcript or "").strip()
	    if name:
	        st.session_state.name = name
	        st.success(f"👤 Name: {name}")
	    else:
	        # Report an empty result instead of silently ignoring it, as the
	        # phone and national ID steps do.
	        st.error("❌ No name detected")


	# Left column: record from the microphone.
	with col5:
	    if st.button("🎙️ Record Name"):
	        rec, fs = record_audio()
	        wav_path = save_wav(rec, fs, prefix="name")
	        st.audio(wav_path)
	        _show_name_result(transcribe_audio(wav_path))

	# Right column: upload an existing audio file instead.
	with col6:
	    name_upload = st.file_uploader("Or upload name audio", type=["wav", "mp3", "m4a"])
	    if name_upload is not None:
	        temp_path = os.path.join("recordings", f"upload_name_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
	        os.makedirs("recordings", exist_ok=True)
	        # Persist the upload so it can be played back and sent to the API.
	        with open(temp_path, "wb") as f:
	            f.write(name_upload.read())
	        st.audio(temp_path)
	        _show_name_result(transcribe_audio(temp_path))

	# Step 4: Case Name (audio input)
	st.subheader("Step 4: Provide Case Name")
	col7, col8 = st.columns(2)


	def _show_case_name_result(transcript):
	    """Display a transcript and store it as the case name when non-empty."""
	    st.write("📝 Transcription:", transcript)
	    # Tolerate a missing transcript and strip once instead of three times.
	    case_name = (transcript or "").strip()
	    if case_name:
	        st.session_state.case_name = case_name
	        st.success(f"📂 Case Name: {case_name}")
	    else:
	        # Report an empty result instead of silently ignoring it, as the
	        # phone and national ID steps do.
	        st.error("❌ No case name detected")


	# Left column: record from the microphone.
	with col7:
	    if st.button("🎙️ Record Case Name"):
	        rec, fs = record_audio()
	        wav_path = save_wav(rec, fs, prefix="case_name")
	        st.audio(wav_path)
	        _show_case_name_result(transcribe_audio(wav_path))

	# Right column: upload an existing audio file instead.
	with col8:
	    case_upload = st.file_uploader("Or upload case name audio", type=["wav", "mp3", "m4a"])
	    if case_upload is not None:
	        temp_path = os.path.join("recordings", f"upload_case_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
	        os.makedirs("recordings", exist_ok=True)
	        # Persist the upload so it can be played back and sent to the API.
	        with open(temp_path, "wb") as f:
	            f.write(case_upload.read())
	        st.audio(temp_path)
	        _show_case_name_result(transcribe_audio(temp_path))

	# Final summary
	st.subheader("📋 Summary")

	# (session-state key, label) pairs, in display order.
	summary_fields = (
	    ("phone_number", "📱 Phone Number"),
	    ("national_id", "🪪 National ID"),
	    ("name", "👤 Name"),
	    ("case_name", "📂 Case Name"),
	)

	# Show one info box per field that currently holds a value.
	for state_key, label in summary_fields:
	    captured = getattr(st.session_state, state_key)
	    if captured:
	        st.info(f"{label}: {captured}")

	# Confirm completion only once every field has been captured.
	if all(getattr(st.session_state, state_key) for state_key, _ in summary_fields):
	    st.success("✅ All details captured successfully!")