Muhammadidrees committed on
Commit
0329fd9
·
verified ·
1 Parent(s): 72f0197

Upload 4 files

Browse files
Files changed (4) hide show
  1. DocVoice.py +16 -0
  2. PaitentVoiceToText.py +70 -0
  3. bot_msg.jpg +0 -0
  4. user_msg.png +0 -0
DocVoice.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyttsx3
2
+
3
def text_to_speech(text: str):
    """Speak ``text`` aloud using the pyttsx3 speech engine.

    The call blocks until the queued utterance has been processed by
    ``runAndWait()``.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input
            is skipped without initializing the speech engine.
    """
    # Nothing to say: avoid spinning up the speech driver at all.
    if text is None or not str(text).strip():
        return

    engine = pyttsx3.init()

    # Select the first installed voice when one exists. Indexing an empty
    # voice list would raise IndexError, so in that case keep whatever
    # voice the engine already has configured.
    voices = engine.getProperty('voices')
    if voices:
        engine.setProperty('voice', voices[0].id)

    # Queue the utterance and block until it has been spoken.
    engine.say(text)
    engine.runAndWait()
13
+
14
# Demo: speak a sample sentence when this file is executed directly.
if __name__ == "__main__":
    demo_sentence = "Hello Abdul Moiz! This is your Riaya Tech project speaking."
    text_to_speech(demo_sentence)
PaitentVoiceToText.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # stt.py
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
+ import sounddevice as sd
5
+ import numpy as np
6
+ import scipy.io.wavfile as wav
7
+
8
# Directory that is tried first for a locally saved Whisper checkpoint.
save_dir = r"C:\Users\JAY\Downloads\model\OpenAIWhisper"

# Choose device and precision: float16 on CUDA, float32 on CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device_index, device_str, dtype = 0, "cuda", torch.float16
else:
    device_index, device_str, dtype = -1, "cpu", torch.float32

# Keyword arguments shared by the local and the Hub model load.
_model_kwargs = {
    "torch_dtype": dtype,
    "low_cpu_mem_usage": True,
    "use_safetensors": True,
}

# Try the local checkpoint first; on any failure fall back to the Hub model.
try:
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        save_dir, local_files_only=True, **_model_kwargs
    ).to(device_str)
    processor = AutoProcessor.from_pretrained(save_dir, local_files_only=True)
except Exception as load_error:
    print("Warning: Local model load failed, falling back to online model:", load_error)
    hub_id = "openai/whisper-small"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        hub_id, **_model_kwargs
    ).to(device_str)
    processor = AutoProcessor.from_pretrained(hub_id)

# Assemble the speech-recognition pipeline from the loaded model and the
# processor's tokenizer and feature extractor.
_pipeline_kwargs = {
    "model": model,
    "tokenizer": processor.tokenizer,
    "feature_extractor": processor.feature_extractor,
    "torch_dtype": dtype,
    "device": device_index,
}
pipe = pipeline("automatic-speech-recognition", **_pipeline_kwargs)

print("Whisper pipeline ready.")
47
+
48
+
49
def record_and_transcribe(duration=5, samplerate=16000, filename="mic_input.wav") -> str:
    """Record audio from the microphone and return its Whisper transcript.

    The recording is captured as mono float32 audio, written to
    ``filename`` as a 16-bit PCM WAV file, and that file is then passed to
    the module-level ``pipe`` for transcription.

    Args:
        duration: Recording length in seconds. Must yield at least one
            sample at the given ``samplerate``.
        samplerate: Sampling rate in Hz. Must be positive.
        filename: Path of the WAV file to write.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        ValueError: If ``samplerate`` is not positive or the requested
            recording would contain no samples.
    """
    # Validate before touching the audio device so that bad arguments fail
    # with a clear message instead of an error from the audio backend.
    if samplerate <= 0:
        raise ValueError(f"samplerate must be positive, got {samplerate!r}")
    frame_count = int(duration * samplerate)
    if frame_count < 1:
        raise ValueError(
            f"duration must be long enough to record at least one sample, got {duration!r}"
        )

    # Record a mono float32 buffer and block until capture has finished.
    print(f"🎙️ Recording for {duration} seconds...")
    audio = sd.rec(frame_count, samplerate=samplerate, channels=1, dtype="float32")
    sd.wait()
    audio = np.squeeze(audio)

    # Convert to 16-bit PCM. Clip to [-1, 1] first: a sample outside that
    # range would otherwise wrap around when cast to int16.
    pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    wav.write(filename, samplerate, pcm)
    print(f"✅ Recording saved as {filename}")

    # Transcribe the saved file with the Whisper pipeline.
    result = pipe(filename)
    text = result["text"]
    print(f"📝 Transcribed text: {text}")

    return text
70
+
bot_msg.jpg ADDED
user_msg.png ADDED