Spaces:
Running
Running
Danh Tran committed on
Upload 34 files
Browse files- .gitattributes +1 -0
- README.md +165 -14
- app.py +21 -0
- audios/audios/input_audios/5g_Viet.mp3 +0 -0
- audios/audios/input_audios/Megan-Fox.mp3 +0 -0
- audios/audios/input_audios/audio-Wizard-of-Oz-Dorthy.wav +0 -0
- audios/audios/input_audios/linus.mp3 +3 -0
- audios/audios/input_audios/mic.mp3 +0 -0
- audios/audios/input_audios/sample.wav +0 -0
- audios/audios/output_audios/Wizard-of-Oz-Dorthy_robot.mp3 +0 -0
- audios/audios/output_audios/linus.mp3 +0 -0
- audios/audios/output_audios/mic.mp3 +0 -0
- audios/records/input_records/mic.mp3 +0 -0
- audios/records/output_records/mic.mp3 +0 -0
- config_folder/__pycache__/config.cpython-311.pyc +0 -0
- config_folder/config.py +156 -0
- config_folder/config.yaml +24 -0
- in_out_micro/__pycache__/record_and_convert.cpython-311.pyc +0 -0
- in_out_micro/__pycache__/stream_to_virtual_mic.cpython-311.pyc +0 -0
- in_out_micro/__pycache__/to_virtual_micro.cpython-311.pyc +0 -0
- in_out_micro/__pycache__/voice_record.cpython-311.pyc +0 -0
- in_out_micro/record_and_convert.py +38 -0
- in_out_micro/stream_to_virtual_mic.py +19 -0
- in_out_micro/to_virtual_micro.py +50 -0
- in_out_micro/voice_record.py +58 -0
- main.py +28 -0
- main_parallel.py +24 -0
- requirements.txt +7 -0
- robot_voice/__pycache__/change_to_robot_voice.cpython-311.pyc +0 -0
- robot_voice/__pycache__/waveshaper.cpython-311.pyc +0 -0
- robot_voice/change_to_robot_voice.py +131 -0
- robot_voice/waveshaper.py +22 -0
- test.ipynb +607 -0
- test2.py +7 -0
- utils/pass_to_virtual_micro.py +64 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
audios/audios/input_audios/linus.mp3 filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,14 +1,165 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
--
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Robot Voice with Enhanced Features
|
| 2 |
+
|
| 3 |
+
This project builds upon the excellent work of [Neil Lakin](https://github.com/nrlakin) on his **robot_voice** project, adding several exciting new features:
|
| 4 |
+
|
| 5 |
+
## **Get Started:**
|
| 6 |
+
|
| 7 |
+
This project aims to offer a more dynamic and customizable approach to robot voice synthesis. Explore the code, experiment with the features, and contribute to the ongoing development!
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
## Enhanced Capabilities:
|
| 11 |
+
|
| 12 |
+
- **Real-time Recording:** Capture your voice instantly and manipulate it with the same robotic effects applied to text input.
|
| 13 |
+
- **Virtual Audio Driver Connectivity:** Seamlessly integrate with virtual audio drivers, allowing you to route the synthesized voice output to various applications and devices.
|
| 14 |
+
- **Expanded Presets:** Utilize a wider variety of pre-configured voice settings to customize the robotic sound to your liking, with sample audio included for demonstration.
|
| 15 |
+
|
| 16 |
+
## Project Foundation:
|
| 17 |
+
|
| 18 |
+
The core code of this project is based on Neil Lakin's original "robot_voice" project. For a deeper understanding of the underlying concepts and techniques, I encourage you to explore his repository:
|
| 19 |
+
|
| 20 |
+
**[Neil Lakin's "robot_voice" repository](https://github.com/nrlakin/robot_voice)**
|
| 21 |
+
|
| 22 |
+
## **How to Use:**
|
| 23 |
+
|
| 24 |
+
### 1. Audio Files
|
| 25 |
+
- Place your audio files (e.g., .wav, .mp3) in the `audios/audios/input_audios` folder.
|
| 26 |
+
- The processed robotic voice versions of your files will be saved in the `audios/audios/output_audios` folder with the same filenames.
|
| 27 |
+
|
| 28 |
+
### 2. Configuration
|
| 29 |
+
|
| 30 |
+
- Configuration: all settings are stored in `config_folder/config.yaml`. You can control whether the program prompts for configuration by setting `ASK_NEW_CONFIG` to `true` or `false`. If set to `false`, the program will not ask for configuration.
|
| 31 |
+
|
| 32 |
+
- Keyboard Shortcuts: use the following keyboard shortcuts to control the program:
|
| 33 |
+
|
| 34 |
+
- `s`: Start recording
|
| 35 |
+
- `q`: Stop recording
|
| 36 |
+
- `e`: Stream Robot Voice to microphone
|
| 37 |
+
|
| 38 |
+
### 3. Real-time Recording
|
| 39 |
+
- Record your voice using the provided interface and `write_to_file=True`.
|
| 40 |
+
- Your recordings will be saved in `audios/records/input_records/mic.wav`.
|
| 41 |
+
- The robotic versions of your recordings will be saved in `audios/records/output_records/mic.wav`.
|
| 42 |
+
|
| 43 |
+
- If `write_to_file=False`, the robotic versions of your recordings will be saved in `audios/records/output_records/mic.wav`.
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
- Config
|
| 47 |
+
|
| 48 |
+
### 4. Parameters for Adjusting
|
| 49 |
+
|
| 50 |
+
This section explains the parameters you can use to fine-tune the sound of the generated speech.
|
| 51 |
+
|
| 52 |
+
#### 4.1. Parameter Short Explanation and Default Values
|
| 53 |
+
|
| 54 |
+
| Parameter | Default Value | Description |
|
| 55 |
+
|---|---|---|
|
| 56 |
+
| `VB` | 0.2 | Controls the volume of the **bright** part of the speech. Higher values result in a shorter voice that might be difficult to hear clearly. |
|
| 57 |
+
| `VL` | 0.4 | Controls the volume of the **low** part of the speech. Higher values result in a shorter voice that might be difficult to hear clearly. |
|
| 58 |
+
| `H` | 4 | Controls the slope of the linear section of the diode model's response, which kicks in after the voltage exceeds `VL`. |
|
| 59 |
+
| `LOOKUP_SAMPLES` | 1024 | Determines the size of the **lookup table** used for sound synthesis. Lower values can introduce noise. |
|
| 60 |
+
| `MOD_F` | 50 | Controls the **modulation frequency**, which influences the "robotic" effect. Higher values produce a more robotic sound. |
|
| 61 |
+
|
| 62 |
+
#### **Important Notes:**
|
| 63 |
+
|
| 64 |
+
* Each parameter has a default value. You can adjust them to fine-tune the sound to your liking.
|
| 65 |
+
* `VB` and `VL` are independent and do not have the same value.
|
| 66 |
+
* `VB` and `VL` must be below 1.0.
|
| 67 |
+
|
| 68 |
+
#### 4.2. Parameter Short Explanation
|
| 69 |
+
##### 4.2.1. Explaining the Parameters in the Diode-Based Ring Modulator Model
|
| 70 |
+
|
| 71 |
+
The parameters you listed are used in a digital model of a diode-based ring modulator, as described in the article "A SIMPLE DIGITAL MODEL OF THE DIODE-BASED RING-MODULATOR". This model aims to recreate the distinctive sound of an analog diode-based ring modulator, which is characterized by its non-linear behavior and added harmonics.
|
| 72 |
+
|
| 73 |
+
Here's a breakdown of each parameter and its role in the model:
|
| 74 |
+
|
| 75 |
+
- **`VB` (Default: 0.2): Diode Forward Bias Voltage**
|
| 76 |
+
|
| 77 |
+
- This parameter emulates the forward bias voltage of a diode, controlling the point at which the diode starts conducting.
|
| 78 |
+
- It essentially sets the threshold for the signal to pass through the diode model.
|
| 79 |
+
- The paper mentions using values of **0.2** and **0.4**, and it's important to keep this value **below 1**.
|
| 80 |
+
|
| 81 |
+
- **`VL` (Default: 0.4): Transition to Linear Behavior**
|
| 82 |
+
|
| 83 |
+
- This parameter determines the voltage at which the diode model transitions from a curved response to a linear response.
|
| 84 |
+
- Beyond this voltage, the output of the diode model is proportional to the input.
|
| 85 |
+
- Like `VB`, **`VL`** should be kept **below 1**, and the paper uses **0.4** as a value.
|
| 86 |
+
|
| 87 |
+
- **`H` (Default: 4): Slope of Linear Section**
|
| 88 |
+
|
| 89 |
+
- This parameter controls the slope of the linear section of the diode model's response, which kicks in after the voltage exceeds `VL`.
|
| 90 |
+
- A higher `H` value means a steeper slope, leading to a more pronounced effect of the diode's non-linearity.
|
| 91 |
+
- By adjusting `H`, you can influence the overall distortion characteristics of the ring modulator.
|
| 92 |
+
|
| 93 |
+
- **`LOOKUP_SAMPLES` (Default: 1024): Size of Lookup Table**
|
| 94 |
+
|
| 95 |
+
- This parameter determines the number of samples used in a lookup table that represents the diode's non-linearity.
|
| 96 |
+
- A larger table can potentially provide a more accurate representation of the non-linearity, but it comes at the cost of increased memory usage.
|
| 97 |
+
- The article recommends leaving this value as is unless you have specific reasons to change it.
|
| 98 |
+
|
| 99 |
+
- **`MOD_F` (Default: 50): Modulating Frequency**
|
| 100 |
+
|
| 101 |
+
- This parameter sets the frequency of the modulating signal in Hertz (Hz).
|
| 102 |
+
- In ring modulation, the modulating signal multiplies the carrier signal, creating sum and difference frequencies.
|
| 103 |
+
- `MOD_F` determines the frequency of one of the input signals to the ring modulator.
|
| 104 |
+
|
| 105 |
+
Understanding these parameters helps you to control and manipulate the sound of the digital diode-based ring modulator. You can experiment with different values to achieve a wide range of timbres, from subtle saturation to harsher, more distorted sounds.
|
| 106 |
+
|
| 107 |
+
##### 4.2.2. Exploring the Impact of High and Low Parameter Values
|
| 108 |
+
|
| 109 |
+
Adjusting the parameters in the digital model of a diode-based ring modulator influences the sound by altering the behavior of the simulated diodes. Here's a breakdown of what happens when these parameters are set to high or low values:
|
| 110 |
+
|
| 111 |
+
- **`VB` (Diode Forward Bias Voltage)**
|
| 112 |
+
|
| 113 |
+
- **High `VB`:** With a higher forward bias voltage, the diode takes longer to start conducting. This means a larger portion of the input signal will fall below the threshold and be effectively clipped, resulting in a more pronounced distortion effect.
|
| 114 |
+
- **Low `VB`:** A lower `VB` allows the diode to conduct at lower signal levels. This leads to less clipping and a more subtle, less distorted sound. The signal will pass through with less alteration.
|
| 115 |
+
|
| 116 |
+
- **`VL` (Transition to Linear Behavior)**
|
| 117 |
+
|
| 118 |
+
- **High `VL`:** Increasing `VL` expands the curved, non-linear region of the diode's response. This enhances the initial distortion and coloration before the signal reaches the linear section.
|
| 119 |
+
- **Low `VL`:** A lower `VL` means a shorter curved region. The diode's response becomes linear more quickly, resulting in less initial distortion and a faster transition to a cleaner output.
|
| 120 |
+
|
| 121 |
+
- **`H` (Slope of Linear Section)**
|
| 122 |
+
|
| 123 |
+
- **High `H`:** A steeper slope in the linear section amplifies the differences between the input and output signals. This leads to a more significant change in the signal's overall shape, emphasizing the harmonic content introduced by the diode's non-linearity and creating a more distorted sound.
|
| 124 |
+
- **Low `H`:** A gentler slope reduces the difference between the input and output signals in the linear region. The effect of the non-linearity becomes less pronounced, resulting in less distortion.
|
| 125 |
+
|
| 126 |
+
- **`LOOKUP_SAMPLES` (Size of Lookup Table)**
|
| 127 |
+
|
| 128 |
+
- **High `LOOKUP_SAMPLES`:** While increasing the lookup table size can potentially improve the accuracy of the diode non-linearity representation, it might not significantly impact the perceived sound. It also increases memory usage.
|
| 129 |
+
- **Low `LOOKUP_SAMPLES`:** A smaller lookup table might introduce some inaccuracies in the representation of the diode non-linearity, potentially leading to a slightly less accurate emulation of the analog behavior.
|
| 130 |
+
|
| 131 |
+
- **`MOD_F` (Modulating Frequency)**
|
| 132 |
+
|
| 133 |
+
- **High `MOD_F`:** Increasing the modulating frequency shifts the sum and difference frequencies generated by the ring modulation process higher in the frequency spectrum. This can create a brighter, more metallic or clangorous sound.
|
| 134 |
+
- **Low `MOD_F`:** A lower modulating frequency results in lower sum and difference frequencies, leading to a darker, muddier, or more subtle ring modulation effect.
|
| 135 |
+
|
| 136 |
+
Remember that the perception of "high" and "low" for these parameters depends on the context of the specific sounds you are working with. Experimentation is key to finding the sweet spots that create the desired sonic results.
|
| 137 |
+
|
| 138 |
+
#### 4.3 Preset Styles
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
### 5. Connect to Virtual Audio Cable
|
| 142 |
+
- Download and Install Driver in https://vb-audio.com/Cable/.
|
| 143 |
+
|
| 144 |
+
- This flowchart illustrates the audio routing for real-time distortion processing using VB-Audio CABLE.
|
| 145 |
+
|
| 146 |
+
```mermaid
|
| 147 |
+
graph LR
|
| 148 |
+
A[Recorded Audio] --> B(CABLE Output);
|
| 149 |
+
B --> C{VB-Audio Virtual Cable};
|
| 150 |
+
C --> D(CABLE Input);
|
| 151 |
+
D --> E[Distorted Audio Output];
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
The process flows as follows:
|
| 155 |
+
|
| 156 |
+
- **Recorded Audio:** Audio data from a recording source (e.g., microphone, instrument) is sent to...
|
| 157 |
+
- **CABLE Output:** The virtual output of VB-Audio CABLE. This acts as a virtual audio interface output.
|
| 158 |
+
- **VB-Audio Virtual Cable:** The audio data is passed through the VB-Audio Virtual Cable driver. This is where the distortion processing would occur (using a plugin or other software that intercepts the virtual cable's audio stream).
|
| 159 |
+
- **CABLE Input:** The processed audio is then received by the virtual input of VB-Audio CABLE.
|
| 160 |
+
- **Distorted Audio Output:** Finally, the distorted audio is sent to the desired output destination (e.g., speakers, headphones, recording software).
|
| 161 |
+
|
| 162 |
+
- Note that **CABLE Output**, **VB-Audio Virtual Cable**, and **CABLE Input** can be replaced by **Input**, **Other Microphone**, and **Output**.
|
| 163 |
+
|
| 164 |
+
## **Note**
|
| 165 |
+
This readme assumes the project structure includes folders like `audios`, `records`, `input_audios`, `output_audios`, etc. If your project uses a different folder structure, adjust the paths accordingly.
|
app.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import numpy as np
from robot_voice.change_to_robot_voice import RobotVoice
from config_folder.config import run_config

def convert_to_robot_voice(uploaded_audio, mic_audio):
    """Gradio callback: run the robot-voice effect over the provided audio.

    Args:
        uploaded_audio: value of the "upload" Audio component (or None).
        mic_audio: value of the streaming "microphone" Audio component (or None).

    Returns:
        The robot-voice audio array produced by RobotVoice.run.
    """
    # Prefer the uploaded clip; fall back to the microphone stream.
    # NOTE(review): the exact value shape Gradio delivers (array vs
    # (sr, array) tuple) depends on the Audio component's type — confirm
    # against RobotVoice.run's expectations.
    audio_array = uploaded_audio if uploaded_audio is not None else mic_audio

    _, _, _, _, VB, VL, H, LOOKUP_SAMPLES, MOD_F, _, _ = run_config()

    rv_obj = RobotVoice(vb=VB, vl=VL, h=H, lookup_samples=LOOKUP_SAMPLES, mod_f=MOD_F)
    robot_voice_arr, sr = rv_obj.run((audio_array, None), use_record=False, save_to_file=False)

    return robot_voice_arr

# The interface declares two input components, so the callback must accept
# two arguments (the original took only one, raising a TypeError on submit).
demo = gr.Interface(
    convert_to_robot_voice,
    [gr.Audio(sources=["upload"], label="Input Human Voice"),
     gr.Audio(sources=["microphone"], label="Input Human Voice", streaming=True)],
    gr.Audio(label="Output Robot Voice"),
)

demo.launch()
|
audios/audios/input_audios/5g_Viet.mp3
ADDED
|
Binary file (947 kB). View file
|
|
|
audios/audios/input_audios/Megan-Fox.mp3
ADDED
|
Binary file (31.8 kB). View file
|
|
|
audios/audios/input_audios/audio-Wizard-of-Oz-Dorthy.wav
ADDED
|
Binary file (26.4 kB). View file
|
|
|
audios/audios/input_audios/linus.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9367855472f8b275824b5975540842ed6037858e53160f1c392520c5c88a37e3
|
| 3 |
+
size 5292044
|
audios/audios/input_audios/mic.mp3
ADDED
|
Binary file (25 kB). View file
|
|
|
audios/audios/input_audios/sample.wav
ADDED
|
Binary file (610 kB). View file
|
|
|
audios/audios/output_audios/Wizard-of-Oz-Dorthy_robot.mp3
ADDED
|
Binary file (16.4 kB). View file
|
|
|
audios/audios/output_audios/linus.mp3
ADDED
|
Binary file (626 kB). View file
|
|
|
audios/audios/output_audios/mic.mp3
ADDED
|
Binary file (26 kB). View file
|
|
|
audios/records/input_records/mic.mp3
ADDED
|
Binary file (9.79 kB). View file
|
|
|
audios/records/output_records/mic.mp3
ADDED
|
Binary file (39 kB). View file
|
|
|
config_folder/__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (6.94 kB). View file
|
|
|
config_folder/config.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import yaml
|
| 3 |
+
import sounddevice as sd
|
| 4 |
+
|
| 5 |
+
FOLDER_CONFIG = "config_folder"
|
| 6 |
+
CONFIG_FILE = "config.yaml"
|
| 7 |
+
CONFIG_PATH = os.path.join(FOLDER_CONFIG, CONFIG_FILE)
|
| 8 |
+
|
| 9 |
+
class ConfigProcess():
    """Read and write the project's YAML configuration file."""

    def read_config(self, path):
        """Parse the YAML file at *path* and return its contents (a dict).

        safe_load is used instead of yaml.load(FullLoader): it only builds
        plain Python objects and is the loader PyYAML recommends for
        configuration files.
        """
        with open(path, "r") as yaml_file:
            return yaml.safe_load(yaml_file)

    def write_config(self, yaml_file, data):
        """Serialize *data* as YAML, overwriting the file at *yaml_file*."""
        with open(yaml_file, 'w') as file:
            yaml.dump(data, file)
|
| 17 |
+
|
| 18 |
+
def record_and_save_robot() -> tuple:
    """Interactively ask whether to record from the mic and whether to save
    the robot voice to a file.

    Returns:
        (use_recording, save_robot_to_file): two booleans.
    """
    def _ask_yes_no(prompt):
        # Re-prompt until the user types y/Y or n/N; the original duplicated
        # this loop twice with two different comparison styles.
        while True:
            answer = input(prompt)
            if answer in ("Y", "y"):
                return True
            if answer in ("N", "n"):
                return False
            print('You only need type "y" or "n"')

    use_recording = _ask_yes_no("USE RECORDING? (y/n): ")
    save_robot_to_file = _ask_yes_no("SAVE ROBOT VOICE TO FILE? (y/n): ")

    print(f"USE_RECORD = {use_recording}")
    print(f"SAVE_ROBOT_TO_FILE = {save_robot_to_file}")
    return use_recording, save_robot_to_file
|
| 47 |
+
|
| 48 |
+
def choose_audio_device():
    """Prompt the user to pick the input and output audio devices by index.

    Returns:
        (in_device, out_device): dicts with "index" and "name" keys, where
        "name" is formatted as "<device name>, <host API name>".
    """
    audio_device_list = sd.query_devices()
    audio_hostapis_list = sd.query_hostapis()
    print(f"Here are Indices and Full-Name of Audio Devices (Just enter the device index): \n\
{audio_device_list}\n")

    while True:
        idx_input_device = input("Please enter Your Index Input Audio: ")
        idx_output_device = input("Please enter Your Index Output Audio: ")
        try:
            idx_input_device = int(idx_input_device)
            idx_output_device = int(idx_output_device)
            break
        except ValueError:
            # Only catch the int() parse failure; the original bare except
            # would also have hidden genuine bugs.
            print("Make sure 2 Index are integer.")

    def _device_label(idx):
        # "<device name>, <host API name>" — the same format stored in config.yaml.
        device = audio_device_list[idx]
        return device['name'] + ", " + audio_hostapis_list[device['hostapi']]['name']

    in_device_name = _device_label(idx_input_device)
    out_device_name = _device_label(idx_output_device)

    print("- You have chosen Input Device: ", in_device_name)
    print("- You have chosen Output Device: ", out_device_name)

    in_device = {"index": idx_input_device, "name": in_device_name}
    out_device = {"index": idx_output_device, "name": out_device_name}

    return in_device, out_device
|
| 78 |
+
|
| 79 |
+
def change_config_dict(config_data: dict, **new_cfg) -> dict:
    """Overwrite entries of *config_data* with the given keyword arguments.

    Args:
        config_data: existing configuration mapping (mutated in place).
        **new_cfg: keys and values to set.

    Returns:
        The same *config_data* dict, updated.
    """
    # dict.update performs exactly the key-by-key assignment the original
    # did via enumerate() + list(new_cfg.values())[idx].
    config_data.update(new_cfg)
    return config_data
|
| 84 |
+
|
| 85 |
+
def re_config():
    """Prompt the user for fresh settings, persist them to config.yaml,
    and return them.

    Returns:
        (in_device, out_device, use_recording, save_robot_to_file)
    """
    in_device, out_device = choose_audio_device()
    use_recording, save_robot_to_file = record_and_save_robot()

    new_cfg = {'Input_device': in_device,
               'Output_device': out_device,
               'USE_RECORDING': use_recording,
               'SAVE_ROBOT_TO_FILE': save_robot_to_file}

    # Reuse one ConfigProcess for both the read and the write (the original
    # constructed a throwaway instance for each call).
    cfg_processor = ConfigProcess()
    cfg_dict = cfg_processor.read_config(CONFIG_PATH)
    cfg_dict = change_config_dict(cfg_dict, **new_cfg)
    cfg_processor.write_config(CONFIG_PATH, cfg_dict)

    return in_device, out_device, use_recording, save_robot_to_file
|
| 103 |
+
|
| 104 |
+
def run_config():
    """Load the runtime configuration, optionally re-prompting the user.

    When ASK_NEW_CONFIG is true in config.yaml the user may discard the
    stored device/recording settings and enter new ones (which re_config
    writes back to the file); otherwise the stored values are used as-is.

    Returns:
        (in_device, out_device, use_recording, save_robot_to_file,
         VB, VL, H, LOOKUP_SAMPLES, MOD_F, STREAM_TO_VIRTUAL_MIC, PARALLEL)
    """
    cfg_dict = ConfigProcess().read_config(CONFIG_PATH)

    # Default to the stored settings; the original duplicated these four
    # assignments in two branches. They are overridden below only when the
    # user explicitly asks for a new configuration.
    in_device = cfg_dict['Input_device']
    out_device = cfg_dict['Output_device']
    use_recording = cfg_dict['USE_RECORDING']
    save_robot_to_file = cfg_dict['SAVE_ROBOT_TO_FILE']

    if cfg_dict['ASK_NEW_CONFIG']:
        while True:
            use_past_config_s = input("Do you want to use past configuration? (y/n): ")
            if use_past_config_s in ["Y", "y"]:
                use_past_config = True
                break
            elif use_past_config_s in ["N", "n"]:
                use_past_config = False
                break
            print('You only need type "y" or "n"')

        if not use_past_config:
            in_device, out_device, use_recording, save_robot_to_file = re_config()

    STREAM_TO_VIRTUAL_MIC = cfg_dict['STREAM_TO_VIRTUAL_MIC']
    PARALLEL = cfg_dict['PARALLEL']

    rv_cfg = cfg_dict['Robot_Voice_config']
    VB = rv_cfg['vb']
    VL = rv_cfg['vl']
    H = rv_cfg['h']
    LOOKUP_SAMPLES = rv_cfg['lookup_samples']
    MOD_F = rv_cfg['mod_f']

    return in_device, out_device, use_recording, save_robot_to_file,\
        VB, VL, H, LOOKUP_SAMPLES, MOD_F, STREAM_TO_VIRTUAL_MIC, PARALLEL
|
| 149 |
+
|
| 150 |
+
if __name__ == "__main__":
    # Smoke-test the configuration loader from the command line:
    # load (or prompt for) the settings and echo every value.
    config_values = run_config()
    print(*config_values)
|
config_folder/config.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ASK_NEW_CONFIG: false
|
| 2 |
+
|
| 3 |
+
Input_device:
|
| 4 |
+
index: 11
|
| 5 |
+
name: CABLE Input (VB-Audio Virtual Cable), Windows DirectSound
|
| 6 |
+
|
| 7 |
+
Output_device:
|
| 8 |
+
index: 2
|
| 9 |
+
name: CABLE Output (VB-Audio Virtual , MME
|
| 10 |
+
|
| 11 |
+
Robot_Voice_config:
|
| 12 |
+
h: 4
|
| 13 |
+
lookup_samples: 512
|
| 14 |
+
mod_f: 50
|
| 15 |
+
vb: 0.005
|
| 16 |
+
vl: 0.006
|
| 17 |
+
|
| 18 |
+
SAVE_ROBOT_TO_FILE: false
|
| 19 |
+
|
| 20 |
+
USE_RECORDING: true
|
| 21 |
+
|
| 22 |
+
STREAM_TO_VIRTUAL_MIC: true
|
| 23 |
+
|
| 24 |
+
PARALLEL: false
|
in_out_micro/__pycache__/record_and_convert.cpython-311.pyc
ADDED
|
Binary file (2.05 kB). View file
|
|
|
in_out_micro/__pycache__/stream_to_virtual_mic.cpython-311.pyc
ADDED
|
Binary file (1.05 kB). View file
|
|
|
in_out_micro/__pycache__/to_virtual_micro.cpython-311.pyc
ADDED
|
Binary file (1.91 kB). View file
|
|
|
in_out_micro/__pycache__/voice_record.cpython-311.pyc
ADDED
|
Binary file (3.92 kB). View file
|
|
|
in_out_micro/record_and_convert.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import librosa
|
| 3 |
+
# from robot_voice.change_to_robot_voice import RobotVoice
|
| 4 |
+
from in_out_micro.voice_record import Recorder
|
| 5 |
+
|
| 6 |
+
def record(out_device, use_recording):
    """Acquire audio either live from the microphone or from a file path.

    Args:
        out_device: device dict with an "index" key.
            NOTE(review): this "output" device's index is handed to the
            Recorder as its *input* device — presumably intentional for the
            virtual-cable loopback, but confirm against config.yaml.
        use_recording: True -> record live audio; False -> prompt for a file.

    Returns:
        (audio_arr, sr, base_name_file)
    """
    if use_recording:
        base_name_file = "mic.mp3"
        recorder = Recorder(
            os.path.join("audios", "records", "input_records", base_name_file),
            input_device_index=out_device['index'])
        # Local renamed from the original's misspelled "audo_arr".
        audio_arr, sr = recorder.record(write_to_file=True)
    else:
        input_file = input("Paste your file path: ")
        base_name_file = os.path.basename(input_file)
        audio_arr, sr = librosa.load(input_file)
    return audio_arr, sr, base_name_file
|
| 20 |
+
|
| 21 |
+
def convert(rv_obj, out_device, use_recording, save_robot_to_file):
    """Record (or load) audio and run it through the robot-voice effect.

    Args:
        rv_obj: a RobotVoice instance whose .run performs the conversion.
        out_device: device dict forwarded to record().
        use_recording: True -> use the just-recorded mic audio; False ->
            process a file from the input_audios folder.
        save_robot_to_file: forwarded to RobotVoice.run as save_to_file.

    Returns:
        (robot_voice_arr, sr)
    """
    audio_arr, sr, base_name_file = record(out_device, use_recording)

    if use_recording:
        output_path = os.path.join('audios', 'records', 'output_records', base_name_file)
        robot_voice_arr, sr = rv_obj.run(input_func=(audio_arr, sr),
                                         output_func=output_path,
                                         use_record=use_recording,
                                         save_to_file=save_robot_to_file)
    else:
        input_path = os.path.join('audios', 'audios', 'input_audios', base_name_file)
        output_path = os.path.join('audios', 'audios', 'output_audios', base_name_file)
        robot_voice_arr, sr = rv_obj.run(input_path, output_path,
                                         use_record=use_recording,
                                         save_to_file=save_robot_to_file)

    return robot_voice_arr, sr
|
in_out_micro/stream_to_virtual_mic.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import keyboard
|
| 2 |
+
from in_out_micro.to_virtual_micro import push_to_mic
|
| 3 |
+
|
| 4 |
+
def stream(data, sr, device_name, start_key='e'):
    """Push *data* to the audio device, optionally waiting for a key press.

    Args:
        data: audio samples to play.
        sr: sample rate in Hz.
        device_name: name of the (virtual) output device.
        start_key: key that triggers streaming; None streams immediately.
            (The original's None branch was a one-shot while loop that
            also streamed immediately.)
    """
    import time  # local import: this module does not otherwise need time

    if start_key is None:
        push_to_mic(data, sr, device_name)
        return

    print(f"Press '{start_key}' to start stream to {device_name}!")
    while not (keyboard.is_pressed(start_key) or keyboard.is_pressed(start_key.upper())):
        # Sleep briefly so the polling loop does not peg a CPU core
        # (the original busy-waited at full speed).
        time.sleep(0.01)
    push_to_mic(data, sr, device_name)
|
| 18 |
+
|
| 19 |
+
|
in_out_micro/to_virtual_micro.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import librosa
|
| 3 |
+
import numpy as np
|
| 4 |
+
import sounddevice as sd
|
| 5 |
+
|
| 6 |
+
def select_device():
    """Return a preferred capture-device name, or None if neither exists.

    Preference order: a device whose name contains "Sound Mixer", then one
    whose name contains "CABLE Input".

    Bug fixed: the original iterated the device DICTS and tested the
    substring against each dict (i.e. its keys), so it could never match
    and always fell through returning None.
    """
    devices = sd.query_devices()
    input_devices = [device for device in devices if device['max_input_channels'] > 0]
    device_names = [device['name'] for device in input_devices]

    # Check for "Sound Mixer"
    for name in device_names:
        if "Sound Mixer" in name:
            return name

    # Check for "CABLE Input"
    for name in device_names:
        if "CABLE Input" in name:
            return name

    return None
|
| 20 |
+
|
| 21 |
+
# 'CABLE Input (VB-Audio Virtual Cable)'
|
| 22 |
+
def push_to_mic(data, sr, device_name):
    """Play *data* through the named audio device (e.g. a virtual cable input).

    Blocks until playback finishes.

    Args:
        data: audio samples (as produced by librosa.load).
        sr: sample rate in Hz.
        device_name: sounddevice device name, e.g.
            'CABLE Input (VB-Audio Virtual Cable), Windows DirectSound'.
    """
    # (The original carried ~15 lines of commented-out scratch code here —
    # a sample device-info dump and dead select_device() call — removed.)
    print(f"Pushing data to {device_name}")
    sd.play(data, sr, device=device_name)
    sd.wait()  # block until the clip is done playing
    sd.stop()
|
| 46 |
+
|
| 47 |
+
if __name__ == "__main__":
    # Manual check: route a previously converted clip to the virtual cable.
    audio_data, sample_rate = librosa.load('audios/audios/output_audios/mic.mp3')
    target_device = 'CABLE Input (VB-Audio Virtual Cable), Windows DirectSound'
    push_to_mic(audio_data, sample_rate, target_device)
|
in_out_micro/voice_record.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import keyboard
|
| 2 |
+
import pyaudio
|
| 3 |
+
import sounddevice as sd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
class Recorder():
    """Push-to-talk microphone recorder driven by keyboard hotkeys.

    Press START_KEY ('s') to begin capturing and STOP_KEY ('q') to stop.
    Audio is captured as 32-bit float PCM via PyAudio and optionally
    written to `filename` with soundfile.
    """

    def __init__(self, filename, sample_rate=22050, channels=1, input_device_index=2):
        # paFloat32 keeps samples in [-1, 1], matching librosa/soundfile arrays.
        self.audio_format = pyaudio.paFloat32  # pyaudio.paInt16
        self.channels = channels
        self.sample_rate = sample_rate
        # 30 ms per buffer: small enough that the stop key feels responsive.
        self.chunk = int(0.03 * self.sample_rate)
        self.filename = filename
        self.START_KEY = 's'
        self.STOP_KEY = 'q'
        # NOTE(review): default device index 2 is machine-specific — confirm.
        self.input_device_index = input_device_index

    def record_procssing(self, write_to_file):
        """Capture audio until STOP_KEY is pressed; return it as a float32 array.

        (Method name kept as-is for backward compatibility with callers.)

        Parameters:
        - write_to_file: when True, also save the capture to self.filename.

        Returns the recorded samples as a 1-D numpy.float32 array.
        """
        recorded_data = []
        p = pyaudio.PyAudio()

        stream = p.open(format=self.audio_format, channels=self.channels,
                        rate=self.sample_rate, input=True,
                        frames_per_buffer=self.chunk,
                        input_device_index=self.input_device_index)

        try:
            while True:
                recorded_data.append(stream.read(self.chunk))
                if keyboard.is_pressed(self.STOP_KEY) or keyboard.is_pressed(self.STOP_KEY.upper()):
                    print("Stop recording")
                    break
        finally:
            # BUGFIX: always release the audio device, even if stream.read
            # raises; the original leaked the stream on any exception.
            stream.stop_stream()
            stream.close()
            p.terminate()

        # Convert the raw byte buffers to one contiguous numpy array.
        frames = [np.frombuffer(frame, dtype=np.float32) for frame in recorded_data]
        audio_arr = np.concatenate(frames, axis=0)
        if write_to_file:
            sf.write(self.filename, audio_arr, self.sample_rate)
            print("Saved to Record directory")
        return audio_arr

    def record(self, write_to_file=True):
        """Wait for START_KEY, then record; returns (audio_array, sample_rate)."""
        print(f"Press '{self.START_KEY}' to Start and '{self.STOP_KEY}' to Stop!")
        while True:
            if keyboard.is_pressed(self.START_KEY) or keyboard.is_pressed(self.START_KEY.upper()):
                print("Recording...")
                audio_arr = self.record_procssing(write_to_file)
                return audio_arr, self.sample_rate
|
| 54 |
+
|
| 55 |
+
if __name__ == "__main__":
    # Manual test: record from the microphone and report the captured shape.
    recorder = Recorder("audios/records/input_records/mic.mp3") #name of output file
    audo_arr, sr = recorder.record()
    print(audo_arr.shape)
|
main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import keyboard
|
| 3 |
+
from robot_voice.change_to_robot_voice import RobotVoice
|
| 4 |
+
from in_out_micro.record_and_convert import convert
|
| 5 |
+
from config_folder.config import run_config
|
| 6 |
+
from in_out_micro.stream_to_virtual_mic import stream
|
| 7 |
+
|
| 8 |
+
# Run Configuration
# NOTE(review): run_config() presumably reads config_folder/config.yaml —
# confirm. Unpacks 11 module-level settings used by voice_human2robot().
IN_DEVICE, OUT_DEVICE, USE_RECORDING, SAVE_ROBOT_TO_FILE,\
VB, VL, H, LOOKUP_SAMPLES, MOD_F, STREAM_TO_VIRTUAL_MIC, PARALLEL = run_config()
|
| 11 |
+
|
| 12 |
+
def voice_human2robot():
    """Record/convert a human voice to a robot voice and route the result.

    Uses the module-level run configuration. When STREAM_TO_VIRTUAL_MIC is
    set, the processed audio is streamed into the virtual microphone device
    and nothing is returned; otherwise the (audio_array, sample_rate) pair
    is returned to the caller.
    """
    robotizer = RobotVoice(vb=VB, vl=VL, h=H, lookup_samples=LOOKUP_SAMPLES, mod_f=MOD_F)

    # Step 1: capture (or load) audio and apply the robot-voice effect.
    robot_voice_arr, sr = convert(robotizer, OUT_DEVICE, USE_RECORDING, SAVE_ROBOT_TO_FILE)

    # Step 2: either stream into the virtual microphone or hand back the data.
    if STREAM_TO_VIRTUAL_MIC:
        stream(robot_voice_arr, sr, IN_DEVICE['name'])
        return None
    return robot_voice_arr, sr
|
| 26 |
+
|
| 27 |
+
if __name__ == "__main__":
    # Script entry point: run one record -> robotize -> route cycle.
    voice_human2robot()
|
main_parallel.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import keyboard
# BUGFIX: the original had a bare `import` statement here — a SyntaxError
# that made this module unimportable. Removed.
from robot_voice.change_to_robot_voice import RobotVoice
from in_out_micro.record_and_convert import convert
from config_folder.config import run_config
from in_out_micro.stream_to_virtual_mic import stream

def voice_human2robot():
    """Record/convert a human voice to a robot voice and route the result.

    Parallel variant of main.voice_human2robot: loads the run configuration
    on each call instead of at import time. Returns (audio_array,
    sample_rate) unless STREAM_TO_VIRTUAL_MIC routes the audio to the
    virtual microphone instead.
    """
    # Run Configuration.
    # BUGFIX: run_config() yields 11 values (see the unpack in main.py);
    # the original unpacked only 10 here, which raises ValueError at
    # runtime. PARALLEL is accepted but unused in this function.
    IN_DEVICE, OUT_DEVICE, USE_RECORDING, SAVE_ROBOT_TO_FILE, \
        VB, VL, H, LOOKUP_SAMPLES, MOD_F, STREAM_TO_VIRTUAL_MIC, PARALLEL = run_config()

    rv_obj = RobotVoice(vb=VB, vl=VL, h=H, lookup_samples=LOOKUP_SAMPLES, mod_f=MOD_F)

    # Line 1: capture (or load) audio and apply the robot-voice effect.
    robot_voice_arr, sr = convert(rv_obj, OUT_DEVICE, USE_RECORDING, SAVE_ROBOT_TO_FILE)

    # Line 2: to the virtual microphone, or back to the caller.
    if STREAM_TO_VIRTUAL_MIC:
        stream(robot_voice_arr, sr, IN_DEVICE['name'])
        return None
    return robot_voice_arr, sr
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sounddevice
keyboard
scipy
librosa
soundfile
numpy
wavio
pyaudio
pyyaml
|
robot_voice/__pycache__/change_to_robot_voice.cpython-311.pyc
ADDED
|
Binary file (6.76 kB). View file
|
|
|
robot_voice/__pycache__/waveshaper.cpython-311.pyc
ADDED
|
Binary file (1.58 kB). View file
|
|
|
robot_voice/change_to_robot_voice.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import scipy.io.wavfile as wavfile
|
| 3 |
+
from robot_voice.waveshaper import Waveshaper
|
| 4 |
+
import librosa
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
|
| 7 |
+
class RobotVoice:
    """
    Simulates a ring modulator to create a robotic voice effect.

    This implementation is based on the research paper:
    http://recherche.ircam.fr/pub/dafx11/Papers/66_e.pdf

    Parameters:

    - `vb` (float, optional): Controls the volume of the "bright" part of the speech.
        Must be LESS than 1.0. Higher values result in a shorter, potentially less clear voice.
        Defaults to 0.2. Parameters `vb` and `vl` must be different.
    - `vl` (float, optional): Controls the volume of the "low" part of the speech.
        Must be LESS than 1.0. Higher values result in a shorter, potentially less clear voice.
        Defaults to 0.4. Parameters `vb` and `vl` must be different.
    - `h` (float, optional): Controls the slope of the linear section of the diode model's response,
        which takes effect after the voltage exceeds `vl`. Defaults to 4.
    - `lookup_samples` (int, optional): Determines the size of the lookup table used for sound synthesis.
        Lower values can introduce noise. Defaults to 1024.
    - `mod_f` (float, optional): Controls the modulation frequency, which influences the "robotic" effect.
        Higher values produce a more pronounced robotic sound. Defaults to 50.

    Example usage:

    >>> processed_audio = RobotVoice(vb=0.3, vl=0.4, h=3, lookup_samples=512, mod_f=30)
    """

    def __init__(self, vb=0.2, vl=0.4, h=4, lookup_samples=1024, mod_f=50):
        self.vb = vb
        self.vl = vl
        self.h = h
        self.lookup_samples = lookup_samples
        self.mod_f = mod_f

    def _diode_response(self, v):
        """Piecewise diode transfer curve for a single voltage `v`.

        Regions: cutoff (v <= vb) -> 0; quadratic knee (vb < v <= vl);
        linear region above vl, continuous at v == vl.
        """
        if v <= self.vb:
            # BUGFIX: the original tested `v < self.vb` here and
            # `self.vb < v` in the next branch, so v == vb matched NEITHER
            # and fell into the linear else-branch — a spurious
            # discontinuity at the cutoff boundary. The quadratic knee is 0
            # at v == vb, so mapping the boundary to 0 is the continuous fix.
            return 0.0
        if v <= self.vl:
            return self.h * ((v - self.vb) ** 2) / (2 * self.vl - 2 * self.vb)
        return self.h * v - self.h * self.vl + (self.h * (self.vl - self.vb) ** 2) / (2 * self.vl - 2 * self.vb)

    def diode_lookup(self, n_samples):
        """Build an n_samples-long lookup table of the diode response over [-1, 1].

        Index i maps to voltage v = (i - n/2)/(n/2); the response of |v| is stored.
        """
        result = np.zeros((n_samples,))
        for i in range(0, n_samples):
            v = float(i - float(n_samples) / 2) / (n_samples / 2)
            result[i] = self._diode_response(abs(v))
        return result

    def raw_diode(self, signal):
        """Apply the diode response sample-by-sample to `signal` (no abs())."""
        result = np.zeros(signal.shape)
        for i in range(0, signal.shape[0]):
            result[i] = self._diode_response(signal[i])
        return result

    def run(self, input_func, output_func, use_record=False, save_to_file=True):
        """Apply the ring-modulator effect to an audio source.

        Parameters:
        - input_func: (audio_array, sample_rate) tuple when use_record is
          True; otherwise a file path loadable by librosa.
        - output_func: output file path, used when save_to_file is True.
        - use_record: selects the input mode above.
        - save_to_file: when True, write the result with soundfile.

        Returns (result_array, sample_rate).
        """
        if use_record:
            data, rate = input_func
        else:
            data, rate = librosa.load(input_func)
        print("data.shape, rate", data.shape, rate)

        # Get max value to scale back to the original volume at the end.
        # NOTE(review): a silent input (scaler == 0) divides by zero below.
        scaler = np.max(np.abs(data))

        # Normalize to floats in range -1.0 < data < 1.0.
        data = data.astype('float') / scaler

        # Length of array (number of samples).
        n_samples = data.shape[0]

        # Create the lookup table simulating the diode pair.
        d_lookup = self.diode_lookup(self.lookup_samples)
        diode = Waveshaper(d_lookup)

        # Carrier: sine wave of frequency self.mod_f (in Hz), gained by 1/2.
        tone = np.sin(2 * np.pi * np.arange(n_samples) * self.mod_f / rate)
        tone = tone * 0.5

        # Junctions: split carrier and signal into two paths.
        tone2 = tone.copy()  # to top path
        data2 = data.copy()  # to bottom path

        # Invert tone, sum paths.
        tone = -tone + data2  # bottom path
        data = data + tone2   # top path

        # Each path passes through a back-to-back diode pair.
        data = diode.transform(data) + diode.transform(-data)    # top
        tone = diode.transform(tone) + diode.transform(-tone)    # bottom

        result = data - tone

        # Scale to +-1.0, then restore the input file's peak level.
        result /= np.max(np.abs(result))
        result *= scaler

        if save_to_file:
            sf.write(output_func, result, rate)
        return result, rate
|
| 127 |
+
|
| 128 |
+
if __name__ == "__main__":
    # Manual test: robotize a sample clip and write the result to output_audios.
    obj = RobotVoice(vb=0.1, vl=0.15, h=4, lookup_samples=1024, mod_f=20)
    robot_voice_arr, sr = obj.run('audios/audios/input_audios/linus.mp3',
                                  'audios/audios/output_audios/linus.mp3')
|
robot_voice/waveshaper.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
class Waveshaper():
    """
    Apply a transform to an audio signal; the transform is stored as a
    curve used as a lookup table. Implementation of the Web Audio API's
    WaveShaperNode interface (the original comment said "jQuery", which
    is incorrect):
    http://webaudio.github.io/web-audio-api/#the-waveshapernode-interface
    """

    def __init__(self, curve):
        # curve: 1-D array; sample values are mapped onto indices into it.
        self.curve = curve
        self.n_bins = self.curve.shape[0]

    def transform(self, samples):
        """Map each sample through the lookup curve and return the result.

        Samples are shifted/scaled from [-1, 1] onto curve indices
        [0, n_bins - 1]; inputs whose peak exceeds 1 are normalized first.
        """
        max_val = np.max(np.abs(samples))
        if max_val >= 1.0:
            # Reuse max_val instead of recomputing the peak (the original
            # called np.max(np.abs(samples)) a second time here).
            result = samples / max_val + 1.0
        else:
            result = samples + 1.0
        # Scale 0..2 onto 0..n_bins-1 and truncate to integer indices.
        result = result * (self.n_bins - 1) / 2
        return self.curve[result.astype('int')]
|
test.ipynb
ADDED
|
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"from change_to_robot_voice import RobotVoice\n",
|
| 10 |
+
"import IPython.display as ipd"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": null,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [],
|
| 18 |
+
"source": [
|
| 19 |
+
"obj = RobotVoice(vb=-5, vl=6, h=4, lookup_samples=1024, mod_f=20)\n",
|
| 20 |
+
"robot_voice_arr, sr = obj.run('audio-Wizard-of-Oz-Dorthy.wav', 'audio-Wizard-of-Oz-Dorthy_robot.mp3')"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"cell_type": "code",
|
| 25 |
+
"execution_count": null,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"outputs": [],
|
| 28 |
+
"source": [
|
| 29 |
+
"from IPython.display import Audio\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"Audio(filename='audio-Wizard-of-Oz-Dorthy_robot.mp3', autoplay=True)"
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"cell_type": "code",
|
| 36 |
+
"execution_count": null,
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"source": [
|
| 40 |
+
"from voice_record import Recorder"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": null,
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"source": [
|
| 49 |
+
"recorder = Recorder(\"audios/recordsinput_records/mic.wav\") #name of output file\n",
|
| 50 |
+
"audo_arr = recorder.record()"
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "code",
|
| 55 |
+
"execution_count": 37,
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"outputs": [
|
| 58 |
+
{
|
| 59 |
+
"name": "stdout",
|
| 60 |
+
"output_type": "stream",
|
| 61 |
+
"text": [
|
| 62 |
+
"(132861,) 22050\n",
|
| 63 |
+
"data.shape, rate (69405,) 22050\n",
|
| 64 |
+
"here data.shape, rate (69405,) 22050\n"
|
| 65 |
+
]
|
| 66 |
+
}
|
| 67 |
+
],
|
| 68 |
+
"source": [
|
| 69 |
+
"import os\n",
|
| 70 |
+
"import librosa\n",
|
| 71 |
+
"import numpy as np\n",
|
| 72 |
+
"from change_to_robot_voice import RobotVoice\n",
|
| 73 |
+
"from voice_record import Recorder\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"USE_RECORD = False\n",
|
| 76 |
+
"SAVE_TO_FILE = True\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"if USE_RECORD:\n",
|
| 79 |
+
" base_name_file = \"mic.mp3\"\n",
|
| 80 |
+
" recorder = Recorder(os.path.join(\"audios\", \"records\", \"input_records\", base_name_file)) #name of output file\n",
|
| 81 |
+
" audio_arr, sr = recorder.record(write_to_file=True)\n",
|
| 82 |
+
" print(audio_arr.shape)\n",
|
| 83 |
+
"\n",
|
| 84 |
+
"else:\n",
|
| 85 |
+
" input_file = r'audios\\records\\input_records\\mic.mp3'#input(\"Paste your file path: \")\n",
|
| 86 |
+
" base_name_file = os.path.basename(input_file)\n",
|
| 87 |
+
" audio_arr, sr = librosa.load(input_file)\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"print(audio_arr.shape, sr)\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"obj = RobotVoice(vb=0.1, vl=0.001, h=1, lookup_samples=1024, mod_f=10)\n",
|
| 92 |
+
"# audio_arr = librosa.util.normalize(audio_arr)\n",
|
| 93 |
+
"if USE_RECORD: \n",
|
| 94 |
+
" robot_voice_arr, sr = obj.run(input_func=(audio_arr, sr),\n",
|
| 95 |
+
" output_func=os.path.join('audios','records','output_records',base_name_file),\n",
|
| 96 |
+
" use_record=USE_RECORD, save_to_file=SAVE_TO_FILE)\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"else:\n",
|
| 99 |
+
" robot_voice_arr, sr = obj.run(os.path.join('audios','audios','input_audios',base_name_file),\n",
|
| 100 |
+
" os.path.join('audios','audios','output_audios',base_name_file),\n",
|
| 101 |
+
" USE_RECORD, SAVE_TO_FILE)\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"\n"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": 38,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"import sounddevice as sd\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"sd.play(robot_voice_arr, sr)"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"execution_count": 36,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [],
|
| 122 |
+
"source": [
|
| 123 |
+
"vl_10 = robot_voice_arr.copy()"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": 39,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"sd.play(vl_10, sr)"
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"execution_count": 13,
|
| 138 |
+
"metadata": {},
|
| 139 |
+
"outputs": [],
|
| 140 |
+
"source": [
|
| 141 |
+
"sd.play(audio_arr, sr)"
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"cell_type": "code",
|
| 146 |
+
"execution_count": 5,
|
| 147 |
+
"metadata": {},
|
| 148 |
+
"outputs": [],
|
| 149 |
+
"source": [
|
| 150 |
+
"sd.stop()"
|
| 151 |
+
]
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"cell_type": "code",
|
| 155 |
+
"execution_count": 8,
|
| 156 |
+
"metadata": {},
|
| 157 |
+
"outputs": [],
|
| 158 |
+
"source": [
|
| 159 |
+
"audio_arr_2, sr = librosa.load(r'audios\\records\\input_records\\mic.mp3')\n"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 10,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [
|
| 167 |
+
{
|
| 168 |
+
"data": {
|
| 169 |
+
"text/plain": [
|
| 170 |
+
"array([ 0.00872803, 0.00704956, -0.00872803, ..., -0.08926392,\n",
|
| 171 |
+
" -0.11709595, -0.0836792 ], dtype=float32)"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
"execution_count": 10,
|
| 175 |
+
"metadata": {},
|
| 176 |
+
"output_type": "execute_result"
|
| 177 |
+
}
|
| 178 |
+
],
|
| 179 |
+
"source": [
|
| 180 |
+
"audio_arr"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"cell_type": "code",
|
| 185 |
+
"execution_count": 9,
|
| 186 |
+
"metadata": {},
|
| 187 |
+
"outputs": [
|
| 188 |
+
{
|
| 189 |
+
"data": {
|
| 190 |
+
"text/plain": [
|
| 191 |
+
"array([ 0.01161877, 0.00981571, -0.00999575, ..., -0.08572178,\n",
|
| 192 |
+
" -0.1190884 , -0.08362549], dtype=float32)"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
"execution_count": 9,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"output_type": "execute_result"
|
| 198 |
+
}
|
| 199 |
+
],
|
| 200 |
+
"source": [
|
| 201 |
+
"audio_arr_2"
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"cell_type": "code",
|
| 206 |
+
"execution_count": 32,
|
| 207 |
+
"metadata": {},
|
| 208 |
+
"outputs": [
|
| 209 |
+
{
|
| 210 |
+
"data": {
|
| 211 |
+
"text/plain": [
|
| 212 |
+
"17251"
|
| 213 |
+
]
|
| 214 |
+
},
|
| 215 |
+
"execution_count": 32,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"output_type": "execute_result"
|
| 218 |
+
}
|
| 219 |
+
],
|
| 220 |
+
"source": [
|
| 221 |
+
"np.max(audio_arr)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": 33,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [
|
| 229 |
+
{
|
| 230 |
+
"name": "stderr",
|
| 231 |
+
"output_type": "stream",
|
| 232 |
+
"text": [
|
| 233 |
+
"C:\\Users\\MyLaptop\\AppData\\Local\\Temp\\ipykernel_13176\\3951130461.py:4: RuntimeWarning: overflow encountered in scalar subtract\n",
|
| 234 |
+
" normalized_data = 2 * (data - np.min(data)) / (np.max(data) - np.min(data)) - 1\n"
|
| 235 |
+
]
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"data": {
|
| 239 |
+
"text/plain": [
|
| 240 |
+
"array([-0.1477003 , -0.14747774, -0.15341246, ..., 0.16268546,\n",
|
| 241 |
+
" 0.11876855, 0.01973294])"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
"execution_count": 33,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"output_type": "execute_result"
|
| 247 |
+
}
|
| 248 |
+
],
|
| 249 |
+
"source": [
|
| 250 |
+
"def normalize_data(data):\n",
|
| 251 |
+
" \"\"\"Normalizes data to the range of -1 to 1 using numpy.\"\"\"\n",
|
| 252 |
+
" data = np.array(data)\n",
|
| 253 |
+
" normalized_data = 2 * (data - np.min(data)) / (np.max(data) - np.min(data)) - 1\n",
|
| 254 |
+
" return normalized_data\n",
|
| 255 |
+
"\n",
|
| 256 |
+
"e = normalize_data(audio_arr)\n",
|
| 257 |
+
"e"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"cell_type": "code",
|
| 262 |
+
"execution_count": 17,
|
| 263 |
+
"metadata": {},
|
| 264 |
+
"outputs": [
|
| 265 |
+
{
|
| 266 |
+
"data": {
|
| 267 |
+
"text/plain": [
|
| 268 |
+
"array([0., 0., 0., ..., 0., 0., 0.])"
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"execution_count": 17,
|
| 272 |
+
"metadata": {},
|
| 273 |
+
"output_type": "execute_result"
|
| 274 |
+
}
|
| 275 |
+
],
|
| 276 |
+
"source": [
|
| 277 |
+
"robot_voice_arr"
|
| 278 |
+
]
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"cell_type": "code",
|
| 282 |
+
"execution_count": 34,
|
| 283 |
+
"metadata": {},
|
| 284 |
+
"outputs": [
|
| 285 |
+
{
|
| 286 |
+
"data": {
|
| 287 |
+
"text/plain": [
|
| 288 |
+
"11"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
"execution_count": 34,
|
| 292 |
+
"metadata": {},
|
| 293 |
+
"output_type": "execute_result"
|
| 294 |
+
}
|
| 295 |
+
],
|
| 296 |
+
"source": [
|
| 297 |
+
"import os\n",
|
| 298 |
+
"import yaml\n",
|
| 299 |
+
"\n",
|
| 300 |
+
"def read_config(path):\n",
|
| 301 |
+
" with open(path, \"r\") as yaml_file: \n",
|
| 302 |
+
" return yaml.load(yaml_file, Loader=yaml.FullLoader)\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"def write_config(yaml_file, data):\n",
|
| 305 |
+
" yaml.dump(data, yaml_file)\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"FOLDER_CONFIG = \"configuration\"\n",
|
| 308 |
+
"CONFIG_FILE = \"config.yaml\"\n",
|
| 309 |
+
"yaml_file_path = os.path.join(FOLDER_CONFIG, CONFIG_FILE)\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"cfg = read_config(yaml_file_path)\n",
|
| 312 |
+
"url = cfg['Input']['id']\n",
|
| 313 |
+
"url"
|
| 314 |
+
]
|
| 315 |
+
},
|
| 316 |
+
{
|
| 317 |
+
"cell_type": "code",
|
| 318 |
+
"execution_count": 42,
|
| 319 |
+
"metadata": {},
|
| 320 |
+
"outputs": [
|
| 321 |
+
{
|
| 322 |
+
"name": "stdout",
|
| 323 |
+
"output_type": "stream",
|
| 324 |
+
"text": [
|
| 325 |
+
"Write successful\n"
|
| 326 |
+
]
|
| 327 |
+
}
|
| 328 |
+
],
|
| 329 |
+
"source": [
|
| 330 |
+
"yaml_file_path = os.path.join(FOLDER_CONFIG, \"abc.yaml\")\n",
|
| 331 |
+
"\n",
|
| 332 |
+
"\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"article_info = [\n",
|
| 335 |
+
" {\n",
|
| 336 |
+
" 'Details': {\n",
|
| 337 |
+
" 'domain' : 'www.abc.com',\n",
|
| 338 |
+
" 'language': 'jav',\n",
|
| 339 |
+
" 'date': '11/09/2021990'\n",
|
| 340 |
+
" }\n",
|
| 341 |
+
" }\n",
|
| 342 |
+
"]\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"with open(yaml_file_path, 'a') as yamlfile:\n",
|
| 345 |
+
" data = yaml.dump(article_info, yamlfile)\n",
|
| 346 |
+
" print(\"Write successful\")\n"
|
| 347 |
+
]
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"cell_type": "code",
|
| 351 |
+
"execution_count": null,
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"outputs": [],
|
| 354 |
+
"source": [
|
| 355 |
+
"with open(\"config.yaml\", \"w\") as f:\n",
|
| 356 |
+
" cfg = yaml.dump(\n",
|
| 357 |
+
" cfg, stream=f, default_flow_style=False, sort_keys=False\n",
|
| 358 |
+
" )"
|
| 359 |
+
]
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"cell_type": "code",
|
| 363 |
+
"execution_count": null,
|
| 364 |
+
"metadata": {},
|
| 365 |
+
"outputs": [],
|
| 366 |
+
"source": [
|
| 367 |
+
"from change_to_robot_voice import RobotVoice\n",
|
| 368 |
+
"aa = RobotVoice()"
|
| 369 |
+
]
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"cell_type": "code",
|
| 373 |
+
"execution_count": 55,
|
| 374 |
+
"metadata": {},
|
| 375 |
+
"outputs": [
|
| 376 |
+
{
|
| 377 |
+
"data": {
|
| 378 |
+
"text/plain": [
|
| 379 |
+
"'A list with information about all available audio devices.\\n\\n This class is not meant to be instantiated by the user.\\n Instead, it is returned by `query_devices()`.\\n It contains a dictionary for each available device, holding the keys\\n described in `query_devices()`.\\n\\n This class has a special string representation that is shown as\\n return value of `query_devices()` if used in an interactive\\n Python session. It will also be shown when using the :func:`print`\\n function. Furthermore, it can be obtained with :func:`repr` and\\n :class:`str() <str>`.\\n\\n '"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
"execution_count": 55,
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"output_type": "execute_result"
|
| 385 |
+
}
|
| 386 |
+
],
|
| 387 |
+
"source": [
|
| 388 |
+
"sd.query_devices().__doc__"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "code",
|
| 393 |
+
"execution_count": 64,
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"outputs": [
|
| 396 |
+
{
|
| 397 |
+
"data": {
|
| 398 |
+
"text/plain": [
|
| 399 |
+
"({'name': 'MME',\n",
|
| 400 |
+
" 'devices': [0, 1, 2, 3, 4, 5],\n",
|
| 401 |
+
" 'default_input_device': 1,\n",
|
| 402 |
+
" 'default_output_device': 4},\n",
|
| 403 |
+
" {'name': 'Windows DirectSound',\n",
|
| 404 |
+
" 'devices': [6, 7, 8, 9, 10, 11],\n",
|
| 405 |
+
" 'default_input_device': 6,\n",
|
| 406 |
+
" 'default_output_device': 9},\n",
|
| 407 |
+
" {'name': 'Windows WASAPI',\n",
|
| 408 |
+
" 'devices': [12, 13, 14, 15],\n",
|
| 409 |
+
" 'default_input_device': 15,\n",
|
| 410 |
+
" 'default_output_device': 13},\n",
|
| 411 |
+
" {'name': 'Windows WDM-KS',\n",
|
| 412 |
+
" 'devices': [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],\n",
|
| 413 |
+
" 'default_input_device': 16,\n",
|
| 414 |
+
" 'default_output_device': 26})"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
"execution_count": 64,
|
| 418 |
+
"metadata": {},
|
| 419 |
+
"output_type": "execute_result"
|
| 420 |
+
}
|
| 421 |
+
],
|
| 422 |
+
"source": [
|
| 423 |
+
"sd.query_hostapis()"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"cell_type": "code",
|
| 428 |
+
"execution_count": 65,
|
| 429 |
+
"metadata": {},
|
| 430 |
+
"outputs": [
|
| 431 |
+
{
|
| 432 |
+
"name": "stdout",
|
| 433 |
+
"output_type": "stream",
|
| 434 |
+
"text": [
|
| 435 |
+
" 0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)\n",
|
| 436 |
+
"> 1 Microphone Array (Intel® Smart , MME (4 in, 0 out)\n",
|
| 437 |
+
" 2 CABLE Output (VB-Audio Virtual , MME (8 in, 0 out)\n",
|
| 438 |
+
" 3 Microsoft Sound Mapper - Output, MME (0 in, 2 out)\n",
|
| 439 |
+
"< 4 Speakers (Realtek(R) Audio), MME (0 in, 8 out)\n",
|
| 440 |
+
" 5 CABLE Input (VB-Audio Virtual C, MME (0 in, 8 out)\n",
|
| 441 |
+
" 6 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)\n",
|
| 442 |
+
" 7 Microphone Array (Intel® Smart Sound Technology for Digital Microphones), Windows DirectSound (4 in, 0 out)\n",
|
| 443 |
+
" 8 CABLE Output (VB-Audio Virtual Cable), Windows DirectSound (8 in, 0 out)\n",
|
| 444 |
+
" 9 Primary Sound Driver, Windows DirectSound (0 in, 2 out)\n",
|
| 445 |
+
" 10 Speakers (Realtek(R) Audio), Windows DirectSound (0 in, 8 out)\n",
|
| 446 |
+
" 11 CABLE Input (VB-Audio Virtual Cable), Windows DirectSound (0 in, 8 out)\n",
|
| 447 |
+
" 12 CABLE Input (VB-Audio Virtual Cable), Windows WASAPI (0 in, 2 out)\n",
|
| 448 |
+
" 13 Speakers (Realtek(R) Audio), Windows WASAPI (0 in, 2 out)\n",
|
| 449 |
+
" 14 CABLE Output (VB-Audio Virtual Cable), Windows WASAPI (2 in, 0 out)\n",
|
| 450 |
+
" 15 Microphone Array (Intel® Smart Sound Technology for Digital Microphones), Windows WASAPI (2 in, 0 out)\n",
|
| 451 |
+
" 16 Microphone Array 1 (), Windows WDM-KS (2 in, 0 out)\n",
|
| 452 |
+
" 17 Microphone Array 2 (), Windows WDM-KS (2 in, 0 out)\n",
|
| 453 |
+
" 18 Microphone Array 3 (), Windows WDM-KS (4 in, 0 out)\n",
|
| 454 |
+
" 19 Microphone Array 4 (), Windows WDM-KS (4 in, 0 out)\n",
|
| 455 |
+
" 20 CABLE Output (VB-Audio Point), Windows WDM-KS (8 in, 0 out)\n",
|
| 456 |
+
" 21 Speakers (VB-Audio Point), Windows WDM-KS (0 in, 8 out)\n",
|
| 457 |
+
" 22 Stereo Mix (Realtek HD Audio Stereo input), Windows WDM-KS (2 in, 0 out)\n",
|
| 458 |
+
" 23 Headphones 1 (Realtek HD Audio 2nd output with SST), Windows WDM-KS (0 in, 2 out)\n",
|
| 459 |
+
" 24 Headphones 2 (Realtek HD Audio 2nd output with SST), Windows WDM-KS (0 in, 8 out)\n",
|
| 460 |
+
" 25 PC Speaker (Realtek HD Audio 2nd output with SST), Windows WDM-KS (2 in, 0 out)\n",
|
| 461 |
+
" 26 Speakers 1 (Realtek HD Audio output with SST), Windows WDM-KS (0 in, 2 out)\n",
|
| 462 |
+
" 27 Speakers 2 (Realtek HD Audio output with SST), Windows WDM-KS (0 in, 8 out)\n",
|
| 463 |
+
" 28 PC Speaker (Realtek HD Audio output with SST), Windows WDM-KS (2 in, 0 out)\n",
|
| 464 |
+
" 29 Microphone 1 (Realtek HD Audio Mic input with SST), Windows WDM-KS (2 in, 0 out)\n",
|
| 465 |
+
" 30 Microphone 2 (Realtek HD Audio Mic input with SST), Windows WDM-KS (4 in, 0 out)\n",
|
| 466 |
+
" 31 Microphone 3 (Realtek HD Audio Mic input with SST), Windows WDM-KS (4 in, 0 out)\n"
|
| 467 |
+
]
|
| 468 |
+
}
|
| 469 |
+
],
|
| 470 |
+
"source": [
|
| 471 |
+
"import sounddevice as sd\n",
|
| 472 |
+
"audio_device_list = sd.query_devices()\n",
|
| 473 |
+
"# 11 CABLE Input (VB-Audio Virtual Cable), Windows DirectSound\n",
|
| 474 |
+
"print(audio_device_list)"
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"cell_type": "code",
|
| 479 |
+
"execution_count": 63,
|
| 480 |
+
"metadata": {},
|
| 481 |
+
"outputs": [
|
| 482 |
+
{
|
| 483 |
+
"data": {
|
| 484 |
+
"text/plain": [
|
| 485 |
+
"' 0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)\\n> 1 Microphone Array (Intel® Smart , MME (4 in, 0 out)\\n 2 CABLE Output (VB-Audio Virtual , MME (8 in, 0 out)\\n 3 Microsoft Sound Mapper - Output, MME (0 in, 2 out)\\n< 4 Speakers (Realtek(R) Audio), MME (0 in, 8 out)\\n 5 CABLE Input (VB-Audio Virtual C, MME (0 in, 8 out)\\n 6 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)\\n 7 Microphone Array (Intel® Smart Sound Technology for Digital Microphones), Windows DirectSound (4 in, 0 out)\\n 8 CABLE Output (VB-Audio Virtual Cable), Windows DirectSound (8 in, 0 out)\\n 9 Primary Sound Driver, Windows DirectSound (0 in, 2 out)\\n 10 Speakers (Realtek(R) Audio), Windows DirectSound (0 in, 8 out)\\n 11 CABLE Input (VB-Audio Virtual Cable), Windows DirectSound (0 in, 8 out)\\n 12 CABLE Input (VB-Audio Virtual Cable), Windows WASAPI (0 in, 2 out)\\n 13 Speakers (Realtek(R) Audio), Windows WASAPI (0 in, 2 out)\\n 14 CABLE Output (VB-Audio Virtual Cable), Windows WASAPI (2 in, 0 out)\\n 15 Microphone Array (Intel® Smart Sound Technology for Digital Microphones), Windows WASAPI (2 in, 0 out)\\n 16 Microphone Array 1 (), Windows WDM-KS (2 in, 0 out)\\n 17 Microphone Array 2 (), Windows WDM-KS (2 in, 0 out)\\n 18 Microphone Array 3 (), Windows WDM-KS (4 in, 0 out)\\n 19 Microphone Array 4 (), Windows WDM-KS (4 in, 0 out)\\n 20 CABLE Output (VB-Audio Point), Windows WDM-KS (8 in, 0 out)\\n 21 Speakers (VB-Audio Point), Windows WDM-KS (0 in, 8 out)\\n 22 Stereo Mix (Realtek HD Audio Stereo input), Windows WDM-KS (2 in, 0 out)\\n 23 Headphones 1 (Realtek HD Audio 2nd output with SST), Windows WDM-KS (0 in, 2 out)\\n 24 Headphones 2 (Realtek HD Audio 2nd output with SST), Windows WDM-KS (0 in, 8 out)\\n 25 PC Speaker (Realtek HD Audio 2nd output with SST), Windows WDM-KS (2 in, 0 out)\\n 26 Speakers 1 (Realtek HD Audio output with SST), Windows WDM-KS (0 in, 2 out)\\n 27 Speakers 2 (Realtek HD Audio output with SST), Windows WDM-KS (0 in, 8 out)\\n 28 PC 
Speaker (Realtek HD Audio output with SST), Windows WDM-KS (2 in, 0 out)\\n 29 Microphone 1 (Realtek HD Audio Mic input with SST), Windows WDM-KS (2 in, 0 out)\\n 30 Microphone 2 (Realtek HD Audio Mic input with SST), Windows WDM-KS (4 in, 0 out)\\n 31 Microphone 3 (Realtek HD Audio Mic input with SST), Windows WDM-KS (4 in, 0 out)'"
|
| 486 |
+
]
|
| 487 |
+
},
|
| 488 |
+
"execution_count": 63,
|
| 489 |
+
"metadata": {},
|
| 490 |
+
"output_type": "execute_result"
|
| 491 |
+
}
|
| 492 |
+
],
|
| 493 |
+
"source": [
|
| 494 |
+
"audio_device_list"
|
| 495 |
+
]
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"cell_type": "code",
|
| 499 |
+
"execution_count": 18,
|
| 500 |
+
"metadata": {},
|
| 501 |
+
"outputs": [
|
| 502 |
+
{
|
| 503 |
+
"ename": "TypeError",
|
| 504 |
+
"evalue": "unsupported operand type(s) for +: 'aA' and 'int'",
|
| 505 |
+
"output_type": "error",
|
| 506 |
+
"traceback": [
|
| 507 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 508 |
+
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
| 509 |
+
"Cell \u001b[1;32mIn[18], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mint\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ma)\n\u001b[0;32m 7\u001b[0m e \u001b[38;5;241m=\u001b[39m aA(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m----> 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43me\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m)\n",
|
| 510 |
+
"\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'aA' and 'int'"
|
| 511 |
+
]
|
| 512 |
+
}
|
| 513 |
+
],
|
| 514 |
+
"source": [
|
| 515 |
+
"class aA:\n",
|
| 516 |
+
" def __init__(self, a):\n",
|
| 517 |
+
" self.a = a\n",
|
| 518 |
+
" def __repr__(self):\n",
|
| 519 |
+
" return int(self.a)\n",
|
| 520 |
+
"\n",
|
| 521 |
+
"e = aA(1)\n",
|
| 522 |
+
"print(e+1)"
|
| 523 |
+
]
|
| 524 |
+
},
|
| 525 |
+
{
|
| 526 |
+
"cell_type": "code",
|
| 527 |
+
"execution_count": 48,
|
| 528 |
+
"metadata": {},
|
| 529 |
+
"outputs": [
|
| 530 |
+
{
|
| 531 |
+
"data": {
|
| 532 |
+
"text/plain": [
|
| 533 |
+
"{'index': 1,\n",
|
| 534 |
+
" 'structVersion': 2,\n",
|
| 535 |
+
" 'name': 'Microphone Array (Intel® Smart ',\n",
|
| 536 |
+
" 'hostApi': 0,\n",
|
| 537 |
+
" 'maxInputChannels': 4,\n",
|
| 538 |
+
" 'maxOutputChannels': 0,\n",
|
| 539 |
+
" 'defaultLowInputLatency': 0.09,\n",
|
| 540 |
+
" 'defaultLowOutputLatency': 0.09,\n",
|
| 541 |
+
" 'defaultHighInputLatency': 0.18,\n",
|
| 542 |
+
" 'defaultHighOutputLatency': 0.18,\n",
|
| 543 |
+
" 'defaultSampleRate': 44100.0}"
|
| 544 |
+
]
|
| 545 |
+
},
|
| 546 |
+
"execution_count": 48,
|
| 547 |
+
"metadata": {},
|
| 548 |
+
"output_type": "execute_result"
|
| 549 |
+
}
|
| 550 |
+
],
|
| 551 |
+
"source": [
|
| 552 |
+
"in_devices"
|
| 553 |
+
]
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"cell_type": "code",
|
| 557 |
+
"execution_count": 49,
|
| 558 |
+
"metadata": {},
|
| 559 |
+
"outputs": [
|
| 560 |
+
{
|
| 561 |
+
"data": {
|
| 562 |
+
"text/plain": [
|
| 563 |
+
"{'index': 4,\n",
|
| 564 |
+
" 'structVersion': 2,\n",
|
| 565 |
+
" 'name': 'Speakers (Realtek(R) Audio)',\n",
|
| 566 |
+
" 'hostApi': 0,\n",
|
| 567 |
+
" 'maxInputChannels': 0,\n",
|
| 568 |
+
" 'maxOutputChannels': 8,\n",
|
| 569 |
+
" 'defaultLowInputLatency': 0.09,\n",
|
| 570 |
+
" 'defaultLowOutputLatency': 0.09,\n",
|
| 571 |
+
" 'defaultHighInputLatency': 0.18,\n",
|
| 572 |
+
" 'defaultHighOutputLatency': 0.18,\n",
|
| 573 |
+
" 'defaultSampleRate': 44100.0}"
|
| 574 |
+
]
|
| 575 |
+
},
|
| 576 |
+
"execution_count": 49,
|
| 577 |
+
"metadata": {},
|
| 578 |
+
"output_type": "execute_result"
|
| 579 |
+
}
|
| 580 |
+
],
|
| 581 |
+
"source": [
|
| 582 |
+
"out_devices"
|
| 583 |
+
]
|
| 584 |
+
}
|
| 585 |
+
],
|
| 586 |
+
"metadata": {
|
| 587 |
+
"kernelspec": {
|
| 588 |
+
"display_name": "Python 3",
|
| 589 |
+
"language": "python",
|
| 590 |
+
"name": "python3"
|
| 591 |
+
},
|
| 592 |
+
"language_info": {
|
| 593 |
+
"codemirror_mode": {
|
| 594 |
+
"name": "ipython",
|
| 595 |
+
"version": 3
|
| 596 |
+
},
|
| 597 |
+
"file_extension": ".py",
|
| 598 |
+
"mimetype": "text/x-python",
|
| 599 |
+
"name": "python",
|
| 600 |
+
"nbconvert_exporter": "python",
|
| 601 |
+
"pygments_lexer": "ipython3",
|
| 602 |
+
"version": "3.11.9"
|
| 603 |
+
}
|
| 604 |
+
},
|
| 605 |
+
"nbformat": 4,
|
| 606 |
+
"nbformat_minor": 2
|
| 607 |
+
}
|
test2.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sounddevice as sd

# Interactive helper: list every audio device, let the user pick one by index,
# and echo back "device name, host-API name" (e.g. "..., Windows DirectSound").
devices = sd.query_devices()
hostapis = sd.query_hostapis()

prompt = (
    f"Please choose 1 Input Microphone (Just enter the device index): \n {devices}\n"
    "Your Index Choose is: "
)
chosen_idx = int(input(prompt))

picked = devices[chosen_idx]
# Resolve the host-API index stored on the device into its human-readable name.
device_name = picked['name'] + ", " + hostapis[picked['hostapi']]['name']
print(device_name)
|
utils/pass_to_virtual_micro.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sounddevice as sd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import wavio
|
| 4 |
+
import datetime
|
| 5 |
+
|
| 6 |
+
# Configuration for the recording
|
| 7 |
+
SAMPLE_RATE = 44100
|
| 8 |
+
CHANNELS = 2
|
| 9 |
+
DTYPE = np.int16
|
| 10 |
+
SECONDS_PER_CHUNK = 10 # You can modify this to record larger or smaller chunks at a time
|
| 11 |
+
|
| 12 |
+
def select_device():
    """Pick the name of a capture device suitable for loopback recording.

    Preference order: a device whose name contains "Sound Mixer", then one
    containing "CABLE Output" (the VB-Cable virtual device). If neither is
    present, print VB-Cable install instructions and abort the process.

    Returns:
        str: the name of the selected input device.

    Raises:
        SystemExit: when no suitable input device is installed (exit code 1).
    """
    devices = sd.query_devices()
    input_names = [
        device['name'] for device in devices if device['max_input_channels'] > 0
    ]

    # Collapse the two duplicated search loops into one priority-ordered scan.
    for wanted in ("Sound Mixer", "CABLE Output"):
        for name in input_names:
            if wanted in name:
                return name

    # If neither "Sound Mixer" nor "CABLE Output" is found, prompt for VB-Cable installation
    print("It seems you don't have a suitable input device installed.")
    print("Please consider downloading and installing VB-Cable for this purpose.")
    print("You can download it from here: https://vb-audio.com/Cable/")
    # Bug fix: `exit()` is a site-module convenience that does not exist when
    # Python runs with -S or in some frozen environments; raise SystemExit
    # directly instead (identical exit code).
    raise SystemExit(1)
|
| 32 |
+
|
| 33 |
+
def get_filename_with_date_and_time():
    """Build a timestamped WAV path under ./output/, e.g.
    ./output/recording_08-31-2024_13-05-59.wav.

    Also ensures the ./output directory exists — without this, the later
    wavio.write() call fails with FileNotFoundError on a fresh checkout.

    Returns:
        str: relative path for the new recording file.
    """
    import os  # local import keeps this fix self-contained

    os.makedirs("./output", exist_ok=True)
    stamp = datetime.datetime.now().strftime('%m-%d-%Y_%H-%M-%S')
    return f"./output/recording_{stamp}.wav"
|
| 38 |
+
|
| 39 |
+
def record_until_closed(device_name):
    """Record from `device_name` until Ctrl+C, then save everything as a WAV.

    Audio is pulled from the input stream in SECONDS_PER_CHUNK-sized blocks
    and buffered in memory; on KeyboardInterrupt the chunks are concatenated
    and written as a 16-bit WAV file via wavio.

    Args:
        device_name: sounddevice device name (or index) to open for input.
    """
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, dtype=DTYPE, device=device_name) as stream:
        print("Recording... Press Ctrl+C to stop and save.")
        all_data = []

        try:
            while True:
                # stream.read returns (frames, overflowed-flag); the flag is dropped.
                audio_chunk, _ = stream.read(int(SECONDS_PER_CHUNK * SAMPLE_RATE))
                all_data.append(audio_chunk)
        except KeyboardInterrupt:
            # If user presses Ctrl+C, stop recording and save to a WAV file
            print("Recording stopped. Saving...")

            if not all_data:
                # Edge case: Ctrl+C before the first chunk completed —
                # np.concatenate([]) would raise ValueError.
                print("No audio captured; nothing to save.")
                return

            # Concatenate all chunks to form the complete audio data
            audio_data = np.concatenate(all_data, axis=0)

            # Get filename with date and time
            filename = get_filename_with_date_and_time()

            # Save to a WAV file
            wavio.write(filename, audio_data, SAMPLE_RATE, sampwidth=2)
            # Bug fix: the original f-string had no placeholder and always
            # printed a literal placeholder instead of the actual path.
            print(f"Saved as '{filename}'")
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
    # Entry point: pick a capture device, then record until interrupted.
    chosen_device = select_device()
    record_until_closed(chosen_device)
|