ui tweaks
Browse files- app.py +102 -9
- extract_feature_print.py +0 -0
- main.py +6 -0
- utils.py +1 -0
app.py
CHANGED
|
@@ -221,6 +221,7 @@ from vc_infer_pipeline import VC
|
|
| 221 |
from config import Config
|
| 222 |
|
| 223 |
config = Config()
|
|
|
|
| 224 |
# from trainset_preprocess_pipeline import PreProcess
|
| 225 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
| 226 |
|
|
@@ -248,6 +249,91 @@ index_paths = ["./logs/joel/added_IVF479_Flat_nprobe_1.index","./logs/jenny/adde
|
|
| 248 |
file_index=None
|
| 249 |
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
def vc_single(
|
| 253 |
sid,
|
|
@@ -1556,9 +1642,8 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
|
|
| 1556 |
# face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
|
| 1557 |
with gr.Row():
|
| 1558 |
animation = gr.Video(type='filepath')
|
| 1559 |
-
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
|
| 1560 |
-
|
| 1561 |
-
# animate_button = gr.Button('Animate')
|
| 1562 |
|
| 1563 |
with gr.Column():
|
| 1564 |
with gr.Accordion("Index Settings", open=False):
|
|
@@ -1584,12 +1669,20 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
|
|
| 1584 |
value=0.66,
|
| 1585 |
interactive=True,
|
| 1586 |
)
|
| 1587 |
-
|
| 1588 |
-
|
| 1589 |
-
|
| 1590 |
-
|
| 1591 |
-
|
| 1592 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1593 |
with gr.Accordion("Advanced Settings", open=False):
|
| 1594 |
f0method0 = gr.Radio(
|
| 1595 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
|
|
|
| 221 |
from config import Config
|
| 222 |
|
| 223 |
config = Config()
|
| 224 |
+
cpt=None
|
| 225 |
# from trainset_preprocess_pipeline import PreProcess
|
| 226 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
| 227 |
|
|
|
|
| 249 |
file_index=None
|
| 250 |
|
| 251 |
|
| 252 |
+
# Compute a "similarity score" used to flag potential copyright infringement.
def calculate_similarity_score(
    audio0,
    index_file,
    sid0,
    version="v1",
    index_rate=0.67,
):
    """Score how close an audio clip lies to a voice model's stored features.

    The clip is loaded at 16 kHz, peak-normalised, and passed through the
    shared HuBERT model. The first extracted feature row is then looked up in
    the Faiss index at ``index_file``, and its nearest-neighbour distance is
    compared with the median nearest-neighbour distance of the model's stored
    feature matrix (``total_fea.npy``).

    Args:
        audio0: Audio input forwarded unchanged to ``load_audio``.
        index_file: Path of the Faiss index to search; ``None`` disables the
            comparison.
        sid0: Voice model file name. Only ``"joel.pth"`` and ``"jenny.pth"``
            have a registered feature file.
        version: ``"v1"`` reads HuBERT layer 9 and applies ``final_proj``;
            any other value reads layer 12 without the projection.
        index_rate: A value of ``0`` disables the comparison.

    Returns:
        ``"Score <distance>, <reference distances>"`` on success, or ``None``
        when the model is unknown, the comparison is disabled, the audio
        cannot be loaded, or no features could be extracted.
    """
    feature_files = {
        "joel.pth": "./logs/joel/total_fea.npy",
        "jenny.pth": "./logs/jenny/total_fea.npy",
    }
    big_npy_path = feature_files.get(sid0)
    if big_npy_path is None:
        # Previously an unknown model left the path unbound and crashed later
        # with UnboundLocalError.
        logging.warning("no stored feature file registered for model %r", sid0)
        return None
    if index_file is None or index_rate == 0:
        # Nothing to compare against; bail out before the expensive audio
        # decoding and HuBERT inference.
        return None

    try:
        audio = load_audio(audio0, 16000, DoFormant, Quefrency, Timbre)
        logging.log(logging.INFO, "audio loaded")
        if audio.size == 0:
            logging.warning("audio input contains no samples")
            return None
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
    except TypeError as e:
        print(e)
        return None

    # Load the shared HuBERT model on first use.
    if hubert_model is None:
        load_hubert()
    model = hubert_model

    feats = torch.from_numpy(audio)
    # Match the precision the model is configured for.
    feats = feats.half() if config.is_half else feats.float()
    if feats.dim() == 2:  # double channels
        feats = feats.mean(-1)
    assert feats.dim() == 1, feats.dim()
    feats = feats.view(1, -1)
    padding_mask = torch.zeros(
        feats.shape, dtype=torch.bool, device=config.device
    )
    inputs = {
        "source": feats.to(config.device),
        "padding_mask": padding_mask,
        "output_layer": 9 if version == "v1" else 12,
    }
    with torch.no_grad():
        logits = model.extract_features(**inputs)
        feats = model.final_proj(logits[0]) if version == "v1" else logits[0]

    # Faiss searches float32, C-contiguous matrices; cast once, regardless of
    # whether the model ran in half precision.
    extracted_feats = np.ascontiguousarray(
        feats[0].cpu().numpy(), dtype=np.float32
    )
    if extracted_feats.shape[0] == 0:
        logging.warning("no features could be extracted from the audio")
        return None
    big_npy = np.ascontiguousarray(np.load(big_npy_path), dtype=np.float32)

    index = faiss.read_index(index_file)

    # Reference distribution: nearest-neighbour distance of every stored
    # feature row, with the median as decision threshold.
    # NOTE(review): if the index was built from these same rows, these
    # distances will be ~0 and the threshold will rarely trigger -- confirm.
    reference_sq, _ = index.search(big_npy, k=1)
    distances = np.sqrt(reference_sq[:, 0])
    threshold = np.percentile(distances, 50)

    # Faiss L2 indexes report *squared* distances, so take the square root
    # here as well; the previous code compared a squared query distance with
    # a threshold derived from square-rooted reference distances.
    # NOTE(review): only the first feature row of the clip is scored, as
    # before; aggregating over all rows may be more meaningful.
    query_sq, _ = index.search(extracted_feats, k=1)
    score = np.sqrt(query_sq[0][0])
    if score < threshold:
        print("Potential unauthorized use detected!")

    return f"Score {score}, {distances}"
|
| 336 |
+
|
| 337 |
|
| 338 |
def vc_single(
|
| 339 |
sid,
|
|
|
|
| 1642 |
# face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
|
| 1643 |
with gr.Row():
|
| 1644 |
animation = gr.Video(type='filepath')
|
| 1645 |
+
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
|
| 1646 |
+
animate_button = gr.Button('Animate')
|
|
|
|
| 1647 |
|
| 1648 |
with gr.Column():
|
| 1649 |
with gr.Accordion("Index Settings", open=False):
|
|
|
|
| 1669 |
value=0.66,
|
| 1670 |
interactive=True,
|
| 1671 |
)
|
| 1672 |
+
with gr.Row():
|
| 1673 |
+
vc_output2 = gr.Audio(
|
| 1674 |
+
label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
|
| 1675 |
+
type='filepath',
|
| 1676 |
+
interactive=False,
|
| 1677 |
+
)
|
| 1678 |
+
with gr.Row():
|
| 1679 |
+
# Create a new button to calculate the similarity score
|
| 1680 |
+
similarity_button = gr.Button("Calculate Similarity Score", variant="primary")
|
| 1681 |
+
with gr.Row():
|
| 1682 |
+
similarity_score = gr.Textbox(label="Similarity Score", type="text", interactive=False)
|
| 1683 |
+
similarity_button.click(fn=calculate_similarity_score, inputs=[input_audio0, file_index1, sid0], outputs=[similarity_score])
|
| 1684 |
+
print(file_index1)
|
| 1685 |
+
#animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
|
| 1686 |
with gr.Accordion("Advanced Settings", open=False):
|
| 1687 |
f0method0 = gr.Radio(
|
| 1688 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
extract_feature_print.py
ADDED
|
File without changes
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Main file to run the application. """
|
| 2 |
+
import uvicorn
|
| 3 |
+
from app.app import app
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
    # Serve the imported application object with uvicorn when this file is
    # executed directly; host 0.0.0.0 binds every IPv4 interface.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|
utils.py
CHANGED
|
@@ -150,3 +150,4 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
|
| 150 |
converted = False
|
| 151 |
|
| 152 |
return np.frombuffer(out, np.float32).flatten()
|
|
|
|
|
|
| 150 |
converted = False
|
| 151 |
|
| 152 |
return np.frombuffer(out, np.float32).flatten()
|
| 153 |
+
|