Update preprocessing_router.py
preprocessing_router.py CHANGED (+946, -942)
@@ -1,942 +1,946 @@
[- 942 lines: previous version of the file, collapsed in the diff view and not recoverable here]
from __future__ import annotations

from fastapi import APIRouter, UploadFile, File, Form, BackgroundTasks, HTTPException, Body
from fastapi.responses import FileResponse
from pathlib import Path
from datetime import datetime
from enum import Enum
from typing import Dict, Any, List
import shutil
import os
import uuid
import numpy as np
import cv2
import tempfile

from casting_loader import ensure_chroma, build_faces_index, build_voices_index
from llm_router import load_yaml, LLMRouter
from storage.media_routers import upload_video

# External space clients (no local GPU needed)
import svision_client
import asr_client


ROOT = Path("/tmp/veureu")
ROOT.mkdir(parents=True, exist_ok=True)
TEMP_ROOT = Path("/tmp/temp")
TEMP_ROOT.mkdir(parents=True, exist_ok=True)
VIDEOS_ROOT = Path("/tmp/data/videos")
VIDEOS_ROOT.mkdir(parents=True, exist_ok=True)
IDENTITIES_ROOT = Path("/tmp/characters")
IDENTITIES_ROOT.mkdir(parents=True, exist_ok=True)
VEUREU_TOKEN = os.getenv("VEUREU_TOKEN")


class JobStatus(str, Enum):
    QUEUED = "queued"
    PROCESSING = "processing"
    DONE = "done"
    FAILED = "failed"


jobs: Dict[str, dict] = {}

# ---------------------------------------------------------------------------
# Helper function for clustering (only math, no GPU)
# ---------------------------------------------------------------------------

def hierarchical_cluster_with_min_size(X, max_groups: int, min_cluster_size: int, sensitivity: float = 0.5) -> np.ndarray:
    """Hierarchical clustering driven only by min_cluster_size and a k-target (max_groups).

    - First tries to create the largest possible number of clusters with at
      least ``min_cluster_size`` elements.
    - Then merges implicitly (by lowering the cluster count) until the number
      of valid clusters (size >= min_cluster_size) is less than or equal to
      ``max_groups``.

    ``sensitivity`` is kept in the signature for compatibility but is unused.
    """
    from scipy.cluster.hierarchy import linkage, fcluster
    from collections import Counter

    n_samples = len(X)
    if n_samples == 0:
        return np.array([])

    # If there are not enough samples to form even one valid cluster,
    # mark everything as noise (-1).
    if n_samples < min_cluster_size:
        return np.full(n_samples, -1, dtype=int)

    # k_target = max_groups (this parameter is interpreted as the k-target)
    k_target = max(0, int(max_groups))

    # Special case: k_target == 0 => no clusters wanted, everything is noise.
    if k_target == 0:
        return np.full(n_samples, -1, dtype=int)

    # Compute the hierarchical linkage just once
    Z = linkage(X, method="average", metric="cosine")

    # Maximum number of clusters possible while respecting min_cluster_size
    max_possible = n_samples // min_cluster_size
    if max_possible <= 0:
        return np.full(n_samples, -1, dtype=int)

    max_to_try = min(max_possible, n_samples)

    best_labels = np.full(n_samples, -1, dtype=int)

    # Sweep from many clusters down to few, taking the first solution
    # with between 1 and k_target valid clusters.
    for n_clusters in range(max_to_try, 0, -1):
        trial_labels = fcluster(Z, t=n_clusters, criterion="maxclust") - 1
        counts = Counter(trial_labels)

        # Clusters that are large enough
        valid_clusters = {lbl for lbl, cnt in counts.items() if cnt >= min_cluster_size}
        num_valid = len(valid_clusters)

        if num_valid == 0:
            # Too fine-grained: every cluster is too small
            continue

        if num_valid <= k_target:
            # Accept this solution; anything outside a valid cluster is noise
            final_labels = []
            for lbl in trial_labels:
                if lbl in valid_clusters:
                    final_labels.append(lbl)
                else:
                    final_labels.append(-1)
            best_labels = np.array(final_labels, dtype=int)
            break

    return best_labels

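# Illustrative usage sketch for the helper above (not part of the router;
# the embeddings and parameter values here are invented):
#
#     rng = np.random.default_rng(0)
#     X = rng.random((12, 128))  # 12 fake 128-d embeddings
#     labels = hierarchical_cluster_with_min_size(X, max_groups=2, min_cluster_size=3)
#     # labels[i] == -1 marks noise; values >= 0 identify accepted clusters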

router = APIRouter(tags=["Preprocessing Manager"])


@router.post("/create_initial_casting")
async def create_initial_casting(
    background_tasks: BackgroundTasks,
    video: UploadFile = File(...),
    max_groups: int = Form(default=3),
    min_cluster_size: int = Form(default=3),
    face_sensitivity: float = Form(default=0.5),
    voice_max_groups: int = Form(default=3),
    voice_min_cluster_size: int = Form(default=3),
    voice_sensitivity: float = Form(default=0.5),
    max_frames: int = Form(default=100),
):
    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    # Rewind the upload stream (copyfileobj above consumed it) before reuse
    video.file.seek(0)
    upload_video(video, VEUREU_TOKEN)

    job_id = str(uuid.uuid4())

    jobs[job_id] = {
        "id": job_id,
        "status": JobStatus.QUEUED,
        "video_path": str(dst_video),
        "video_name": video_name,
        "max_groups": int(max_groups),
        "min_cluster_size": int(min_cluster_size),
        "face_sensitivity": float(face_sensitivity),
        "voice_max_groups": int(voice_max_groups),
        "voice_min_cluster_size": int(voice_min_cluster_size),
        "voice_sensitivity": float(voice_sensitivity),
        "max_frames": int(max_frames),
        "created_at": datetime.now().isoformat(),
        "results": None,
        "error": None,
    }

    print(f"[{job_id}] Job created for video: {video_name}")
    background_tasks.add_task(process_video_job, job_id)
    return {"job_id": job_id}

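# Illustrative client call for the endpoint above (host and port are
# assumptions, not part of this module):
#
#     import requests
#     with open("movie.mp4", "rb") as fh:
#         r = requests.post(
#             "http://localhost:8000/create_initial_casting",
#             files={"video": ("movie.mp4", fh, "video/mp4")},
#             data={"max_groups": 3, "min_cluster_size": 3},
#         )
#     job_id = r.json()["job_id"]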
@router.get("/jobs/{job_id}/status")
def get_job_status(job_id: str):
    if job_id not in jobs:
        raise HTTPException(status_code=404, detail="Job not found")

    job = jobs[job_id]
    status_value = job["status"].value if isinstance(job["status"], JobStatus) else str(job["status"])
    response = {"status": status_value}

    if job.get("results") is not None:
        response["results"] = job["results"]
    if job.get("error"):
        response["error"] = job["error"]

    return response

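# Illustrative polling loop against the status endpoint (host and port assumed):
#
#     import time, requests
#     while True:
#         s = requests.get(f"http://localhost:8000/jobs/{job_id}/status").json()
#         if s["status"] in ("done", "failed"):
#             break
#         time.sleep(2)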
@router.get("/files/{video_name}/{char_id}/{filename}")
def serve_character_file(video_name: str, char_id: str, filename: str):
    file_path = TEMP_ROOT / video_name / "characters" / char_id / filename
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.get("/audio/{video_name}/{filename}")
def serve_audio_file(video_name: str, filename: str):
    file_path = TEMP_ROOT / video_name / "clips" / filename
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.post("/load_casting")
async def load_casting(
    faces_dir: str = Form("identities/faces"),
    voices_dir: str = Form("identities/voices"),
    db_dir: str = Form("chroma_db"),
    drop_collections: bool = Form(False),
):
    client = ensure_chroma(Path(db_dir))
    n_faces = build_faces_index(Path(faces_dir), client, collection_name="index_faces", drop=drop_collections)
    n_voices = build_voices_index(Path(voices_dir), client, collection_name="index_voices", drop=drop_collections)
    return {"ok": True, "faces": n_faces, "voices": n_voices}

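# Illustrative form-encoded call for /load_casting (host and port assumed):
#
#     requests.post(
#         "http://localhost:8000/load_casting",
#         data={"faces_dir": "identities/faces",
#               "voices_dir": "identities/voices",
#               "drop_collections": True},
#     )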
@router.post("/finalize_casting")
async def finalize_casting(
    payload: dict = Body(...),
):
    import shutil as _sh
    from pathlib import Path as _P

    video_name = payload.get("video_name")
    base_dir = payload.get("base_dir")
    characters = payload.get("characters", []) or []
    voice_clusters = payload.get("voice_clusters", []) or []

    if not video_name or not base_dir:
        raise HTTPException(status_code=400, detail="Missing video_name or base_dir")

    faces_out = IDENTITIES_ROOT / video_name / "faces"
    voices_out = IDENTITIES_ROOT / video_name / "voices"
    faces_out.mkdir(parents=True, exist_ok=True)
    voices_out.mkdir(parents=True, exist_ok=True)

    for ch in characters:
        ch_name = (ch.get("name") or "Unknown").strip() or "Unknown"
        ch_folder = ch.get("folder")
        kept = ch.get("kept_files") or []
        if not ch_folder or not os.path.isdir(ch_folder):
            continue
        dst_dir = faces_out / ch_name
        dst_dir.mkdir(parents=True, exist_ok=True)
        for fname in kept:
            src = _P(ch_folder) / fname
            if src.exists() and src.is_file():
                try:
                    _sh.copy2(src, dst_dir / fname)
                except Exception:
                    pass

    clips_dir = _P(base_dir) / "clips"
    for vc in voice_clusters:
        v_name = (vc.get("name") or f"SPEAKER_{int(vc.get('label', 0)):02d}").strip()
        dst_dir = voices_out / v_name
        dst_dir.mkdir(parents=True, exist_ok=True)
        for wav in (vc.get("clips") or []):
            src = clips_dir / wav
            if src.exists() and src.is_file():
                try:
                    _sh.copy2(src, dst_dir / wav)
                except Exception:
                    pass

    db_dir = IDENTITIES_ROOT / video_name / "chroma_db"
    try:
        client = ensure_chroma(db_dir)
        n_faces = build_faces_index(
            faces_out,
            client,
            collection_name="index_faces",
            deepface_model="Facenet512",
            drop=True,
        )
        n_voices = build_voices_index(
            voices_out,
            client,
            collection_name="index_voices",
            drop=True,
        )
    except Exception as e:
        print(f"[finalize_casting] WARN - could not build ChromaDB indexes: {e}")
        n_faces = 0
        n_voices = 0

    face_identities = sorted([p.name for p in faces_out.iterdir() if p.is_dir()]) if faces_out.exists() else []
    voice_identities = sorted([p.name for p in voices_out.iterdir() if p.is_dir()]) if voices_out.exists() else []

    casting_json = {"face_col": [], "voice_col": []}

    try:
        cfg = load_yaml("config.yaml")
        router_llm = LLMRouter(cfg)
    except Exception:
        router_llm = None  # type: ignore

    try:
        if face_identities and router_llm is not None:
            factory = router_llm.client_factories.get("salamandra-vision")  # type: ignore[attr-defined]
            if factory is not None:
                vclient = factory()
                gclient = getattr(vclient, "_client", None)
            else:
                gclient = None

            if gclient is not None:
                for identity in face_identities:
                    id_dir = faces_out / identity
                    if not id_dir.is_dir():
                        continue
                    img_path = None
                    for ext in (".jpg", ".jpeg", ".png", ".bmp", ".webp"):
                        candidates = list(id_dir.glob(f"*{ext}"))
                        if candidates:
                            img_path = candidates[0]
                            break
                    if not img_path:
                        continue

                    try:
                        out = gclient.predict(str(img_path), api_name="/face_image_embedding")
                        emb = None
                        if isinstance(out, list) and out:
                            if isinstance(out[0], (list, tuple)):
                                # Nested list: take the first embedding
                                emb = list(out[0])
                            elif isinstance(out[0], (float, int)):
                                # Flat list of numbers: the embedding itself
                                emb = list(out)
                        elif isinstance(out, dict) and "embedding" in out:
                            emb = out.get("embedding")

                        if not emb:
                            continue

                        casting_json["face_col"].append({
                            "nombre": identity,
                            "embedding": emb,
                        })
                    except Exception:
                        continue
    except Exception:
        casting_json["face_col"] = []

    try:
        if voice_identities and router_llm is not None:
            factory = router_llm.client_factories.get("whisper-catalan")  # type: ignore[attr-defined]
            if factory is not None:
                aclient = factory()
                gclient = getattr(aclient, "_client", None)
            else:
                gclient = None

            if gclient is not None:
                for identity in voice_identities:
                    id_dir = voices_out / identity
                    if not id_dir.is_dir():
                        continue
                    wav_files = sorted([
                        p for p in id_dir.iterdir()
                        if p.is_file() and p.suffix.lower() in [".wav", ".flac", ".mp3"]
                    ])
                    if not wav_files:
                        continue

                    wf = wav_files[0]
                    try:
                        out = gclient.predict(str(wf), api_name="/voice_embedding")
                        emb = None
                        if isinstance(out, list):
                            emb = list(out)
                        elif isinstance(out, dict) and "embedding" in out:
                            emb = out.get("embedding")

                        if not emb:
                            continue

                        casting_json["voice_col"].append({
                            "nombre": identity,
                            "embedding": emb,
                        })
                    except Exception:
                        continue
    except Exception:
        casting_json["voice_col"] = []

    return {
        "ok": True,
        "video_name": video_name,
        "faces_dir": str(faces_out),
        "voices_dir": str(voices_out),
        "db_dir": str(db_dir),
        "n_faces_embeddings": n_faces,
        "n_voices_embeddings": n_voices,
        "face_identities": face_identities,
        "voice_identities": voice_identities,
        "casting_json": casting_json,
    }

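# Illustrative request body for /finalize_casting (all names invented; the
# keys match what the handler above reads from the payload):
#
#     payload = {
#         "video_name": "movie",
#         "base_dir": "/tmp/temp/movie",
#         "characters": [{
#             "name": "Alice",
#             "folder": "/tmp/temp/movie/characters/char_00",
#             "kept_files": ["face_000012_000.jpg"],
#         }],
#         "voice_clusters": [{"name": "Alice", "label": 0, "clips": ["segment_000.wav"]}],
#     }
#     requests.post("http://localhost:8000/finalize_casting", json=payload)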
@router.get("/files_scene/{video_name}/{scene_id}/{filename}")
def serve_scene_file(video_name: str, scene_id: str, filename: str):
    file_path = TEMP_ROOT / video_name / "scenes" / scene_id / filename
    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)


@router.post("/detect_scenes")
async def detect_scenes(
    video: UploadFile = File(...),
    max_groups: int = Form(default=3),
    min_cluster_size: int = Form(default=3),
    scene_sensitivity: float = Form(default=0.5),
    frame_interval_sec: float = Form(default=0.5),  # kept for compatibility, unused
    max_frames: int = Form(default=100),
):
    """Detect scenes using equally spaced video frames and hierarchical clustering.

    - Extracts ``max_frames`` equally spaced frames from the original video.
    - Discards black or very dark frames before building the histogram.
    - Represents each frame by a 3D color histogram (8x8x8) normalized by its
      mean (frames whose histogram is all zeros or has zero mean are discarded).
    - Applies ``hierarchical_cluster_with_min_size`` just as for faces and voices.
    """

    video_name = Path(video.filename).stem
    dst_video = VIDEOS_ROOT / f"{video_name}.mp4"
    with dst_video.open("wb") as f:
        shutil.copyfileobj(video.file, f)

    try:
        print(f"[detect_scenes] Extracting equally spaced frames from {video_name}...")

        cap = cv2.VideoCapture(str(dst_video))
        if not cap.isOpened():
            raise RuntimeError("Could not open the video for scene detection")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        if total_frames <= 0:
            cap.release()
            print("[detect_scenes] total_frames <= 0")
            return {"scene_clusters": []}

        n_samples = max(1, min(int(max_frames), total_frames))
        frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=n_samples, dtype=int).tolist()))
        print(f"[detect_scenes] Total frames: {total_frames}, sampling {len(frame_indices)} frames")

        # Create base directory for scenes
        base = TEMP_ROOT / video_name
        scenes_dir = base / "scenes"
        scenes_dir.mkdir(parents=True, exist_ok=True)

        # ------------------------------------------------------------------
        # STEP 1: Save frames and build simple embeddings (histograms)
        # ------------------------------------------------------------------
        keyframe_paths: List[Path] = []
        keyframe_infos: List[dict] = []
        features: List[np.ndarray] = []

        for i, frame_idx in enumerate(frame_indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
            ret, frame = cap.read()
            if not ret:
                continue

            # Filter out black or very dark frames (threshold on mean intensity).
            # Work in grayscale to evaluate overall brightness.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            mean_intensity = float(gray.mean())
            if mean_intensity < 5.0:
                # Black or nearly black frame: discard it
                continue

            local_keyframe = scenes_dir / f"keyframe_{frame_idx:06d}.jpg"
            try:
                cv2.imwrite(str(local_keyframe), frame)
            except Exception as werr:
                print(f"[detect_scenes] Error saving frame {frame_idx}: {werr}")
                continue

            try:
                # 8x8x8 color histogram in RGB
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                hist = cv2.calcHist(
                    [img_rgb], [0, 1, 2], None,
                    [8, 8, 8], [0, 256, 0, 256, 0, 256]
                ).astype("float32").flatten()

                if not np.any(hist):
                    # All zeros: discard
                    continue

                mean_val = float(hist.mean())
                if mean_val <= 0.0:
                    # Zero or negative mean: discard
                    continue

                hist /= mean_val
                features.append(hist)
            except Exception as fe_err:
                print(f"[detect_scenes] Error computing embedding for frame {frame_idx}: {fe_err}")
                continue

            keyframe_paths.append(local_keyframe)
            # With no frames_info from svision, use the frame index as timing
            info = {"start": int(frame_idx), "end": int(frame_idx) + 1}
            keyframe_infos.append(info)

        cap.release()

        if not features or len(features) < min_cluster_size:
            print(
                f"[detect_scenes] Not enough valid frames to cluster scenes: "
                f"valid={len(features)}, min_cluster_size={min_cluster_size}"
            )
            return {"scene_clusters": []}

        Xs = np.vstack(features)

        # ------------------------------------------------------------------
        # STEP 2: Hierarchical scene clustering (k-target + minimum size)
        # ------------------------------------------------------------------
        print("[detect_scenes] Hierarchical clustering of scenes...")
        scene_labels = hierarchical_cluster_with_min_size(Xs, max_groups, min_cluster_size, 0.5)
        unique_labels = sorted({int(l) for l in scene_labels if int(l) >= 0})
        print(f"[detect_scenes] Valid scene labels: {unique_labels}")

        # Map keyframe indices to clusters
        cluster_map: Dict[int, List[int]] = {}
        for idx, lbl in enumerate(scene_labels):
            lbl = int(lbl)
            if lbl >= 0:
                cluster_map.setdefault(lbl, []).append(idx)

        # ------------------------------------------------------------------
        # STEP 3: Build scene_clusters in the format expected by the demo
        # ------------------------------------------------------------------
        scene_clusters: List[Dict[str, Any]] = []
        for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
            if not idxs:
                continue

            scene_id = f"scene_{ci:02d}"
            scene_out_dir = scenes_dir / scene_id
            scene_out_dir.mkdir(parents=True, exist_ok=True)

            # Copy every keyframe of the cluster into the cluster folder
            cluster_start = None
            cluster_end = None
            representative_file = None

            for j, k_idx in enumerate(idxs):
                src = keyframe_paths[k_idx]
                dst = scene_out_dir / src.name
                try:
                    shutil.copy2(src, dst)
                except Exception as cp_err:
                    print(f"[detect_scenes] Error copying keyframe {src} to cluster {scene_id}: {cp_err}")
                    continue

                if representative_file is None:
                    representative_file = dst

                info = keyframe_infos[k_idx]
                start = info.get("start", k_idx)
                end = info.get("end", k_idx + 1)
                cluster_start = start if cluster_start is None else min(cluster_start, start)
                cluster_end = end if cluster_end is None else max(cluster_end, end)

            if representative_file is None:
                continue

            scene_clusters.append({
                "id": scene_id,
                "name": f"Scene {len(scene_clusters) + 1}",
                "folder": str(scene_out_dir),
                "image_url": f"/files_scene/{video_name}/{scene_id}/{representative_file.name}",
                "start_time": float(cluster_start) if cluster_start is not None else 0.0,
                "end_time": float(cluster_end) if cluster_end is not None else 0.0,
            })

        print(f"[detect_scenes] {len(scene_clusters)} scenes clustered")
        return {"scene_clusters": scene_clusters}

    except Exception as e:
        print(f"[detect_scenes] Error: {e}")
        import traceback
        traceback.print_exc()
        return {"scene_clusters": [], "error": str(e)}

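# Illustrative client call for /detect_scenes (host and port assumed):
#
#     with open("movie.mp4", "rb") as fh:
#         r = requests.post(
#             "http://localhost:8000/detect_scenes",
#             files={"video": ("movie.mp4", fh, "video/mp4")},
#             data={"max_groups": 3, "min_cluster_size": 3, "max_frames": 100},
#         )
#     scenes = r.json()["scene_clusters"]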
def process_video_job(job_id: str):
    """
    Process a video job in the background using EXTERNAL spaces (svision, asr).

    NO local GPU needed - all vision/audio processing is delegated to:
    - svision: face detection + embeddings (MTCNN + FaceNet)
    - asr: audio diarization + voice embeddings (pyannote + ECAPA)

    Engine only does: frame extraction, clustering (math), file organization.
    """
    try:
        job = jobs[job_id]
        print(f"[{job_id}] Starting processing (delegating to svision/asr)...")

        job["status"] = JobStatus.PROCESSING

        video_path = job["video_path"]
        video_name = job["video_name"]
        max_groups = int(job.get("max_groups", 5))
        min_cluster_size = int(job.get("min_cluster_size", 3))
        face_sensitivity = float(job.get("face_sensitivity", 0.5))

        base = TEMP_ROOT / video_name
        base.mkdir(parents=True, exist_ok=True)
        print(f"[{job_id}] Base directory: {base}")

        try:
            # ============================================================
            # STEP 1: Extract frames from video (local, simple cv2)
            # ============================================================
            print(f"[{job_id}] Extracting frames from the video...")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise RuntimeError("Could not open the video")

            fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
            max_samples = job.get("max_frames", 100)

            if total_frames > 0:
                frame_indices = sorted(set(np.linspace(0, max(0, total_frames - 1), num=min(max_samples, max(1, total_frames)), dtype=int).tolist()))
            else:
                frame_indices = []

            print(f"[{job_id}] Total frames: {total_frames}, FPS: {fps:.2f}, sampling {len(frame_indices)} frames")

            # Save frames temporarily for svision processing
            frames_dir = base / "frames_temp"
            frames_dir.mkdir(parents=True, exist_ok=True)
            faces_root = base / "faces_raw"
            faces_root.mkdir(parents=True, exist_ok=True)

            frame_paths: List[str] = []
            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
                ret, frame = cap.read()
                if not ret:
                    continue
                frame_path = frames_dir / f"frame_{frame_idx:06d}.jpg"
                cv2.imwrite(str(frame_path), frame)
                frame_paths.append(str(frame_path))
            cap.release()

            print(f"[{job_id}] ✓ {len(frame_paths)} frames extracted")

            # ============================================================
            # STEP 2: Send frames to SVISION for face detection + embeddings
            # ============================================================
            print(f"[{job_id}] Sending frames to svision for face detection...")

            embeddings: List[List[float]] = []
            crops_meta: List[dict] = []
            saved_count = 0
            frames_with_faces = 0

            for i, frame_path in enumerate(frame_paths):
                frame_idx = frame_indices[i] if i < len(frame_indices) else i
                try:
                    # Call svision to get faces + embeddings
                    faces = svision_client.get_face_embeddings_from_image(frame_path)

                    if faces:
                        frames_with_faces += 1
                        for face_data in faces:
                            emb = face_data.get("embedding", [])
                            if not emb:
                                continue

                            # Normalize the embedding to unit length
                            emb = np.array(emb, dtype=float)
                            emb = emb / (np.linalg.norm(emb) + 1e-9)
                            embeddings.append(emb.tolist())

                            # Save face crop if provided by svision
                            crop_path = face_data.get("face_crop_path")
                            fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
                            local_crop_path = faces_root / fn

                            crop_saved = False
                            if crop_path:
                                # Handle remote URLs from svision (Gradio)
                                if isinstance(crop_path, str) and crop_path.startswith("http"):
                                    try:
                                        import requests
                                        resp = requests.get(crop_path, timeout=30)
                                        if resp.status_code == 200:
                                            with open(local_crop_path, "wb") as f:
                                                f.write(resp.content)
                                            crop_saved = True
                                    except Exception as dl_err:
                                        print(f"[{job_id}] Error downloading crop: {dl_err}")
                                # Handle local paths
                                elif isinstance(crop_path, str) and os.path.exists(crop_path):
                                    shutil.copy2(crop_path, local_crop_path)
                                    crop_saved = True

                            if not crop_saved:
                                # If no crop from svision, use the original frame
                                shutil.copy2(frame_path, local_crop_path)

                            crops_meta.append({
                                "file": fn,
                                "frame": frame_idx,
                                "index": face_data.get("index", saved_count),
                            })
                            saved_count += 1

                except Exception as e:
                    print(f"[{job_id}] Error processing frame {frame_idx}: {e}")
                    continue

            print(f"[{job_id}] ✓ Frames with faces: {frames_with_faces}/{len(frame_paths)}")
            print(f"[{job_id}] ✓ Faces detected: {len(embeddings)}")

            # ============================================================
            # STEP 3: Clustering (local, only math - no GPU)
            # ============================================================
            if embeddings:
                print(f"[{job_id}] Hierarchical clustering...")
                Xf = np.array(embeddings)
                labels = hierarchical_cluster_with_min_size(Xf, max_groups, min_cluster_size, face_sensitivity).tolist()
                n_clusters = len(set([l for l in labels if l >= 0]))
                print(f"[{job_id}] ✓ Clustering: {n_clusters} clusters")
            else:
                labels = []

            # ============================================================
            # STEP 4: Organize faces into character folders
            # ============================================================
            characters: List[Dict[str, Any]] = []
            cluster_map: Dict[int, List[int]] = {}
            for idx, lbl in enumerate(labels):
                if isinstance(lbl, int) and lbl >= 0:
                    cluster_map.setdefault(lbl, []).append(idx)

            chars_dir = base / "characters"
            chars_dir.mkdir(parents=True, exist_ok=True)

            print(f"[{job_id}] cluster_map: {cluster_map}")
            print(f"[{job_id}] crops_meta count: {len(crops_meta)}")
            print(f"[{job_id}] faces_root: {faces_root}, exists: {faces_root.exists()}")
            if faces_root.exists():
                existing_files = list(faces_root.glob("*"))
                print(f"[{job_id}] Files in faces_root: {len(existing_files)}")
                for ef in existing_files[:5]:
                    print(f"[{job_id}] - {ef.name}")

            for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
                char_id = f"char_{ci:02d}"
                print(f"[{job_id}] Processing cluster {char_id} with {len(idxs)} indices: {idxs[:5]}...")

                if not idxs:
                    continue

                out_dir = chars_dir / char_id
                out_dir.mkdir(parents=True, exist_ok=True)

                # Select faces to show (half + 1)
                total_faces = len(idxs)
                max_faces_to_show = (total_faces // 2) + 1
                selected_idxs = idxs[:max_faces_to_show]

                files: List[str] = []
                file_urls: List[str] = []

                for j in selected_idxs:
                    if j >= len(crops_meta):
                        print(f"[{job_id}] Index {j} out of range (crops_meta len={len(crops_meta)})")
                        continue
                    meta = crops_meta[j]
                    fname = meta.get("file")
                    if not fname:
                        print(f"[{job_id}] No filename in meta for index {j}")
                        continue

                    src = faces_root / fname
                    dst = out_dir / fname
                    try:
                        if src.exists():
                            shutil.copy2(src, dst)
                            files.append(fname)
                            file_urls.append(f"/files/{video_name}/{char_id}/{fname}")
                        else:
                            print(f"[{job_id}] Source file not found: {src}")
                    except Exception as cp_err:
                        print(f"[{job_id}] Error copying {fname}: {cp_err}")

                # Create representative image
                rep = files[0] if files else None
                if rep:
                    try:
                        shutil.copy2(out_dir / rep, out_dir / "representative.jpg")
                    except Exception:
                        pass

                cluster_number = ci + 1
                character_name = f"Cluster {cluster_number}"

                characters.append({
                    "id": char_id,
                    "name": character_name,
                    "folder": str(out_dir),
                    "num_faces": len(files),
                    "total_faces_detected": total_faces,
                    "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
                    "face_files": file_urls,
                })
                print(f"[{job_id}] ✓ Cluster {char_id}: {len(files)} faces")

            # Cleanup temp frames
            try:
                shutil.rmtree(frames_dir)
            except Exception:
                pass

            print(f"[{job_id}] ✓ Total: {len(characters)} characters")

            # ============================================================
            # STEP 5: Audio diarization + voice embeddings using ASR space
            # ============================================================
            voice_max_groups = int(job.get("voice_max_groups", 3))
            voice_min_cluster_size = int(job.get("voice_min_cluster_size", 3))
            voice_sensitivity = float(job.get("voice_sensitivity", 0.5))

            audio_segments: List[Dict[str, Any]] = []
            voice_labels: List[int] = []
            voice_embeddings: List[List[float]] = []
            diarization_info: Dict[str, Any] = {}

            print(f"[{job_id}] Processing audio with the ASR space...")
            try:
                # Extract audio and diarize
                diar_result = asr_client.extract_audio_and_diarize(video_path)
                clips = diar_result.get("clips", [])
                segments = diar_result.get("segments", [])

                print(f"[{job_id}] Diarization: {len(clips)} clips, {len(segments)} segments")

                # Save clips locally
                clips_dir = base / "clips"
                clips_dir.mkdir(parents=True, exist_ok=True)

                for i, clip_info in enumerate(clips if isinstance(clips, list) else []):
                    clip_path = clip_info if isinstance(clip_info, str) else clip_info.get("path") if isinstance(clip_info, dict) else None
                    if not clip_path:
                        continue

                    # Download or copy clip
                    local_clip = clips_dir / f"segment_{i:03d}.wav"
                    try:
                        if isinstance(clip_path, str) and clip_path.startswith("http"):
                            import requests
                            resp = requests.get(clip_path, timeout=30)
                            if resp.status_code == 200:
                                with open(local_clip, "wb") as f:
                                    f.write(resp.content)
                        elif isinstance(clip_path, str) and os.path.exists(clip_path):
                            shutil.copy2(clip_path, local_clip)
                    except Exception as dl_err:
                        print(f"[{job_id}] Error saving clip {i}: {dl_err}")
                        continue

                    # Get segment info
                    seg_info = segments[i] if i < len(segments) else {}
                    speaker = seg_info.get("speaker", f"SPEAKER_{i:02d}")

                    # Get voice embedding for this clip
                    emb = asr_client.get_voice_embedding(str(local_clip))
                    if emb:
                        voice_embeddings.append(emb)

                    audio_segments.append({
                        "index": i,
                        "clip_path": str(local_clip),
                        "clip_url": f"/audio/{video_name}/segment_{i:03d}.wav",
                        "speaker": speaker,
                        "start": seg_info.get("start", 0),
                        "end": seg_info.get("end", 0),
                    })

                print(f"[{job_id}] ✓ {len(audio_segments)} audio segments processed")

                # Cluster voice embeddings
                if voice_embeddings:
                    print(f"[{job_id}] Hierarchical voice clustering...")
                    print(f"[{job_id}] - voice_embeddings: {len(voice_embeddings)} embeddings")
                    print(f"[{job_id}] - parameters: voice_max_groups={voice_max_groups}, voice_min_cluster_size={voice_min_cluster_size}")
                    Xv = np.array(voice_embeddings)
                    print(f"[{job_id}] - Xv shape: {Xv.shape}")
                    voice_labels = hierarchical_cluster_with_min_size(
                        Xv, voice_max_groups, voice_min_cluster_size, voice_sensitivity
                    ).tolist()
                    n_voice_clusters = len(set([l for l in voice_labels if l >= 0]))
                    print(f"[{job_id}] - voice_labels: {voice_labels}")
                    print(f"[{job_id}] ✓ Voice clustering: {n_voice_clusters} clusters from {len(voice_embeddings)} samples")
                else:
                    print(f"[{job_id}] ⚠️ No voice_embeddings available for clustering")

                diarization_info = {
                    "num_segments": len(audio_segments),
                    "num_voice_clusters": len(set([l for l in voice_labels if l >= 0])) if voice_labels else 0,
                }

            except Exception as audio_err:
                print(f"[{job_id}] Error in audio processing: {audio_err}")
                import traceback
                traceback.print_exc()

            job["results"] = {
                "characters": characters,
                "face_labels": labels,
                "audio_segments": audio_segments,
                "voice_labels": voice_labels,
                "diarization_info": diarization_info,
                "video_name": video_name,
                "base_dir": str(base),
            }
            job["status"] = JobStatus.DONE
            print(f"[{job_id}] ✓ Processing complete")

        except Exception as proc_error:
            print(f"[{job_id}] Error during processing: {proc_error}")
            import traceback
            traceback.print_exc()
            # Return an empty result set rather than failing the whole job
            job["results"] = {
                "characters": [], "face_labels": [],
                "audio_segments": [], "voice_labels": [], "diarization_info": {},
                "video_name": video_name, "base_dir": str(base),
            }
            job["status"] = JobStatus.DONE

    except Exception as e:
        print(f"[{job_id}] General error: {e}")
        import traceback
        traceback.print_exc()
        job["status"] = JobStatus.FAILED
        job["error"] = str(e)