Update app.py
Browse files
app.py
CHANGED
|
@@ -30,11 +30,16 @@ app.add_middleware(
|
|
| 30 |
allow_headers=["*"],
|
| 31 |
)
|
| 32 |
|
| 33 |
-
# Configuration
|
| 34 |
-
OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 36 |
|
| 37 |
print(f"✅ Using device: {DEVICE}")
|
|
|
|
| 38 |
|
| 39 |
# Model configuration
|
| 40 |
MODEL_REPO_ID = "coqui/XTTS-v2"
|
|
@@ -104,12 +109,17 @@ def clean_text(text):
|
|
| 104 |
def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
|
| 105 |
"""Upload file to OCI using your existing API with subfolder support"""
|
| 106 |
try:
|
| 107 |
-
if
|
| 108 |
-
|
| 109 |
-
print("⚠️ OCI upload skipped -
|
| 110 |
return {"status": "skipped", "message": "OCI upload disabled"}, None
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
url = f"{OCI_UPLOAD_API_URL}/api/upload"
|
|
|
|
| 113 |
|
| 114 |
with open(file_path, "rb") as f:
|
| 115 |
files = {"file": (filename, f, "audio/wav")}
|
|
@@ -118,24 +128,55 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
|
|
| 118 |
"subfolder": "voiceover"
|
| 119 |
}
|
| 120 |
|
| 121 |
-
#
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
if response.status_code == 200:
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
else:
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
|
| 133 |
except requests.exceptions.Timeout:
|
| 134 |
-
|
|
|
|
|
|
|
| 135 |
except requests.exceptions.ConnectionError:
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
except Exception as e:
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
|
| 140 |
def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=3):
|
| 141 |
"""Upload file to OCI with retry logic"""
|
|
@@ -345,6 +386,62 @@ def load_tts_model(voice_style="default_female"):
|
|
| 345 |
finally:
|
| 346 |
model_loading = False
|
| 347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
# API endpoints with lazy loading
|
| 349 |
@app.post("/api/tts")
|
| 350 |
async def generate_tts(request: TTSRequest):
|
|
@@ -466,13 +563,14 @@ async def generate_tts(request: TTSRequest):
|
|
| 466 |
print(f"❌ OCI upload failed: {error}")
|
| 467 |
# Still return the local file path if upload fails
|
| 468 |
return {
|
| 469 |
-
"status": "
|
| 470 |
-
"message": f"TTS generated
|
| 471 |
"local_file": output_path,
|
| 472 |
"filename": filename,
|
| 473 |
"file_size": file_size,
|
| 474 |
"voice_style": request.voice_style,
|
| 475 |
-
"model_used": current_model
|
|
|
|
| 476 |
}
|
| 477 |
|
| 478 |
print(f"✅ Upload successful: {filename}")
|
|
@@ -540,19 +638,16 @@ async def batch_generate_tts(request: BatchTTSRequest):
|
|
| 540 |
print(f" Processing text {i+1}/{len(request.texts)}")
|
| 541 |
|
| 542 |
# Generate sequential filename
|
| 543 |
-
|
|
|
|
| 544 |
output_path = f"/tmp/output/{filename}"
|
| 545 |
|
| 546 |
-
#
|
| 547 |
-
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 548 |
-
|
| 549 |
-
# Clean the text for each item
|
| 550 |
cleaned_text = clean_text(text)
|
| 551 |
-
print(f"📝 Batch text {i+1}: '{text}' -> '{cleaned_text}'")
|
| 552 |
|
| 553 |
-
# Generate TTS
|
| 554 |
try:
|
| 555 |
-
# Get
|
| 556 |
speaker = None
|
| 557 |
if "vctk/vits" in current_model:
|
| 558 |
speaker_map = {
|
|
@@ -570,173 +665,97 @@ async def batch_generate_tts(request: BatchTTSRequest):
|
|
| 570 |
speaker=speaker
|
| 571 |
)
|
| 572 |
else:
|
| 573 |
-
tts.tts_to_file(
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
|
| 590 |
-
# Save manually
|
| 591 |
-
if not save_wav(audio, output_path):
|
| 592 |
-
raise Exception("Failed to save audio file")
|
| 593 |
-
|
| 594 |
-
except Exception as alt_error:
|
| 595 |
-
print(f"❌ Alternative method also failed: {alt_error}")
|
| 596 |
results.append({
|
| 597 |
-
"
|
| 598 |
-
"
|
| 599 |
-
"
|
| 600 |
-
"
|
|
|
|
| 601 |
})
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
# Verify the file was created
|
| 605 |
-
if not os.path.exists(output_path):
|
| 606 |
results.append({
|
| 607 |
-
"text_index": i,
|
| 608 |
"status": "error",
|
| 609 |
-
"message": f"TTS failed to create output file: {output_path}",
|
| 610 |
-
"filename": filename
|
| 611 |
-
})
|
| 612 |
-
continue
|
| 613 |
-
|
| 614 |
-
file_size = os.path.getsize(output_path)
|
| 615 |
-
print(f"✅ TTS generated: {output_path} ({file_size} bytes)")
|
| 616 |
-
|
| 617 |
-
# Upload to OCI
|
| 618 |
-
upload_result, error = upload_to_oci_with_retry(
|
| 619 |
-
output_path, filename, request.project_id, "voiceover"
|
| 620 |
-
)
|
| 621 |
-
|
| 622 |
-
if error:
|
| 623 |
-
print(f"❌ OCI upload failed for {filename}: {error}")
|
| 624 |
-
results.append({
|
| 625 |
-
"text_index": i,
|
| 626 |
-
"status": "partial_success",
|
| 627 |
-
"message": f"TTS generated but upload failed: {error}",
|
| 628 |
-
"local_file": output_path,
|
| 629 |
-
"filename": filename,
|
| 630 |
-
"file_size": file_size
|
| 631 |
-
})
|
| 632 |
-
else:
|
| 633 |
-
print(f"✅ Upload successful: {filename}")
|
| 634 |
-
results.append({
|
| 635 |
-
"text_index": i,
|
| 636 |
-
"status": "success",
|
| 637 |
-
"message": "TTS generated and uploaded successfully",
|
| 638 |
"filename": filename,
|
| 639 |
-
"
|
| 640 |
-
"
|
| 641 |
})
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
print(f"🧹 Cleaned up local file: {output_path}")
|
| 647 |
-
except Exception as cleanup_error:
|
| 648 |
-
print(f"⚠️ Could not clean up file: {cleanup_error}")
|
| 649 |
|
| 650 |
return {
|
| 651 |
"status": "completed",
|
| 652 |
-
"
|
| 653 |
"results": results,
|
| 654 |
-
"
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
except Exception as e:
|
| 660 |
-
print(f"❌ Batch TTS generation error: {str(e)}")
|
| 661 |
-
raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
|
| 662 |
-
|
| 663 |
-
@app.post("/api/upload-voice")
|
| 664 |
-
async def upload_voice_sample(
|
| 665 |
-
project_id: str = Form(...),
|
| 666 |
-
voice_name: str = Form(...),
|
| 667 |
-
file: UploadFile = File(...)
|
| 668 |
-
):
|
| 669 |
-
"""Upload a voice sample for cloning"""
|
| 670 |
-
try:
|
| 671 |
-
print(f"📥 Voice upload request: {voice_name} for project {project_id}")
|
| 672 |
-
|
| 673 |
-
# Check if voice cloning is supported
|
| 674 |
-
if not supports_voice_cloning():
|
| 675 |
-
raise HTTPException(
|
| 676 |
-
status_code=400,
|
| 677 |
-
detail="Voice cloning is not supported with the current model. Please use the XTTS model for voice cloning."
|
| 678 |
-
)
|
| 679 |
-
|
| 680 |
-
# Validate file type
|
| 681 |
-
if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
|
| 682 |
-
raise HTTPException(status_code=400, detail="Only audio files are allowed")
|
| 683 |
-
|
| 684 |
-
# Save voice sample
|
| 685 |
-
voice_path = f"/tmp/voices/{voice_name}.wav"
|
| 686 |
-
with open(voice_path, "wb") as f:
|
| 687 |
-
content = await file.read()
|
| 688 |
-
f.write(content)
|
| 689 |
-
|
| 690 |
-
print(f"✅ Voice sample saved: {voice_path}")
|
| 691 |
-
|
| 692 |
-
return {
|
| 693 |
-
"status": "success",
|
| 694 |
-
"message": "Voice sample uploaded successfully",
|
| 695 |
-
"voice_name": voice_name,
|
| 696 |
-
"local_path": voice_path
|
| 697 |
}
|
| 698 |
|
| 699 |
except Exception as e:
|
| 700 |
-
print(f"❌
|
| 701 |
-
raise HTTPException(status_code=500, detail=
|
| 702 |
|
| 703 |
@app.post("/api/clone-voice")
|
| 704 |
-
async def
|
| 705 |
project_id: str = Form(...),
|
| 706 |
voice_name: str = Form(...),
|
| 707 |
description: str = Form(""),
|
| 708 |
files: List[UploadFile] = File(...)
|
| 709 |
):
|
| 710 |
-
"""
|
| 711 |
try:
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
# Check if voice cloning is supported
|
| 715 |
-
if not supports_voice_cloning():
|
| 716 |
-
raise HTTPException(
|
| 717 |
-
status_code=400,
|
| 718 |
-
detail="Voice cloning is not supported with the current model. Please use the XTTS model for voice cloning."
|
| 719 |
-
)
|
| 720 |
|
| 721 |
# Save uploaded files temporarily
|
| 722 |
temp_files = []
|
| 723 |
-
for
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
raise HTTPException(status_code=400, detail="Only audio files are allowed")
|
| 727 |
|
| 728 |
-
# Save temporary file
|
| 729 |
temp_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
|
| 730 |
with open(temp_path, "wb") as f:
|
| 731 |
-
|
| 732 |
-
f.write(content)
|
| 733 |
temp_files.append(temp_path)
|
| 734 |
-
print(f" Saved sample {i+1}: {temp_path}")
|
| 735 |
|
| 736 |
# Clone voice
|
| 737 |
success, message = clone_voice(voice_name, temp_files, description)
|
| 738 |
|
| 739 |
-
# Clean up
|
| 740 |
for temp_file in temp_files:
|
| 741 |
try:
|
| 742 |
os.remove(temp_file)
|
|
@@ -747,235 +766,77 @@ async def api_clone_voice(
|
|
| 747 |
return {
|
| 748 |
"status": "success",
|
| 749 |
"message": message,
|
| 750 |
-
"voice_name": voice_name
|
|
|
|
| 751 |
}
|
| 752 |
else:
|
| 753 |
raise HTTPException(status_code=500, detail=message)
|
| 754 |
|
|
|
|
|
|
|
| 755 |
except Exception as e:
|
| 756 |
print(f"❌ Voice cloning error: {str(e)}")
|
| 757 |
raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
|
| 758 |
|
| 759 |
@app.get("/api/voices")
|
| 760 |
async def list_voices():
|
| 761 |
-
"""List available voices"""
|
| 762 |
try:
|
| 763 |
voices_dir = Path("/tmp/voices")
|
| 764 |
-
|
|
|
|
| 765 |
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
samples = list(
|
| 770 |
voices.append({
|
| 771 |
-
"name":
|
| 772 |
"samples_count": len(samples),
|
| 773 |
-
"created_at": datetime.fromtimestamp(
|
| 774 |
-
})
|
| 775 |
-
elif item.is_file() and item.suffix == ".wav":
|
| 776 |
-
# Single voice file (not cloned)
|
| 777 |
-
voices.append({
|
| 778 |
-
"name": item.stem,
|
| 779 |
-
"samples_count": 1,
|
| 780 |
-
"created_at": datetime.fromtimestamp(item.stat().st_ctime).isoformat()
|
| 781 |
})
|
| 782 |
|
| 783 |
-
return {
|
| 784 |
-
"status": "success",
|
| 785 |
-
"voices": voices,
|
| 786 |
-
"voice_cloning_supported": supports_voice_cloning()
|
| 787 |
-
}
|
| 788 |
-
|
| 789 |
except Exception as e:
|
| 790 |
-
print(f"❌ List voices error: {str(e)}")
|
| 791 |
raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
|
| 792 |
|
| 793 |
-
@app.get("/api/health")
|
| 794 |
-
async def health_check():
|
| 795 |
-
"""Health check endpoint"""
|
| 796 |
-
return {
|
| 797 |
-
"status": "healthy",
|
| 798 |
-
"tts_loaded": model_loaded,
|
| 799 |
-
"model": current_model,
|
| 800 |
-
"voice_style": current_voice_style,
|
| 801 |
-
"voice_cloning_supported": voice_cloning_supported,
|
| 802 |
-
"device": DEVICE,
|
| 803 |
-
"load_attempts": model_load_attempts,
|
| 804 |
-
"timestamp": datetime.now().isoformat()
|
| 805 |
-
}
|
| 806 |
-
|
| 807 |
-
@app.post("/api/reload-model")
|
| 808 |
-
async def reload_model():
|
| 809 |
-
"""Force reload the TTS model"""
|
| 810 |
-
global tts, model_loaded, current_model, voice_cloning_supported
|
| 811 |
-
|
| 812 |
-
# Clear current model
|
| 813 |
-
tts = None
|
| 814 |
-
model_loaded = False
|
| 815 |
-
current_model = ""
|
| 816 |
-
voice_cloning_supported = False
|
| 817 |
-
|
| 818 |
-
# Try to reload
|
| 819 |
-
success = load_tts_model(current_voice_style)
|
| 820 |
-
|
| 821 |
-
return {
|
| 822 |
-
"status": "success" if success else "error",
|
| 823 |
-
"message": "Model reloaded successfully" if success else "Failed to reload model",
|
| 824 |
-
"model_loaded": model_loaded,
|
| 825 |
-
"model": current_model,
|
| 826 |
-
"voice_style": current_voice_style
|
| 827 |
-
}
|
| 828 |
-
|
| 829 |
@app.post("/api/change-voice")
|
| 830 |
-
async def
|
| 831 |
-
"""Change the
|
| 832 |
-
global tts, model_loaded, current_model, current_voice_style
|
| 833 |
-
|
| 834 |
try:
|
| 835 |
-
|
| 836 |
-
"male_deep": "Deep male voice (VITS p225)",
|
| 837 |
-
"male_medium": "Medium male voice (VITS p226)",
|
| 838 |
-
"female_1": "Female voice 1 (VITS p227)",
|
| 839 |
-
"female_2": "Female voice 2 (VITS p228)",
|
| 840 |
-
"default_female": "Default female voice (Tacotron2)",
|
| 841 |
-
"clear_male": "Clear male voice (Tacotron2)"
|
| 842 |
-
}
|
| 843 |
-
|
| 844 |
-
if request.voice_style not in voice_options:
|
| 845 |
-
return {
|
| 846 |
-
"status": "error",
|
| 847 |
-
"message": f"Invalid voice style. Available: {list(voice_options.keys())}",
|
| 848 |
-
"available_voices": voice_options
|
| 849 |
-
}
|
| 850 |
|
| 851 |
-
print(f"🔄 Changing voice to: {request.voice_style}
|
| 852 |
|
| 853 |
-
#
|
| 854 |
-
tts = None
|
| 855 |
model_loaded = False
|
| 856 |
|
| 857 |
-
|
| 858 |
-
success = load_tts_model(request.voice_style)
|
| 859 |
-
|
| 860 |
-
if success:
|
| 861 |
return {
|
| 862 |
"status": "success",
|
| 863 |
-
"message": f"Voice changed to {
|
| 864 |
-
"
|
| 865 |
-
"
|
| 866 |
}
|
| 867 |
else:
|
| 868 |
-
|
| 869 |
-
"status": "error",
|
| 870 |
-
"message": "Failed to change voice"
|
| 871 |
-
}
|
| 872 |
|
| 873 |
except Exception as e:
|
| 874 |
-
raise HTTPException(status_code=500, detail=
|
| 875 |
-
|
| 876 |
-
@app.get("/api/
|
| 877 |
-
async def
|
| 878 |
-
"""Get
|
| 879 |
-
|
| 880 |
-
"male_deep": "Deep male voice (VITS
|
| 881 |
-
"male_medium": "Medium male voice (VITS
|
| 882 |
-
"female_1": "Female voice 1 (VITS
|
| 883 |
-
"female_2": "Female voice 2 (VITS
|
| 884 |
"default_female": "Default female voice (Tacotron2)",
|
| 885 |
"clear_male": "Clear male voice (Tacotron2)"
|
| 886 |
}
|
| 887 |
-
|
| 888 |
-
return {
|
| 889 |
-
"status": "success",
|
| 890 |
-
"available_voices": voice_options,
|
| 891 |
-
"current_voice": current_voice_style,
|
| 892 |
-
"current_model": current_model
|
| 893 |
-
}
|
| 894 |
-
|
| 895 |
-
@app.get("/api/download/{filename}")
|
| 896 |
-
async def download_file(filename: str):
|
| 897 |
-
"""Download generated audio file directly"""
|
| 898 |
-
try:
|
| 899 |
-
file_path = f"/tmp/output/{filename}"
|
| 900 |
-
|
| 901 |
-
# Security check - only allow .wav files from output directory
|
| 902 |
-
if not filename.endswith('.wav') or '..' in filename or '/' in filename:
|
| 903 |
-
raise HTTPException(status_code=400, detail="Invalid filename")
|
| 904 |
-
|
| 905 |
-
if not os.path.exists(file_path):
|
| 906 |
-
raise HTTPException(status_code=404, detail="File not found")
|
| 907 |
-
|
| 908 |
-
# Get file info
|
| 909 |
-
file_size = os.path.getsize(file_path)
|
| 910 |
-
print(f"📥 Serving download: {filename} ({file_size} bytes)")
|
| 911 |
-
|
| 912 |
-
# Return the audio file
|
| 913 |
-
from fastapi.responses import FileResponse
|
| 914 |
-
return FileResponse(
|
| 915 |
-
path=file_path,
|
| 916 |
-
media_type='audio/wav',
|
| 917 |
-
filename=filename
|
| 918 |
-
)
|
| 919 |
-
|
| 920 |
-
except Exception as e:
|
| 921 |
-
print(f"❌ Download failed: {str(e)}")
|
| 922 |
-
raise HTTPException(status_code=500, detail=f"Download failed: {str(e)}")
|
| 923 |
-
|
| 924 |
-
@app.get("/api/files")
|
| 925 |
-
async def list_files():
|
| 926 |
-
"""List all generated audio files"""
|
| 927 |
-
try:
|
| 928 |
-
files_dir = Path("/tmp/output")
|
| 929 |
-
files = []
|
| 930 |
-
|
| 931 |
-
for file_path in files_dir.glob("*.wav"):
|
| 932 |
-
files.append({
|
| 933 |
-
"name": file_path.name,
|
| 934 |
-
"size": file_path.stat().st_size,
|
| 935 |
-
"created": datetime.fromtimestamp(file_path.stat().st_ctime).isoformat()
|
| 936 |
-
})
|
| 937 |
-
|
| 938 |
-
# Sort by creation time, newest first
|
| 939 |
-
files.sort(key=lambda x: x["created"], reverse=True)
|
| 940 |
-
|
| 941 |
-
return {
|
| 942 |
-
"status": "success",
|
| 943 |
-
"files": files,
|
| 944 |
-
"count": len(files)
|
| 945 |
-
}
|
| 946 |
-
|
| 947 |
-
except Exception as e:
|
| 948 |
-
raise HTTPException(status_code=500, detail=f"Failed to list files: {str(e)}")
|
| 949 |
-
|
| 950 |
-
@app.get("/")
|
| 951 |
-
async def root():
|
| 952 |
-
"""Root endpoint with API information"""
|
| 953 |
-
return {
|
| 954 |
-
"message": "TTS API with Coqui TTS and Voice Cloning",
|
| 955 |
-
"endpoints": {
|
| 956 |
-
"POST /api/tts": "Generate TTS for a single text",
|
| 957 |
-
"POST /api/batch-tts": "Generate TTS for multiple texts",
|
| 958 |
-
"POST /api/upload-voice": "Upload a voice sample for cloning",
|
| 959 |
-
"POST /api/clone-voice": "Clone a voice from multiple samples",
|
| 960 |
-
"GET /api/voices": "List available voices",
|
| 961 |
-
"GET /api/health": "Health check",
|
| 962 |
-
"POST /api/reload-model": "Reload TTS model",
|
| 963 |
-
"POST /api/change-voice": "Change voice style",
|
| 964 |
-
"GET /api/available-voices": "Get available voice options",
|
| 965 |
-
"GET /api/download/{filename}": "Download generated audio",
|
| 966 |
-
"GET /api/files": "List generated files"
|
| 967 |
-
},
|
| 968 |
-
"model_loaded": model_loaded,
|
| 969 |
-
"model_name": current_model if model_loaded else "None",
|
| 970 |
-
"current_voice_style": current_voice_style,
|
| 971 |
-
"voice_cloning_supported": supports_voice_cloning()
|
| 972 |
-
}
|
| 973 |
|
| 974 |
if __name__ == "__main__":
|
| 975 |
import uvicorn
|
| 976 |
-
|
| 977 |
-
print("📊 API endpoints available at: http://localhost:7860/")
|
| 978 |
-
print("💡 Model will be loaded on first request to save memory")
|
| 979 |
-
print("🎙️ Voice selection feature enabled")
|
| 980 |
-
print("🔄 Use /api/reload-model to force reload if needed")
|
| 981 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 30 |
allow_headers=["*"],
|
| 31 |
)
|
| 32 |
|
| 33 |
+
# Configuration - FIXED OCI URL HANDLING
|
| 34 |
+
OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "").strip()
|
| 35 |
+
if OCI_UPLOAD_API_URL:
|
| 36 |
+
# Remove trailing slash if present
|
| 37 |
+
OCI_UPLOAD_API_URL = OCI_UPLOAD_API_URL.rstrip('/')
|
| 38 |
+
|
| 39 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 40 |
|
| 41 |
print(f"✅ Using device: {DEVICE}")
|
| 42 |
+
print(f"🔧 OCI Upload URL: {OCI_UPLOAD_API_URL or 'Not configured - uploads will be local only'}")
|
| 43 |
|
| 44 |
# Model configuration
|
| 45 |
MODEL_REPO_ID = "coqui/XTTS-v2"
|
|
|
|
| 109 |
def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
|
| 110 |
"""Upload file to OCI using your existing API with subfolder support"""
|
| 111 |
try:
|
| 112 |
+
# Check if OCI URL is properly configured
|
| 113 |
+
if not OCI_UPLOAD_API_URL:
|
| 114 |
+
print("⚠️ OCI upload skipped - OCI_UPLOAD_API_URL not configured")
|
| 115 |
return {"status": "skipped", "message": "OCI upload disabled"}, None
|
| 116 |
|
| 117 |
+
# Validate URL format
|
| 118 |
+
if not OCI_UPLOAD_API_URL.startswith(('http://', 'https://')):
|
| 119 |
+
return None, f"Invalid OCI URL format: {OCI_UPLOAD_API_URL}"
|
| 120 |
+
|
| 121 |
url = f"{OCI_UPLOAD_API_URL}/api/upload"
|
| 122 |
+
print(f"🔗 Attempting upload to: {url}")
|
| 123 |
|
| 124 |
with open(file_path, "rb") as f:
|
| 125 |
files = {"file": (filename, f, "audio/wav")}
|
|
|
|
| 128 |
"subfolder": "voiceover"
|
| 129 |
}
|
| 130 |
|
| 131 |
+
# Add headers and better timeout handling
|
| 132 |
+
headers = {
|
| 133 |
+
"User-Agent": "TTS-API/1.0",
|
| 134 |
+
"Accept": "application/json"
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
response = requests.post(
|
| 138 |
+
url,
|
| 139 |
+
files=files,
|
| 140 |
+
data=data,
|
| 141 |
+
headers=headers,
|
| 142 |
+
timeout=30
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
print(f"📡 Upload response status: {response.status_code}")
|
| 146 |
|
| 147 |
if response.status_code == 200:
|
| 148 |
+
try:
|
| 149 |
+
result = response.json()
|
| 150 |
+
if result.get("status") == "success":
|
| 151 |
+
print(f"✅ Upload successful: {result}")
|
| 152 |
+
return result, None
|
| 153 |
+
else:
|
| 154 |
+
error_msg = result.get("message", "Upload failed - unknown error")
|
| 155 |
+
print(f"❌ Upload failed: {error_msg}")
|
| 156 |
+
return None, error_msg
|
| 157 |
+
except ValueError as e:
|
| 158 |
+
return None, f"Invalid JSON response: {str(e)}"
|
| 159 |
else:
|
| 160 |
+
error_msg = f"Upload failed with status {response.status_code}: {response.text}"
|
| 161 |
+
print(f"❌ {error_msg}")
|
| 162 |
+
return None, error_msg
|
| 163 |
|
| 164 |
except requests.exceptions.Timeout:
|
| 165 |
+
error_msg = "OCI upload timeout - server took too long to respond"
|
| 166 |
+
print(f"❌ {error_msg}")
|
| 167 |
+
return None, error_msg
|
| 168 |
except requests.exceptions.ConnectionError:
|
| 169 |
+
error_msg = f"Cannot connect to OCI API at {OCI_UPLOAD_API_URL} - check if the server is running and accessible"
|
| 170 |
+
print(f"❌ {error_msg}")
|
| 171 |
+
return None, error_msg
|
| 172 |
+
except requests.exceptions.RequestException as e:
|
| 173 |
+
error_msg = f"Network error during upload: {str(e)}"
|
| 174 |
+
print(f"❌ {error_msg}")
|
| 175 |
+
return None, error_msg
|
| 176 |
except Exception as e:
|
| 177 |
+
error_msg = f"Unexpected upload error: {str(e)}"
|
| 178 |
+
print(f"❌ {error_msg}")
|
| 179 |
+
return None, error_msg
|
| 180 |
|
| 181 |
def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=3):
|
| 182 |
"""Upload file to OCI with retry logic"""
|
|
|
|
| 386 |
finally:
|
| 387 |
model_loading = False
|
| 388 |
|
| 389 |
+
# Health check endpoint
|
| 390 |
+
@app.get("/")
|
| 391 |
+
async def root():
|
| 392 |
+
return {
|
| 393 |
+
"status": "running",
|
| 394 |
+
"service": "TTS API",
|
| 395 |
+
"model_loaded": model_loaded,
|
| 396 |
+
"current_model": current_model,
|
| 397 |
+
"device": DEVICE,
|
| 398 |
+
"oci_configured": bool(OCI_UPLOAD_API_URL)
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
@app.get("/api/health")
|
| 402 |
+
async def health_check():
|
| 403 |
+
"""Health check endpoint"""
|
| 404 |
+
return {
|
| 405 |
+
"status": "healthy",
|
| 406 |
+
"model_loaded": model_loaded,
|
| 407 |
+
"current_model": current_model,
|
| 408 |
+
"device": DEVICE,
|
| 409 |
+
"timestamp": datetime.now().isoformat()
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
@app.get("/api/oci-health")
|
| 413 |
+
async def check_oci_health():
|
| 414 |
+
"""Check if OCI upload service is available"""
|
| 415 |
+
if not OCI_UPLOAD_API_URL:
|
| 416 |
+
return {
|
| 417 |
+
"status": "not_configured",
|
| 418 |
+
"message": "OCI_UPLOAD_API_URL environment variable is not set"
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
try:
|
| 422 |
+
# Test connection to OCI service
|
| 423 |
+
test_url = f"{OCI_UPLOAD_API_URL}/api/health"
|
| 424 |
+
response = requests.get(test_url, timeout=10)
|
| 425 |
+
|
| 426 |
+
if response.status_code == 200:
|
| 427 |
+
return {
|
| 428 |
+
"status": "healthy",
|
| 429 |
+
"oci_url": OCI_UPLOAD_API_URL,
|
| 430 |
+
"message": "OCI service is reachable"
|
| 431 |
+
}
|
| 432 |
+
else:
|
| 433 |
+
return {
|
| 434 |
+
"status": "unhealthy",
|
| 435 |
+
"oci_url": OCI_UPLOAD_API_URL,
|
| 436 |
+
"message": f"OCI service returned status {response.status_code}"
|
| 437 |
+
}
|
| 438 |
+
except Exception as e:
|
| 439 |
+
return {
|
| 440 |
+
"status": "error",
|
| 441 |
+
"oci_url": OCI_UPLOAD_API_URL,
|
| 442 |
+
"message": f"Cannot connect to OCI service: {str(e)}"
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
# API endpoints with lazy loading
|
| 446 |
@app.post("/api/tts")
|
| 447 |
async def generate_tts(request: TTSRequest):
|
|
|
|
| 563 |
print(f"❌ OCI upload failed: {error}")
|
| 564 |
# Still return the local file path if upload fails
|
| 565 |
return {
|
| 566 |
+
"status": "success_local",
|
| 567 |
+
"message": f"TTS generated locally (upload failed: {error})",
|
| 568 |
"local_file": output_path,
|
| 569 |
"filename": filename,
|
| 570 |
"file_size": file_size,
|
| 571 |
"voice_style": request.voice_style,
|
| 572 |
+
"model_used": current_model,
|
| 573 |
+
"oci_upload_error": error
|
| 574 |
}
|
| 575 |
|
| 576 |
print(f"✅ Upload successful: {filename}")
|
|
|
|
| 638 |
print(f" Processing text {i+1}/{len(request.texts)}")
|
| 639 |
|
| 640 |
# Generate sequential filename
|
| 641 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 642 |
+
filename = f"voiceover_{timestamp}_{i+1:03d}.wav"
|
| 643 |
output_path = f"/tmp/output/{filename}"
|
| 644 |
|
| 645 |
+
# Clean text
|
|
|
|
|
|
|
|
|
|
| 646 |
cleaned_text = clean_text(text)
|
|
|
|
| 647 |
|
| 648 |
+
# Generate TTS
|
| 649 |
try:
|
| 650 |
+
# Get speaker for VITS models
|
| 651 |
speaker = None
|
| 652 |
if "vctk/vits" in current_model:
|
| 653 |
speaker_map = {
|
|
|
|
| 665 |
speaker=speaker
|
| 666 |
)
|
| 667 |
else:
|
| 668 |
+
tts.tts_to_file(text=cleaned_text, file_path=output_path)
|
| 669 |
+
|
| 670 |
+
# Verify file was created
|
| 671 |
+
if not os.path.exists(output_path):
|
| 672 |
+
raise Exception(f"Failed to create output file: {output_path}")
|
| 673 |
+
|
| 674 |
+
file_size = os.path.getsize(output_path)
|
| 675 |
+
|
| 676 |
+
# Upload to OCI
|
| 677 |
+
upload_result, error = upload_to_oci_with_retry(
|
| 678 |
+
output_path, filename, request.project_id, "voiceover"
|
| 679 |
+
)
|
| 680 |
+
|
| 681 |
+
if error:
|
| 682 |
+
results.append({
|
| 683 |
+
"status": "success_local",
|
| 684 |
+
"filename": filename,
|
| 685 |
+
"local_file": output_path,
|
| 686 |
+
"file_size": file_size,
|
| 687 |
+
"upload_error": error,
|
| 688 |
+
"text_index": i
|
| 689 |
+
})
|
| 690 |
+
else:
|
| 691 |
+
# Clean up local file on successful upload
|
| 692 |
+
try:
|
| 693 |
+
os.remove(output_path)
|
| 694 |
+
except:
|
| 695 |
+
pass
|
| 696 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
results.append({
|
| 698 |
+
"status": "success",
|
| 699 |
+
"filename": filename,
|
| 700 |
+
"oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
|
| 701 |
+
"file_size": file_size,
|
| 702 |
+
"text_index": i
|
| 703 |
})
|
| 704 |
+
|
| 705 |
+
except Exception as e:
|
|
|
|
|
|
|
| 706 |
results.append({
|
|
|
|
| 707 |
"status": "error",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
"filename": filename,
|
| 709 |
+
"error": str(e),
|
| 710 |
+
"text_index": i
|
| 711 |
})
|
| 712 |
+
|
| 713 |
+
# Count successes and errors
|
| 714 |
+
success_count = len([r for r in results if r["status"] in ["success", "success_local"]])
|
| 715 |
+
error_count = len([r for r in results if r["status"] == "error"])
|
|
|
|
|
|
|
|
|
|
| 716 |
|
| 717 |
return {
|
| 718 |
"status": "completed",
|
| 719 |
+
"message": f"Processed {len(request.texts)} texts: {success_count} successful, {error_count} errors",
|
| 720 |
"results": results,
|
| 721 |
+
"summary": {
|
| 722 |
+
"total": len(request.texts),
|
| 723 |
+
"successful": success_count,
|
| 724 |
+
"errors": error_count
|
| 725 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
}
|
| 727 |
|
| 728 |
except Exception as e:
|
| 729 |
+
print(f"❌ Batch TTS error: {str(e)}")
|
| 730 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 731 |
|
| 732 |
@app.post("/api/clone-voice")
|
| 733 |
+
async def clone_voice_endpoint(
|
| 734 |
project_id: str = Form(...),
|
| 735 |
voice_name: str = Form(...),
|
| 736 |
description: str = Form(""),
|
| 737 |
files: List[UploadFile] = File(...)
|
| 738 |
):
|
| 739 |
+
"""Clone a voice from uploaded audio samples"""
|
| 740 |
try:
|
| 741 |
+
if not files:
|
| 742 |
+
raise HTTPException(status_code=400, detail="No audio files provided")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
|
| 744 |
# Save uploaded files temporarily
|
| 745 |
temp_files = []
|
| 746 |
+
for file in files:
|
| 747 |
+
if not file.filename.lower().endswith(('.wav', '.mp3', '.flac')):
|
| 748 |
+
raise HTTPException(status_code=400, detail="Only WAV, MP3, and FLAC files are supported")
|
|
|
|
| 749 |
|
|
|
|
| 750 |
temp_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
|
| 751 |
with open(temp_path, "wb") as f:
|
| 752 |
+
shutil.copyfileobj(file.file, f)
|
|
|
|
| 753 |
temp_files.append(temp_path)
|
|
|
|
| 754 |
|
| 755 |
# Clone voice
|
| 756 |
success, message = clone_voice(voice_name, temp_files, description)
|
| 757 |
|
| 758 |
+
# Clean up temp files
|
| 759 |
for temp_file in temp_files:
|
| 760 |
try:
|
| 761 |
os.remove(temp_file)
|
|
|
|
| 766 |
return {
|
| 767 |
"status": "success",
|
| 768 |
"message": message,
|
| 769 |
+
"voice_name": voice_name,
|
| 770 |
+
"samples_used": len(temp_files)
|
| 771 |
}
|
| 772 |
else:
|
| 773 |
raise HTTPException(status_code=500, detail=message)
|
| 774 |
|
| 775 |
+
except HTTPException:
|
| 776 |
+
raise
|
| 777 |
except Exception as e:
|
| 778 |
print(f"❌ Voice cloning error: {str(e)}")
|
| 779 |
raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
|
| 780 |
|
| 781 |
@app.get("/api/voices")
|
| 782 |
async def list_voices():
|
| 783 |
+
"""List all available cloned voices"""
|
| 784 |
try:
|
| 785 |
voices_dir = Path("/tmp/voices")
|
| 786 |
+
if not voices_dir.exists():
|
| 787 |
+
return {"voices": []}
|
| 788 |
|
| 789 |
+
voices = []
|
| 790 |
+
for voice_dir in voices_dir.iterdir():
|
| 791 |
+
if voice_dir.is_dir():
|
| 792 |
+
samples = list(voice_dir.glob("sample_*.wav"))
|
| 793 |
voices.append({
|
| 794 |
+
"name": voice_dir.name,
|
| 795 |
"samples_count": len(samples),
|
| 796 |
+
"created_at": datetime.fromtimestamp(voice_dir.stat().st_mtime).isoformat()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
})
|
| 798 |
|
| 799 |
+
return {"voices": voices}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
except Exception as e:
|
|
|
|
| 801 |
raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
|
| 802 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
@app.post("/api/change-voice")
|
| 804 |
+
async def change_voice_style(request: ChangeVoiceRequest):
|
| 805 |
+
"""Change the voice style (reloads model)"""
|
|
|
|
|
|
|
| 806 |
try:
|
| 807 |
+
global model_loaded
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
|
| 809 |
+
print(f"🔄 Changing voice style to: {request.voice_style}")
|
| 810 |
|
| 811 |
+
# Reset model loaded flag to force reload
|
|
|
|
| 812 |
model_loaded = False
|
| 813 |
|
| 814 |
+
if load_tts_model(request.voice_style):
|
|
|
|
|
|
|
|
|
|
| 815 |
return {
|
| 816 |
"status": "success",
|
| 817 |
+
"message": f"Voice style changed to {request.voice_style}",
|
| 818 |
+
"current_voice_style": current_voice_style,
|
| 819 |
+
"current_model": current_model
|
| 820 |
}
|
| 821 |
else:
|
| 822 |
+
raise HTTPException(status_code=500, detail="Failed to load new voice style")
|
|
|
|
|
|
|
|
|
|
| 823 |
|
| 824 |
except Exception as e:
|
| 825 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 826 |
+
|
| 827 |
+
@app.get("/api/voice-styles")
|
| 828 |
+
async def get_voice_styles():
|
| 829 |
+
"""Get available voice styles"""
|
| 830 |
+
styles = {
|
| 831 |
+
"male_deep": "Deep male voice (VITS)",
|
| 832 |
+
"male_medium": "Medium male voice (VITS)",
|
| 833 |
+
"female_1": "Female voice 1 (VITS)",
|
| 834 |
+
"female_2": "Female voice 2 (VITS)",
|
| 835 |
"default_female": "Default female voice (Tacotron2)",
|
| 836 |
"clear_male": "Clear male voice (Tacotron2)"
|
| 837 |
}
|
| 838 |
+
return {"voice_styles": styles}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
|
| 840 |
if __name__ == "__main__":
|
| 841 |
import uvicorn
|
| 842 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|