fix
Browse files- .dockerignore +13 -0
- Dockerfile +22 -0
- app.py +116 -0
- app_ori.py +108 -0
- backend +0 -1
- fish_audio.py +53 -0
- notes +4 -0
- requirements.txt +7 -0
.dockerignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
__pycache__/
*.pyc
*.pyo
*.pyd
.DS_Store
uploads/
outputs_v2/
*.mp3
*.wav
notes
app_ori.py
.gitignore
# Local secrets file read by load_dotenv() in app.py; keep it out of the
# build context so `COPY . .` can never bake API keys into the image.
.env
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a Python 3.10 runtime as a parent image for fish-audio-sdk compatibility
FROM python:3.10-slim

# Install system packages before anything from the project is copied in, so
# this layer stays cached when requirements.txt or the source changes.
# ffmpeg is required for many audio operations; --no-install-recommends keeps
# the image from pulling in optional packages.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account the application runs as (instead of root).
RUN useradd --create-home --uid 1000 appuser

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY requirements.txt .

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your application's code into the container
COPY --chown=appuser:appuser . .

# app.py creates uploads/ and outputs_v2/ relative to the working directory,
# so they (and /app itself) must be writable by the unprivileged user.
RUN mkdir -p uploads outputs_v2 \
    && chown appuser:appuser /app uploads outputs_v2
USER appuser

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Command to run the application using gunicorn, a production-ready server
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "600", "app:app"]
|
app.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, send_file
|
| 2 |
+
import os
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
import ebooklib
|
| 6 |
+
from ebooklib import epub
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
import io
|
| 9 |
+
from fish_audio import clone_voice_with_fish
|
| 10 |
+
import uuid
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Load environment variables from a .env file if it exists.
# This is particularly useful for local development.
load_dotenv()

# Flask application object; the route handlers below register on it and the
# Dockerfile's gunicorn command serves it as "app:app".
app = Flask(__name__)
|
| 18 |
+
|
| 19 |
+
def get_text_from_url(url):
    """Download ``url`` and return the readable text it contains.

    EPUB responses are unpacked and the text of every document item is
    concatenated (one trailing newline per item); HTML responses have their
    markup stripped; every other content type is returned as the decoded
    response body.

    :param url: Address of the resource to fetch.
    :return: The extracted text, or ``None`` when the request fails or an
        EPUB payload cannot be parsed.
    """
    try:
        # requests applies no timeout by default; without one a stalled
        # remote host would tie up the worker indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # The header may be missing altogether; fall back to '' so the substring
    # checks below cannot raise TypeError on None.
    content_type = (response.headers.get('content-type') or '').lower()

    if 'epub' in content_type:
        try:
            book = epub.read_epub(io.BytesIO(response.content))
            chapter_texts = [
                BeautifulSoup(item.get_body_content(), 'html.parser').get_text()
                for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
            ]
        except Exception as e:
            # The payload is untrusted remote data and a corrupt archive can
            # fail in several library-specific ways; report it the same way
            # as a failed download so the caller answers with a client error.
            print(f"Error parsing EPUB: {e}")
            return None
        return "".join(f"{chapter}\n" for chapter in chapter_texts)

    if 'html' in content_type:
        return BeautifulSoup(response.content, 'html.parser').get_text()

    # Plain text and any unrecognised content type: return the decoded body.
    return response.text
|
| 45 |
+
|
| 46 |
+
def is_url(string):
    """Report whether ``string`` parses as a URL with a scheme and a host.

    :param string: Candidate text to inspect.
    :return: ``True`` when ``urlparse`` yields both a non-empty scheme and a
        non-empty network location; ``False`` otherwise, including when
        parsing raises ``ValueError``.
    """
    try:
        parts = urlparse(string)
    except ValueError:
        return False
    return bool(parts.scheme and parts.netloc)
|
| 52 |
+
|
| 53 |
+
@app.route('/api/voice-transfer', methods=['POST'])
def voice_transfer():
    """Clone the uploaded voice and return speech for the requested text.

    Expects a multipart POST containing:

    * ``voice_file`` - the reference audio whose voice is cloned.
    * ``text`` - literal text, or a URL whose content is fetched and used.

    :return: The generated MP3 as an attachment on success; otherwise a JSON
        ``{"error": ...}`` body with status 400 (invalid request data) or
        500 (generation or delivery failed).
    """
    if 'voice_file' not in request.files:
        return jsonify({"error": "No voice file part"}), 400

    file = request.files['voice_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Validate the text before touching the disk so a rejected request does
    # not leave an orphaned upload behind.
    text_input = request.form.get('text')
    print(text_input)
    if not text_input:
        return jsonify({"error": "No text or text_url provided"}), 400

    if is_url(text_input):
        text_content = get_text_from_url(text_input)
        if text_content is None:
            return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
    else:
        print("Not URL")
        text_content = text_input

    # Save the uploaded file temporarily.
    # The client-supplied filename is untrusted: joining it into the path
    # would allow "../" traversal and let concurrent uploads overwrite each
    # other. Keep only its extension and generate the name ourselves.
    upload_folder = 'uploads'
    os.makedirs(upload_folder, exist_ok=True)
    extension = os.path.splitext(os.path.basename(file.filename))[1]
    voice_file_path = os.path.join(upload_folder, f"{uuid.uuid4().hex}{extension}")
    file.save(voice_file_path)

    # --- Perform Voice Cloning using Fish Audio ---
    print("Starting voice cloning process with Fish Audio...")

    output_dir = 'outputs_v2'
    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, f"output_cloned_{uuid.uuid4().hex}.mp3")

    try:
        # Note: For best results, provide an accurate transcript of the reference audio.
        # Since we don't get it from the user, a generic placeholder is passed.
        clone_voice_with_fish(
            text=text_content,
            reference_audio_path=voice_file_path,
            output_path=output_file_path,
            reference_text="This is a reference audio for voice cloning.",
        )
    except Exception as e:
        app.logger.exception(f"Error during voice cloning with Fish Audio: {e}")
        return jsonify({"error": "Failed to generate voice file."}), 500
    finally:
        # The reference audio is only needed while cloning; remove it so
        # uploads do not accumulate. Cleanup is best-effort.
        try:
            os.remove(voice_file_path)
        except OSError as e:
            app.logger.warning(f"Could not remove uploaded file {voice_file_path}: {e}")

    try:
        return send_file(output_file_path, as_attachment=True)
    except Exception as e:
        app.logger.error(f"Error sending file: {e}")
        return jsonify({"error": "Failed to send audio file."}), 500
|
| 113 |
+
|
| 114 |
+
# if __name__ == '__main__':
|
| 115 |
+
# app.run(debug=True, port=5001)
|
| 116 |
+
|
app_ori.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
|
| 2 |
+
import os
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
import ebooklib
|
| 6 |
+
from ebooklib import epub
|
| 7 |
+
from urllib.parse import urlparse
|
| 8 |
+
import io
|
| 9 |
+
import os
|
| 10 |
+
import torch
|
| 11 |
+
from openvoice import se_extractor
|
| 12 |
+
from openvoice.api import ToneColorConverter
|
| 13 |
+
from melo.api import TTS
|
| 14 |
+
from voice_cloning import VoiceCloningManager
|
| 15 |
+
|
| 16 |
+
# Flask application object; the route handler below registers on it.
app = Flask(__name__)

# --- Voice Cloning Setup ---
# Initialize the VoiceCloningManager once when the app starts.
# This loads the models into memory so they don't have to be reloaded for each request.
# The instance is created at import time, so importing this module performs
# whatever work VoiceCloningManager() does in its constructor.
print("Initializing Voice Cloning Manager...")
voice_cloning_manager = VoiceCloningManager()
print("Voice Cloning Manager Initialized.")
# -------------------------
|
| 25 |
+
|
| 26 |
+
def get_text_from_url(url):
    """Fetch ``url`` and extract plain text from the response.

    EPUB payloads are opened and the text of each document item is appended
    with a trailing newline; HTML payloads are reduced to their text; any
    other content type yields the decoded response body unchanged.

    :param url: Address of the resource to fetch.
    :return: The extracted text, or ``None`` if the request fails or an EPUB
        payload cannot be parsed.
    """
    try:
        # Without an explicit timeout requests waits forever on a silent
        # server, which would block the request handler.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # A response may carry no Content-Type header; use '' in that case so
    # the ``in`` checks below operate on a string rather than None.
    content_type = (response.headers.get('content-type') or '').lower()

    if 'epub' in content_type:
        try:
            book = epub.read_epub(io.BytesIO(response.content))
            parts = []
            for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                soup = BeautifulSoup(item.get_body_content(), 'html.parser')
                parts.append(soup.get_text() + "\n")
        except Exception as e:
            # Remote data is untrusted and a damaged archive can fail in
            # library-specific ways; signal it like a failed download.
            print(f"Error parsing EPUB: {e}")
            return None
        return "".join(parts)

    if 'html' in content_type:
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text()

    # Plain text and every other content type fall through to the raw body.
    return response.text
|
| 52 |
+
|
| 53 |
+
def is_url(string):
    """Check that ``string`` has both a URL scheme and a network location.

    :param string: Text that may or may not be a URL.
    :return: ``True`` if ``urlparse`` finds a scheme and a netloc, ``False``
        if either is empty or if parsing raises ``ValueError``.
    """
    try:
        parsed = urlparse(string)
        has_scheme = bool(parsed.scheme)
        has_host = bool(parsed.netloc)
    except ValueError:
        return False
    return has_scheme and has_host
|
| 59 |
+
|
| 60 |
+
@app.route('/api/voice-transfer', methods=['POST'])
def voice_transfer():
    """Generate cloned-voice audio for the submitted text.

    Expects a multipart POST containing:

    * ``voice_file`` - the reference audio to clone.
    * ``text`` - literal text, or a URL whose content is fetched and used.

    :return: JSON with ``message`` and ``output_file`` (the path returned by
        ``voice_cloning_manager.generate_cloned_voice_audio``) on success;
        otherwise a JSON ``{"error": ...}`` body with status 400 or 500.
    """
    # Imported here because this module has no file-level ``import uuid``.
    import uuid

    if 'voice_file' not in request.files:
        return jsonify({"error": "No voice file part"}), 400

    file = request.files['voice_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Check the text first so an invalid request never writes to disk.
    text_input = request.form.get('text')
    print(text_input)
    if not text_input:
        return jsonify({"error": "No text or text_url provided"}), 400

    if is_url(text_input):
        text_content = get_text_from_url(text_input)
        if text_content is None:
            return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
    else:
        print("Not URL")
        text_content = text_input

    # Save the uploaded file temporarily.
    # The filename comes from the client and must not be used to build the
    # path ("../" traversal, overwriting another upload). Keep only the
    # extension and generate a unique name instead.
    upload_folder = 'uploads'
    os.makedirs(upload_folder, exist_ok=True)
    extension = os.path.splitext(os.path.basename(file.filename))[1]
    voice_file_path = os.path.join(upload_folder, f"{uuid.uuid4().hex}{extension}")
    file.save(voice_file_path)

    # --- Perform Voice Cloning ---
    print("Starting voice cloning process...")
    # Call the manager to generate the audio
    output_file_path = voice_cloning_manager.generate_cloned_voice_audio(text_content, voice_file_path)

    if output_file_path is None:
        return jsonify({"error": "Failed to generate voice file."}), 500

    return jsonify({
        "message": "Voice transfer process completed successfully.",
        "output_file": output_file_path
    })
|
| 106 |
+
|
| 107 |
+
# Local entry point: start Flask's built-in server on port 5001 when this
# file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode, which is meant for
# local development only - confirm this file is never the deployed entry point.
if __name__ == '__main__':
    app.run(debug=True, port=5001)
|
backend
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Subproject commit b6b29998fd7e22d730dbb8f7e6def9b338692082
|
|
|
|
|
|
fish_audio.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from fish_audio_sdk import Session, TTSRequest, ReferenceAudio
|
| 4 |
+
|
| 5 |
+
def clone_voice_with_fish(text: str, reference_audio_path: str, output_path: str, reference_text: str = "Text in reference audio") -> None:
    """
    Generates speech with a cloned voice using the Fish Audio API.

    Long input is reduced to a short excerpt (see ``_select_excerpt``) and
    underscore-wrapped markup words are removed before synthesis.

    :param text: The text to be converted to speech.
    :param reference_audio_path: Path to the reference audio file for voice cloning.
    :param output_path: Path to save the generated audio file.
    :param reference_text: The transcription of the reference audio. This is important for better quality.
    :raises ValueError: If no text is left to synthesize, or if the
        FISH_AUDIO_API_KEY environment variable is not set.
    """
    text = _select_excerpt(text)

    # Remove special words (e.g., __Gutenberg__, _very_)
    text = re.sub(r'__\w+__\s*|_\w+_\s*', '', text)
    print(text)

    # Fail early instead of sending an empty synthesis request to the API.
    if not text.strip():
        raise ValueError("No text left to synthesize after preprocessing.")

    # Load the API key from an environment variable for security.
    api_key = os.getenv("FISH_AUDIO_API_KEY")
    if not api_key:
        raise ValueError("FISH_AUDIO_API_KEY environment variable not set.")

    session = Session(api_key)

    # Read the reference clip before creating the output file, so a missing
    # or unreadable reference does not leave an empty output behind.
    with open(reference_audio_path, "rb") as audio_file:
        reference_audio = audio_file.read()

    tts_request = TTSRequest(
        text=text,
        references=[
            ReferenceAudio(
                audio=reference_audio,
                text=reference_text,
            )
        ],
    )

    try:
        with open(output_path, "wb") as f:
            # The model is selected through Session.tts's ``backend``
            # argument; it was previously passed to TTSRequest instead.
            # NOTE(review): confirm against the installed fish-audio-sdk.
            for chunk in session.tts(tts_request, backend='s1'):
                f.write(chunk)
    except Exception:
        # Do not leave a truncated audio file behind when synthesis fails.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    print("File output to: ", output_path)


def _select_excerpt(text, max_chars=100, sentence_index=15):
    """Return the portion of ``text`` that is sent for synthesis.

    Text of at most ``max_chars`` characters is returned unchanged. Longer
    text is reduced to the '.'-separated segment at ``sentence_index``,
    truncated to ``max_chars``. When that segment is missing or blank the
    start of the text is used instead, so long input with few sentences no
    longer collapses to an empty string.
    """
    if len(text) <= max_chars:
        return text

    sentences = text.split('.')
    excerpt = sentences[sentence_index] if len(sentences) > sentence_index else ''
    if not excerpt.strip():
        excerpt = text
    return excerpt[:max_chars]
|
| 43 |
+
|
| 44 |
+
if __name__ == '__main__':
    # This is an example of how to use the function.
    # You would import clone_voice_with_fish from this file into your app.py.
    # Running it needs FISH_AUDIO_API_KEY in the environment and the
    # reference audio file below in the current working directory.
    clone_voice_with_fish(
        text="The water's writing engraves the rocks like the graphite from my pencil engraves this paper.",
        reference_audio_path="example_reference_elon.mp3",
        output_path="output_fish_clone.mp3",
        # It's best to have an accurate transcript of the reference audio for better results.
        reference_text="My name is Elon Musk."
    )
|
notes
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
curl -X POST -F "voice_file=@backend/example_reference_kx.mp3" -F "text=http://www.gutenberg.org/files/11/11-0.txt" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
|
| 2 |
+
|
| 3 |
+
curl -X POST -F "voice_file=@backend/kx.mp3" -F "text=宝贝晚上好,你吃饱了吗?要吃鲍鱼吗?" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
|
| 4 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask
|
| 2 |
+
requests
|
| 3 |
+
beautifulsoup4
|
| 4 |
+
ebooklib
|
| 5 |
+
gunicorn
|
| 6 |
+
fish-audio-sdk
|
| 7 |
+
python-dotenv
|