Chloe committed on
Commit
ef9a767
·
1 Parent(s): 9ddc7c3
Files changed (8) hide show
  1. .dockerignore +13 -0
  2. Dockerfile +22 -0
  3. app.py +116 -0
  4. app_ori.py +108 -0
  5. backend +0 -1
  6. fish_audio.py +53 -0
  7. notes +4 -0
  8. requirements.txt +7 -0
.dockerignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
.git
__pycache__/
*.pyc
*.pyo
*.pyd
.DS_Store
uploads/
outputs_v2/
*.mp3
*.wav
notes
app_ori.py
.gitignore
# app.py calls load_dotenv(); keep a local .env (which holds FISH_AUDIO_API_KEY)
# out of the build context so `COPY . .` cannot bake the secret into the image.
.env
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a Python 3.10 runtime as a parent image for fish-audio-sdk compatibility
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Install ffmpeg, which is required for many audio operations.
# This layer comes before the requirements copy so that editing requirements.txt
# does not invalidate it; --no-install-recommends keeps the image smaller.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file and install the Python dependencies it lists
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your application's code into the container
COPY . .

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Command to run the application using gunicorn, a production-ready server
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "600", "app:app"]
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_file
2
+ import os
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import ebooklib
6
+ from ebooklib import epub
7
+ from urllib.parse import urlparse
8
+ import io
9
+ from fish_audio import clone_voice_with_fish
10
+ import uuid
11
+ from dotenv import load_dotenv
12
+
13
+ # Load environment variables from a .env file if it exists.
14
+ # This is particularly useful for local development.
15
+ load_dotenv()
16
+
17
+ app = Flask(__name__)
18
+
19
def get_text_from_url(url, timeout=30):
    """Download ``url`` and return its content as plain text.

    The response's Content-Type header selects the extraction strategy:
    EPUB documents are unpacked and each document item is reduced to text,
    HTML is reduced to its text nodes, and anything else is returned as the
    decoded response body.

    :param url: Address to fetch.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled remote host cannot block the worker indefinitely.
    :return: The extracted text, or ``None`` if the download or the EPUB
        parsing failed.
    """
    # NOTE(review): ``url`` comes straight from the request form, so this is a
    # server-side fetch of an untrusted address (SSRF risk). Consider
    # restricting the allowed hosts before exposing this publicly.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # The header may be absent; fall back to '' so the substring tests below
    # cannot fail with a TypeError on None.
    content_type = (response.headers.get('content-type') or '').lower()

    if 'epub' in content_type:
        try:
            book = epub.read_epub(io.BytesIO(response.content))
            return "".join(
                BeautifulSoup(item.get_body_content(), 'html.parser').get_text() + "\n"
                for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
            )
        except Exception as e:
            # A malformed archive should be reported to the caller as a
            # failed retrieval (None) rather than crash the request.
            print(f"Error parsing EPUB from URL: {e}")
            return None

    if 'html' in content_type:
        return BeautifulSoup(response.content, 'html.parser').get_text()

    # Plain text, and the fallback for other or unspecified content types.
    return response.text
45
+
46
def is_url(string):
    """Return True if ``string`` is an absolute http(s) URL.

    Only ``http`` and ``https`` are accepted because the result decides
    whether the text is downloaded with ``requests``; input using any other
    scheme is treated as ordinary text rather than failing the download.

    :param string: Candidate value, normally the ``text`` form field.
    :return: ``True`` for an http/https URL with a network location,
        ``False`` for anything else (including non-string input).
    """
    if not isinstance(string, str):
        return False
    try:
        parsed = urlparse(string)
    except ValueError:
        # urlparse rejects some malformed inputs, e.g. an unbalanced IPv6 bracket.
        return False
    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)
52
+
53
@app.route('/api/voice-transfer', methods=['POST'])
def voice_transfer():
    """Clone the uploaded voice and return speech for the submitted text.

    Expects a multipart POST with a ``voice_file`` upload (the reference
    audio) and a ``text`` field holding either the text to speak or an
    http(s) URL to fetch the text from.

    Responds with the generated MP3 as an attachment, a 400 JSON error for
    invalid input or an unreachable URL, or a 500 JSON error if synthesis or
    sending the file fails.
    """
    if 'voice_file' not in request.files:
        return jsonify({"error": "No voice file part"}), 400

    file = request.files['voice_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Validate and resolve the text before anything is written to disk, so a
    # bad request does not leave an orphaned upload behind.
    text_input = request.form.get('text')
    if not text_input:
        return jsonify({"error": "No text or text_url provided"}), 400

    if is_url(text_input):
        text_content = get_text_from_url(text_input)
        if text_content is None:
            return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
    else:
        text_content = text_input

    # Store the upload under a generated name. The client-supplied filename is
    # untrusted (it may contain path components such as "../") and two
    # concurrent requests could otherwise overwrite each other's file.
    upload_folder = 'uploads'
    os.makedirs(upload_folder, exist_ok=True)
    extension = os.path.splitext(os.path.basename(file.filename))[1]
    if not extension[1:].isalnum():
        extension = ''
    voice_file_path = os.path.join(upload_folder, f"{uuid.uuid4().hex}{extension}")
    file.save(voice_file_path)

    # --- Perform Voice Cloning using Fish Audio ---
    app.logger.info("Starting voice cloning process with Fish Audio...")

    output_dir = 'outputs_v2'
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"output_cloned_{uuid.uuid4().hex}.mp3"
    output_file_path = os.path.join(output_dir, output_filename)

    try:
        # Note: For best results, provide an accurate transcript of the reference audio.
        # Since we don't get it from the user, a generic placeholder is passed instead.
        clone_voice_with_fish(
            text=text_content,
            reference_audio_path=voice_file_path,
            output_path=output_file_path,
            reference_text="This is a reference audio for voice cloning."
        )
    except Exception as e:
        app.logger.error(f"Error during voice cloning with Fish Audio: {e}")
        return jsonify({"error": "Failed to generate voice file."}), 500
    finally:
        # The reference audio is only needed for the duration of the call.
        try:
            os.remove(voice_file_path)
        except OSError:
            pass

    # NOTE(review): generated files accumulate in outputs_v2/; consider a
    # periodic cleanup if disk usage matters.
    try:
        return send_file(output_file_path, as_attachment=True)
    except Exception as e:
        app.logger.error(f"Error sending file: {e}")
        return jsonify({"error": "Failed to send audio file."}), 500
113
+
114
+ # if __name__ == '__main__':
115
+ # app.run(debug=True, port=5001)
116
+
app_ori.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import ebooklib
6
+ from ebooklib import epub
7
+ from urllib.parse import urlparse
8
+ import io
9
+ import os
10
+ import torch
11
+ from openvoice import se_extractor
12
+ from openvoice.api import ToneColorConverter
13
+ from melo.api import TTS
14
+ from voice_cloning import VoiceCloningManager
15
+
16
+ app = Flask(__name__)
17
+
18
+ # --- Voice Cloning Setup ---
19
+ # Initialize the VoiceCloningManager once when the app starts.
20
+ # This loads the models into memory so they don't have to be reloaded for each request.
21
+ print("Initializing Voice Cloning Manager...")
22
+ voice_cloning_manager = VoiceCloningManager()
23
+ print("Voice Cloning Manager Initialized.")
24
+ # -------------------------
25
+
26
def get_text_from_url(url, timeout=30):
    """Download ``url`` and return its content as plain text.

    The response's Content-Type header selects the extraction strategy:
    EPUB documents are unpacked and each document item is reduced to text,
    HTML is reduced to its text nodes, and anything else is returned as the
    decoded response body.

    :param url: Address to fetch.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled remote host cannot block the request indefinitely.
    :return: The extracted text, or ``None`` if the download or the EPUB
        parsing failed.
    """
    # NOTE(review): ``url`` comes straight from the request form, so this is a
    # server-side fetch of an untrusted address (SSRF risk).
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # The header may be absent; fall back to '' so the substring tests below
    # cannot fail with a TypeError on None.
    content_type = (response.headers.get('content-type') or '').lower()

    if 'epub' in content_type:
        try:
            book = epub.read_epub(io.BytesIO(response.content))
            return "".join(
                BeautifulSoup(item.get_body_content(), 'html.parser').get_text() + "\n"
                for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
            )
        except Exception as e:
            # A malformed archive should be reported to the caller as a
            # failed retrieval (None) rather than crash the request.
            print(f"Error parsing EPUB from URL: {e}")
            return None

    if 'html' in content_type:
        return BeautifulSoup(response.content, 'html.parser').get_text()

    # Plain text, and the fallback for other or unspecified content types.
    return response.text
52
+
53
def is_url(string):
    """Return True if ``string`` is an absolute http(s) URL.

    Only ``http`` and ``https`` are accepted because the result decides
    whether the text is downloaded with ``requests``; input using any other
    scheme is treated as ordinary text rather than failing the download.

    :param string: Candidate value, normally the ``text`` form field.
    :return: ``True`` for an http/https URL with a network location,
        ``False`` for anything else (including non-string input).
    """
    if not isinstance(string, str):
        return False
    try:
        parsed = urlparse(string)
    except ValueError:
        # urlparse rejects some malformed inputs, e.g. an unbalanced IPv6 bracket.
        return False
    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)
59
+
60
@app.route('/api/voice-transfer', methods=['POST'])
def voice_transfer():
    """Clone the uploaded voice and report where the generated audio was written.

    Expects a multipart POST with a ``voice_file`` upload (the reference
    audio) and a ``text`` field holding either the text to speak or an
    http(s) URL to fetch the text from.

    Responds with a JSON body containing ``message`` and ``output_file`` on
    success, a 400 JSON error for invalid input or an unreachable URL, or a
    500 JSON error if the voice cloning manager returns no output path.
    """
    import uuid  # local import: this module has no file-level uuid import

    if 'voice_file' not in request.files:
        return jsonify({"error": "No voice file part"}), 400

    file = request.files['voice_file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Validate and resolve the text before anything is written to disk, so a
    # bad request does not leave an orphaned upload behind.
    text_input = request.form.get('text')
    if not text_input:
        return jsonify({"error": "No text or text_url provided"}), 400

    if is_url(text_input):
        text_content = get_text_from_url(text_input)
        if text_content is None:
            return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
    else:
        text_content = text_input

    # Store the upload under a generated name. The client-supplied filename is
    # untrusted (it may contain path components such as "../") and two
    # concurrent requests could otherwise overwrite each other's file.
    upload_folder = 'uploads'
    os.makedirs(upload_folder, exist_ok=True)
    extension = os.path.splitext(os.path.basename(file.filename))[1]
    if not extension[1:].isalnum():
        extension = ''
    voice_file_path = os.path.join(upload_folder, f"{uuid.uuid4().hex}{extension}")
    file.save(voice_file_path)

    # --- Perform Voice Cloning ---
    print("Starting voice cloning process...")
    # NOTE(review): the upload is left on disk because it is not visible here
    # whether the manager keeps using the reference file after returning.
    output_file_path = voice_cloning_manager.generate_cloned_voice_audio(text_content, voice_file_path)

    if output_file_path is None:
        return jsonify({"error": "Failed to generate voice file."}), 500

    return jsonify({
        "message": "Voice transfer process completed successfully.",
        "output_file": output_file_path
    })
106
+
107
+ if __name__ == '__main__':
108
+ app.run(debug=True, port=5001)
backend DELETED
@@ -1 +0,0 @@
1
- Subproject commit b6b29998fd7e22d730dbb8f7e6def9b338692082
 
 
fish_audio.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from fish_audio_sdk import Session, TTSRequest, ReferenceAudio
4
+
5
def _prepare_text(text: str, max_chars: int = 100) -> str:
    """Drop underscore-delimited markup words and clip the text to ``max_chars`` characters."""
    # Remove special words (e.g., __Gutenberg__, _very_)
    cleaned = re.sub(r'__\w+__\s*|_\w+_\s*', '', text).strip()
    if len(cleaned) <= max_chars:
        return cleaned

    clipped = cleaned[:max_chars]
    # Prefer to end on the last complete sentence inside the limit so the
    # synthesized speech does not stop mid-sentence.
    last_stop = max(clipped.rfind(mark) for mark in '.!?。！？')
    if last_stop > 0:
        return clipped[:last_stop + 1]
    return clipped


def clone_voice_with_fish(text: str, reference_audio_path: str, output_path: str, reference_text: str = "Text in reference audio", max_chars: int = 100):
    """
    Generates speech with a cloned voice using the Fish Audio API.

    The text is cleaned of underscore-delimited markup words and restricted
    to its first ``max_chars`` characters before being sent to the API.

    :param text: The text to be converted to speech.
    :param reference_audio_path: Path to the reference audio file for voice cloning.
    :param output_path: Path to save the generated audio file.
    :param reference_text: The transcription of the reference audio. This is important for better quality.
    :param max_chars: Maximum number of characters of ``text`` to synthesize.
    :raises ValueError: If no text remains after cleaning, or if the
        FISH_AUDIO_API_KEY environment variable is not set.
    """
    text = _prepare_text(text, max_chars)
    if not text:
        raise ValueError("No text left to synthesize after cleaning.")

    # Load the API key from an environment variable for security.
    api_key = os.getenv("FISH_AUDIO_API_KEY")
    if not api_key:
        raise ValueError("FISH_AUDIO_API_KEY environment variable not set.")

    # Read the reference first so that a missing or unreadable reference file
    # fails before an (empty) output file is created.
    with open(reference_audio_path, "rb") as audio_file:
        reference_audio = audio_file.read()

    session = Session(api_key)
    # NOTE(review): `backend` is passed to TTSRequest here exactly as before;
    # confirm against the fish-audio-sdk documentation whether the backend is
    # meant to be an argument of `session.tts(...)` instead.
    tts_request = TTSRequest(
        text=text,
        backend='s1',
        references=[
            ReferenceAudio(
                audio=reference_audio,
                text=reference_text,
            )
        ]
    )

    with open(output_path, "wb") as f:
        for chunk in session.tts(tts_request):
            f.write(chunk)
    print("File output to: ", output_path)
43
+
44
+ if __name__ == '__main__':
45
+ # This is an example of how to use the function.
46
+ # You would import clone_voice_with_fish from this file into your app.py.
47
+ clone_voice_with_fish(
48
+ text="The water's writing engraves the rocks like the graphite from my pencil engraves this paper.",
49
+ reference_audio_path="example_reference_elon.mp3",
50
+ output_path="output_fish_clone.mp3",
51
+ # It's best to have an accurate transcript of the reference audio for better results.
52
+ reference_text="My name is Elon Musk."
53
+ )
notes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ curl -X POST -F "voice_file=@backend/example_reference_kx.mp3" -F "text=http://www.gutenberg.org/files/11/11-0.txt" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
2
+
3
+ curl -X POST -F "voice_file=@backend/kx.mp3" -F "text=宝贝晚上好,你吃饱了吗?要吃鲍鱼吗?" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
4
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Flask
2
+ requests
3
+ beautifulsoup4
4
+ ebooklib
5
+ gunicorn
6
+ fish-audio-sdk
7
+ python-dotenv