Spaces:

Chloe
/

audiogen

Sleeping

App Files Files Community

Chloe committed on Jun 23, 2025

Commit

ef9a767

1 Parent(s): 9ddc7c3

fix

Browse files

Files changed (8) hide show

.dockerignore +13 -0
Dockerfile +22 -0
app.py +116 -0
app_ori.py +108 -0
backend +0 -1
fish_audio.py +53 -0
notes +4 -0
requirements.txt +7 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,13 @@

+.git
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.DS_Store
+uploads/
+outputs_v2/
+*.mp3
+*.wav
+notes
+app_ori.py
+.gitignore

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# Use a Python 3.10 runtime as a parent image for fish-audio-sdk compatibility
+FROM python:3.10-slim
+# Set the working directory in the container
+WORKDIR /app
+# Install ffmpeg, which is required for many audio operations.
+# This runs before any project file is copied so the layer stays cached when only
+# requirements.txt or application code changes. --no-install-recommends keeps
+# optional packages out of the image, and the apt lists are removed in the same layer.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Copy only the requirements file first so the dependency layer is rebuilt
+# solely when requirements.txt itself changes
+COPY requirements.txt .
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of your application's code into the container
+COPY . .
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+# Command to run the application using gunicorn, a production-ready server
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "600", "app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+from flask import Flask, request, jsonify, send_file
+import os
+import requests
+from bs4 import BeautifulSoup
+import ebooklib
+from ebooklib import epub
+from urllib.parse import urlparse
+import io
+from fish_audio import clone_voice_with_fish
+import uuid
+from dotenv import load_dotenv
+# Load environment variables from a .env file if it exists.
+# This is particularly useful for local development.
+load_dotenv()
+# WSGI application object on which the routes below are registered.
+app = Flask(__name__)
+def get_text_from_url(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status() # Raise an exception for bad status codes
+        content_type = response.headers.get('content-type')
+        if 'epub' in content_type:
+            book = epub.read_epub(io.BytesIO(response.content))
+            text_content = ""
+            for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
+                soup = BeautifulSoup(item.get_body_content(), 'html.parser')
+                text_content += soup.get_text() + "\n"
+            return text_content
+        elif 'html' in content_type:
+            soup = BeautifulSoup(response.content, 'html.parser')
+            return soup.get_text()
+        elif 'text' in content_type:
+            return response.text
+        else:
+            # Fallback for other content types or if content-type is not specific
+            # You might want to add more sophisticated handling here
+            return response.text
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching URL: {e}")
+        return None
+def is_url(string):
+    try:
+        result = urlparse(string)
+        return all([result.scheme, result.netloc])
+    except ValueError:
+        return False
+@app.route('/api/voice-transfer', methods=['POST'])
+def voice_transfer():
+    if 'voice_file' not in request.files:
+        return jsonify({"error": "No voice file part"}), 400
+    file = request.files['voice_file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    if file:
+        # Save the uploaded file temporarily
+        # In a production environment, you'd want a more robust storage solution
+        upload_folder = 'uploads'
+        if not os.path.exists(upload_folder):
+            os.makedirs(upload_folder)
+        voice_file_path = os.path.join(upload_folder, file.filename)
+        file.save(voice_file_path)
+        text_input = request.form.get('text')
+        print(text_input)
+        if not text_input:
+            return jsonify({"error": "No text or text_url provided"}), 400
+        text_content = ""
+        if is_url(text_input):
+            text_content = get_text_from_url(text_input)
+            if text_content is None:
+                return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
+        else:
+            print("Not URL")
+            text_content = text_input
+        # --- Perform Voice Cloning using Fish Audio ---
+        print("Starting voice cloning process with Fish Audio...")
+        output_dir = 'outputs_v2'
+        os.makedirs(output_dir, exist_ok=True)
+        output_filename = f"output_cloned_{uuid.uuid4().hex}.mp3"
+        output_file_path = os.path.join(output_dir, output_filename)
+        try:
+            # Note: For best results, provide an accurate transcript of the reference audio.
+            # Since we don't get it from the user, we can pass a generic placeholder or an empty string.
+            clone_voice_with_fish(
+                text=text_content,
+                reference_audio_path=voice_file_path,
+                output_path=output_file_path,
+                reference_text="This is a reference audio for voice cloning."
+            )
+        except Exception as e:
+            print(f"Error during voice cloning with Fish Audio: {e}")
+            return jsonify({"error": "Failed to generate voice file."}), 500
+        try:
+            return send_file(output_file_path, as_attachment=True)
+        except Exception as e:
+            app.logger.error(f"Error sending file: {e}")
+            return jsonify({"error": "Failed to send audio file."}), 500
+# if __name__ == '__main__':
+#     app.run(debug=True, port=5001)

app_ori.py ADDED Viewed

	@@ -0,0 +1,108 @@

+from flask import Flask, request, jsonify
+import os
+import requests
+from bs4 import BeautifulSoup
+import ebooklib
+from ebooklib import epub
+from urllib.parse import urlparse
+import io
+import os
+import torch
+from openvoice import se_extractor
+from openvoice.api import ToneColorConverter
+from melo.api import TTS
+from voice_cloning import VoiceCloningManager
+app = Flask(__name__)
+# --- Voice Cloning Setup ---
+# Create a single VoiceCloningManager at import time and share it across requests.
+# Presumably this loads the models into memory so they are not reloaded for each
+# request (the class is defined in voice_cloning, not shown here; confirm there).
+print("Initializing Voice Cloning Manager...")
+voice_cloning_manager = VoiceCloningManager()
+print("Voice Cloning Manager Initialized.")
+# -------------------------
+def get_text_from_url(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status() # Raise an exception for bad status codes
+        content_type = response.headers.get('content-type')
+        if 'epub' in content_type:
+            book = epub.read_epub(io.BytesIO(response.content))
+            text_content = ""
+            for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
+                soup = BeautifulSoup(item.get_body_content(), 'html.parser')
+                text_content += soup.get_text() + "\n"
+            return text_content
+        elif 'html' in content_type:
+            soup = BeautifulSoup(response.content, 'html.parser')
+            return soup.get_text()
+        elif 'text' in content_type:
+            return response.text
+        else:
+            # Fallback for other content types or if content-type is not specific
+            # You might want to add more sophisticated handling here
+            return response.text
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching URL: {e}")
+        return None
+def is_url(string):
+    try:
+        result = urlparse(string)
+        return all([result.scheme, result.netloc])
+    except ValueError:
+        return False
+@app.route('/api/voice-transfer', methods=['POST'])
+def voice_transfer():
+    if 'voice_file' not in request.files:
+        return jsonify({"error": "No voice file part"}), 400
+    file = request.files['voice_file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    if file:
+        # Save the uploaded file temporarily
+        # In a production environment, you'd want a more robust storage solution
+        upload_folder = 'uploads'
+        if not os.path.exists(upload_folder):
+            os.makedirs(upload_folder)
+        voice_file_path = os.path.join(upload_folder, file.filename)
+        file.save(voice_file_path)
+        text_input = request.form.get('text')
+        print(text_input)
+        if not text_input:
+            return jsonify({"error": "No text or text_url provided"}), 400
+        text_content = ""
+        if is_url(text_input):
+            text_content = get_text_from_url(text_input)
+            if text_content is None:
+                return jsonify({"error": "Failed to retrieve or parse content from URL"}), 400
+        else:
+            print("Not URL")
+            text_content = text_input
+        # --- Perform Voice Cloning ---
+        print("Starting voice cloning process...")
+        # Call the new manager to generate the audio
+        output_file_path = voice_cloning_manager.generate_cloned_voice_audio(text_content, voice_file_path)
+        if output_file_path is None:
+            return jsonify({"error": "Failed to generate voice file."}), 500
+        return jsonify({
+            "message": "Voice transfer process completed successfully.",
+            "output_file": output_file_path
+        })
+# Local development entry point: runs Flask's built-in server on port 5001 with debug mode on.
+if __name__ == '__main__':
+    app.run(debug=True, port=5001)

backend DELETED Viewed

	@@ -1 +0,0 @@
1	- Subproject commit b6b29998fd7e22d730dbb8f7e6def9b338692082

fish_audio.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import os
+import re
+from fish_audio_sdk import Session, TTSRequest, ReferenceAudio
+def clone_voice_with_fish(text: str, reference_audio_path: str, output_path: str, reference_text: str = "Text in reference audio"):
+    """
+    Generates speech with a cloned voice using the Fish Audio API.
+    :param text: The text to be converted to speech.
+    :param reference_audio_path: Path to the reference audio file for voice cloning.
+    :param output_path: Path to save the generated audio file.
+    :param reference_text: The transcription of the reference audio. This is important for better quality.
+    """
+    # Restrict the input text to the first 450 characters.
+    if len(text) > 100:
+        text = '.'.join(text.split('.')[15:16])[:100]
+    # Remove special words (e.g., __Gutenberg__, _very_)
+    text = re.sub(r'__\w+__\s*|_\w+_\s*', '', text)
+    print(text)
+    # Load the API key from an environment variable for security.
+    api_key = os.getenv("FISH_AUDIO_API_KEY")
+    if not api_key:
+        raise ValueError("FISH_AUDIO_API_KEY environment variable not set.")
+    session = Session(api_key)
+    with open(reference_audio_path, "rb") as audio_file:
+        with open(output_path, "wb") as f:
+            for chunk in session.tts(TTSRequest(
+                text=text,
+                backend='s1',
+                references=[
+                    ReferenceAudio(
+                        audio=audio_file.read(),
+                        text=reference_text,
+                    )
+                ]
+            )):
+                f.write(chunk)
+    print("File output to: ", output_path)
+if __name__ == '__main__':
+    # Example invocation, run only when this file is executed directly.
+    # app.py imports clone_voice_with_fish from this module instead.
+    clone_voice_with_fish(
+        text="The water's writing engraves the rocks like the graphite from my pencil engraves this paper.",
+        reference_audio_path="example_reference_elon.mp3",
+        output_path="output_fish_clone.mp3",
+        # It's best to have an accurate transcript of the reference audio for better results.
+        reference_text="My name is Elon Musk."
+    )

notes ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ curl -X POST -F "voice_file=@backend/example_reference_kx.mp3" -F "text=http://www.gutenberg.org/files/11/11-0.txt" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
2	+
3	+ curl -X POST -F "voice_file=@backend/kx.mp3" -F "text=宝贝晚上好，你吃饱了吗？要吃鲍鱼吗？" http://127.0.0.1:5001/api/voice-transfer --output cloned_voice.mp3
4	+

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Flask
+requests
+beautifulsoup4
+ebooklib
+gunicorn
+fish-audio-sdk
+python-dotenv