Spaces:

lambdaofgod
/

page2speech

Sleeping

App Files Files Community

lambdaofgod committed on Feb 12, 2025

Commit

8035662

1 Parent(s): af74b58

init

Browse files

Files changed (4) hide show

app.py +175 -0
kokoro_tts.py +55 -0
pyproject.toml +13 -0
requirements.txt +119 -0

app.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+import soundfile as sf
+from kokoro_tts import generate_audio
+class Voices:
+    """
+    Static lookup tables describing the selectable languages and voices.
+    All three tables share the same single-letter language codes as keys.
+    """
+
+    # Language code -> flag emoji shown in the language label.
+    flags = {
+        "a": "🇺🇸",
+        "b": "🇬🇧",
+        "e": "🇪🇸",
+        "f": "🇫🇷",
+        "h": "🇮🇳",
+        "i": "🇮🇹",
+        "j": "🇯🇵",
+        "p": "🇧🇷",
+        "z": "🇨🇳",
+    }
+    # Language code -> lowercase plain-text name; get_language_choices
+    # title-cases it and appends it to the flag to form the label.
+    flags_win = {
+        "a": "american",
+        "b": "british",
+        "e": "spanish",
+        "f": "french",
+        "h": "hindi",
+        "i": "italian",
+        "j": "japanese",
+        "p": "portuguese",
+        "z": "chinese",
+    }
+    # Language code -> voice IDs available for that language. Every ID starts
+    # with its language code followed by "f" or "m" (presumably female/male
+    # voices -- confirm against the Kokoro voice list).
+    voices = {
+        "a": [
+            "af_alloy",
+            "af_aoede",
+            "af_bella",
+            "af_heart",
+            "af_jessica",
+            "af_kore",
+            "af_nicole",
+            "af_nova",
+            "af_river",
+            "af_sarah",
+            "af_sky",
+            "am_adam",
+            "am_echo",
+            "am_eric",
+            "am_fenrir",
+            "am_liam",
+            "am_michael",
+            "am_onyx",
+            "am_puck",
+            "am_santa",
+        ],
+        "b": [
+            "bf_alice",
+            "bf_emma",
+            "bf_isabella",
+            "bf_lily",
+            "bm_daniel",
+            "bm_fable",
+            "bm_george",
+            "bm_lewis",
+        ],
+        "e": ["ef_dora", "em_alex", "em_santa"],
+        "f": ["ff_siwis"],
+        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
+        "i": ["if_sara", "im_nicola"],
+        "j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
+        "p": ["pf_dora", "pm_alex", "pm_santa"],
+        "z": [
+            "zf_xiaobei",
+            "zf_xiaoni",
+            "zf_xiaoxiao",
+            "zf_xiaoyi",
+            "zm_yunjian",
+            "zm_yunxi",
+            "zm_yunxia",
+            "zm_yunyang",
+        ],
+    }
+def extract_text_from_url(url):
+    try:
+        # Download the webpage content
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        # Parse the HTML content
+        soup = BeautifulSoup(response.text, "html.parser")
+        # Remove script and style elements
+        for script in soup(["script", "style"]):
+            script.decompose()
+        # Get text and clean it up
+        text = soup.get_text(separator="\n", strip=True)
+        # Remove excessive newlines and whitespace
+        lines = (line.strip() for line in text.splitlines())
+        text = "\n".join(line for line in lines if line)
+        return text
+    except Exception as e:
+        return f"Error: {str(e)}"
+def get_language_choices():
+    return [
+        (f"{Voices.flags[code]} {Voices.flags_win[code].title()}", code)
+        for code in Voices.voices.keys()
+    ]
+def get_voice_choices(lang_code):
+    if lang_code in Voices.voices:
+        return Voices.voices[lang_code]
+    return []
+def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
+    try:
+        audio_data = generate_audio(
+            text, lang_code=lang_code, voice=voice, progress=progress
+        )
+        return (24000, audio_data)  # Return tuple of (sample_rate, audio_data)
+    except Exception as e:
+        print(f"Error generating audio: {e}")
+        return None
+# Create Gradio interface
+with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
+    gr.Markdown("# Web Page Text Extractor & Audio Generator")
+    gr.Markdown(
+        "Scrape a website and generate text using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
+    )
+    with gr.Row():
+        url_input = gr.Textbox(
+            label="Enter URL", value="https://paulgraham.com/words.html"
+        )
+        extract_btn = gr.Button("Extract Text")
+    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)
+    with gr.Row():
+        lang_dropdown = gr.Dropdown(
+            choices=get_language_choices(),
+            label="Language",
+            value="a",  # Default to English
+        )
+        voice_dropdown = gr.Dropdown(
+            choices=Voices.voices["a"],  # Default to English voices
+            label="Voice",
+            value="am_onyx",  # Default voice
+        )
+    generate_btn = gr.Button("Generate Audio")
+    audio_output = gr.Audio(label="Generated Audio")
+    def update_voices(lang_code):
+        return gr.Dropdown(choices=get_voice_choices(lang_code))
+    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
+    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
+    generate_btn.click(
+        fn=text_to_audio,
+        inputs=[text_output, lang_dropdown, voice_dropdown],
+        outputs=audio_output,
+    )
+if __name__ == "__main__":
+    demo.launch()

kokoro_tts.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from kokoro import KPipeline
+import soundfile as sf
+import numpy as np
+import logging
+def generate_audio(
+    text,
+    lang_code="a",
+    voice="af_heart",
+    speed=1,
+    save_segments=False,
+    progress=None,
+):
+    """
+    Generate audio from text using Kokoro TTS pipeline
+    Args:
+        text (str): Text to convert to speech
+        lang_code (str): Language code for the TTS model
+        voice (str): Voice ID to use
+        speed (float): Speech speed multiplier
+        save_segments (bool): Whether to save individual audio segments
+    Returns:
+        numpy.ndarray: Combined audio data at 24kHz sample rate
+    """
+    pipeline = KPipeline(lang_code=lang_code)
+    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\.")
+    all_audio = []
+    segments = list(generator)  # Get total number of segments
+    for i, (gs, ps, audio) in enumerate(
+        progress.tqdm(segments, desc="Generating audio")
+    ):
+        logging.info("Processing segment")
+        logging.info(f"Graphemes: {gs}")
+        logging.info(f"Phonemes: {ps}")
+        all_audio.append(audio)
+        if save_segments:
+            sf.write(f"segment_{i}.wav", audio, 24000)
+    # Concatenate all audio segments
+    combined_audio = np.concatenate(all_audio)
+    return combined_audio
+if __name__ == "__main__":
+    # Example usage
+    sample_text = "Hello world"
+    audio_data = generate_audio(sample_text)
+    sf.write("out.wav", audio_data, 24000)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "page2speech"
+version = "0.1.0"
+description = "Extract text from a web page and read it aloud with Kokoro TTS"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "beautifulsoup4>=4.13.3",
+    "gradio>=5.16.0",
+    "kokoro>=0.3.4",
+    # numpy and requests are imported directly by the application code, so
+    # declare them instead of relying on transitive installs.
+    "numpy>=1.26.4",
+    "pip>=25.0.1",
+    "requests>=2.32.3",
+    "soundfile>=0.13.1",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,119 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.8.0
+attrs==25.1.0
+babel==2.17.0
+beautifulsoup4==4.13.3
+blis==1.2.0
+catalogue==2.0.10
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+clldutils==3.21.0
+cloudpathlib==0.20.0
+colorama==0.4.6
+colorlog==6.9.0
+confection==0.1.5
+csvw==3.5.1
+curated-tokenizers==0.0.9
+curated-transformers==0.1.1
+cymem==2.0.11
+dlinfo==2.0.0
+docopt==0.6.2
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
+espeakng-loader==0.2.4
+exceptiongroup==1.2.2
+fastapi==0.115.8
+ffmpy==0.5.0
+filelock==3.17.0
+fsspec==2025.2.0
+gradio==5.16.0
+gradio_client==1.7.0
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.28.1
+idna==3.10
+isodate==0.7.2
+Jinja2==3.1.5
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kokoro==0.7.15
+langcodes==3.5.0
+language-tags==1.2.0
+language_data==1.3.0
+loguru==0.7.3
+lxml==5.3.1
+marisa-trie==1.2.1
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+misaki==0.7.15
+mpmath==1.3.0
+murmurhash==1.0.12
+networkx==3.4.2
+num2words==0.5.14
+numpy==1.26.4
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+phonemizer-fork==3.3.2
+pillow==11.1.0
+preshed==3.0.9
+pycparser==2.22
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+pylatexenc==2.10
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.1
+PyYAML==6.0.2
+rdflib==7.1.3
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+rfc3986==1.5.0
+rich==13.9.4
+rpds-py==0.22.3
+ruff==0.9.6
+safehttpx==0.1.6
+safetensors==0.5.2
+scipy==1.15.1
+segments==2.2.1
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+smart-open==7.1.0
+sniffio==1.3.1
+soundfile==0.13.1
+soupsieve==2.6
+spacy==3.8.4
+spacy-curated-transformers==0.3.0
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.5.1
+starlette==0.45.3
+sympy==1.13.1
+tabulate==0.9.0
+thinc==8.3.4
+tokenizers==0.21.0
+tomlkit==0.13.2
+torch==2.6.0
+tqdm==4.67.1
+transformers==4.48.3
+typer==0.15.1
+typing_extensions==4.12.2
+tzdata==2025.1
+uritemplate==4.1.1
+urllib3==2.3.0
+uvicorn==0.34.0
+wasabi==1.1.3
+weasel==0.4.1
+websockets==14.2
+wrapt==1.17.2