Spaces:
Sleeping
Sleeping
| import spaces | |
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from kokoro_tts import generate_audio | |
| import logging | |
| logging.basicConfig(level=logging.INFO) | |
class Voices:
    """Static catalogue of the TTS languages and their voice identifiers.

    The three tables share the same one-letter language codes as keys, and
    every voice identifier starts with the code of the language it belongs to.
    """

    # Flag emoji per language code. Each flag is a pair of Unicode regional
    # indicator symbols; they are written as escapes so the literals cannot be
    # corrupted again by a tool that re-decodes the file with a legacy code page.
    flags = {
        "a": "\U0001F1FA\U0001F1F8",  # US
        "b": "\U0001F1EC\U0001F1E7",  # GB
        "e": "\U0001F1EA\U0001F1F8",  # ES
        "f": "\U0001F1EB\U0001F1F7",  # FR
        "h": "\U0001F1EE\U0001F1F3",  # IN
        "i": "\U0001F1EE\U0001F1F9",  # IT
        "j": "\U0001F1EF\U0001F1F5",  # JP
        "p": "\U0001F1E7\U0001F1F7",  # BR
        "z": "\U0001F1E8\U0001F1F3",  # CN
    }

    # Plain-text language name per language code.
    # NOTE(review): the "_win" suffix presumably refers to Windows, where flag
    # emoji are not rendered -- confirm.
    flags_win = {
        "a": "american",
        "b": "british",
        "e": "spanish",
        "f": "french",
        "h": "hindi",
        "i": "italian",
        "j": "japanese",
        "p": "portuguese",
        "z": "chinese",
    }

    # Voice identifiers available for each language code.
    voices = {
        "a": [
            "af_alloy",
            "af_aoede",
            "af_bella",
            "af_heart",
            "af_jessica",
            "af_kore",
            "af_nicole",
            "af_nova",
            "af_river",
            "af_sarah",
            "af_sky",
            "am_adam",
            "am_echo",
            "am_eric",
            "am_fenrir",
            "am_liam",
            "am_michael",
            "am_onyx",
            "am_puck",
            "am_santa",
        ],
        "b": [
            "bf_alice",
            "bf_emma",
            "bf_isabella",
            "bf_lily",
            "bm_daniel",
            "bm_fable",
            "bm_george",
            "bm_lewis",
        ],
        "e": ["ef_dora", "em_alex", "em_santa"],
        "f": ["ff_siwis"],
        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
        "i": ["if_sara", "im_nicola"],
        "j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
        "p": ["pf_dora", "pm_alex", "pm_santa"],
        "z": [
            "zf_xiaobei",
            "zf_xiaoni",
            "zf_xiaoxiao",
            "zf_xiaoyi",
            "zm_yunjian",
            "zm_yunxi",
            "zm_yunxia",
            "zm_yunyang",
        ],
    }
def extract_text_from_url(url, timeout=10):
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The page text with ``<script>``/``<style>`` content removed and blank
        lines dropped, one stripped line per row. On any failure a string
        starting with ``"Error: "`` is returned instead of raising, so the
        message can be shown directly in the output textbox.
    """
    # Reject missing/blank input up front instead of letting the HTTP client
    # fail with a less helpful message.
    if not isinstance(url, str) or not url.strip():
        return "Error: No URL provided"

    try:
        # Download the webpage content
        response = requests.get(url.strip(), timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Parse the HTML content
        soup = BeautifulSoup(response.text, "html.parser")

        # Remove script and style elements
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Get text and clean it up
        text = soup.get_text(separator="\n", strip=True)

        # Remove excessive newlines and whitespace
        stripped_lines = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in stripped_lines if line)
    except Exception as e:
        # UI boundary: report the problem to the user as text, but keep the
        # traceback in the log for debugging.
        logging.exception("Failed to extract text from %r", url)
        return f"Error: {str(e)}"
def get_language_choices():
    """Build the ``(label, value)`` pairs for every available language.

    Returns:
        A list with one tuple per language code in ``Voices.voices``: the
        label is the language's flag followed by its title-cased name, and the
        value is the language code itself.
    """
    choices = []
    for code in Voices.voices:
        language_name = Voices.flags_win[code].title()
        label = f"{Voices.flags[code]} {language_name}"
        choices.append((label, code))
    return choices
def get_voice_choices(lang_code):
    """Return the voice identifiers registered for ``lang_code``.

    Args:
        lang_code: Language code used as a key of ``Voices.voices``.

    Returns:
        The list of voices for that language, or an empty list when the code
        is unknown.
    """
    return Voices.voices.get(lang_code, [])
def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
    """Synthesize speech for ``text`` with the selected voice.

    Args:
        text: Text to read aloud.
        lang_code: Selected language code. It is accepted because the language
            dropdown is wired in as an input, but it is not used here; only
            ``voice`` is passed to the generator.
        voice: Voice identifier passed to ``generate_audio``.
        progress: Gradio progress tracker, forwarded to ``generate_audio``.
            The ``gr.Progress()`` default is intentional.

    Returns:
        A ``(sample_rate, audio_data)`` tuple with a sample rate of 24000, or
        ``None`` when there is nothing to synthesize or generation fails.
    """
    # Nothing to read aloud: skip the generator call entirely.
    if not text or not text.strip():
        logging.warning("No text supplied; skipping audio generation")
        return None

    try:
        audio_data = generate_audio(text, voice=voice, progress=progress)
    except Exception:
        # UI boundary: log the full traceback and return None rather than
        # propagating the exception.
        logging.exception("Error generating audio")
        return None

    return (24000, audio_data)  # Return tuple of (sample_rate, audio_data)
# Create Gradio interface: URL -> extracted text -> generated speech.
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
    gr.Markdown("# Web Page Text Extractor & Audio Generator")
    gr.Markdown(
        "Scrape a website and generate audio using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
    )

    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL", value="https://paulgraham.com/words.html"
        )
        extract_btn = gr.Button("Extract Text")

    # Editable so the user can trim or correct the text before synthesis.
    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)

    with gr.Row():
        lang_dropdown = gr.Dropdown(
            choices=get_language_choices(),
            label="Language",
            value="a",  # Default to American English
        )
        voice_dropdown = gr.Dropdown(
            choices=Voices.voices["a"],  # Voices matching the default language
            label="Voice",
            value="am_onyx",  # Default voice
        )
        generate_btn = gr.Button("Generate Audio")

    audio_output = gr.Audio(label="Generated Audio")

    def update_voices(lang_code):
        """Refresh the voice dropdown after the language selection changes.

        The selected value is reset to the first voice of the new language;
        otherwise the dropdown would keep a voice that is not among its new
        choices.
        """
        voices = get_voice_choices(lang_code)
        return gr.Dropdown(choices=voices, value=voices[0] if voices else None)

    # Event wiring.
    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
    generate_btn.click(
        fn=text_to_audio,
        inputs=[text_output, lang_dropdown, voice_dropdown],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()