Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| from urllib.parse import urlparse | |
| from bs4 import BeautifulSoup | |
# Directory that save_audio_file() makes sure exists before writing.
AUDIO_DIR = "audio_outputs"

# Persona display name -> opaque voice identifier string.
# NOTE(review): presumably these are voice IDs for a text-to-speech
# provider; confirm against the code that consumes this mapping.
voice_map = {
    "grandma GG": "rKVm0Cb9J2wrzmZupJea",
    "tech wizard": "ocn9CucaUfmmP6Two6Ik",
    "perky sidekick": "DWR3ijzKmphlRUhbBI7t",
    "bill the newscaster": "R1vZMopVRO75M5xBKX52",
    "spunky charlie": "q3yXDjF0aq4JCEo9u2g4",
    "sassy teen": "mBj2IDD9aXruPJHLGCAv",
}
def sanitize_url(url):
    """Return *url* with an explicit scheme, defaulting to HTTPS.

    Surrounding whitespace is removed first. A URL that already starts
    with ``http://`` or ``https://`` (in any letter case) is returned
    otherwise unchanged; a protocol-relative URL (``//host/path``) gets
    ``https:`` prepended; anything else gets ``https://`` prepended.

    Args:
        url: URL string as typed by a user, possibly without a scheme.

    Returns:
        The URL string with a scheme in front.
    """
    url = url.strip()
    # URL schemes are case-insensitive, so "HTTP://..." already has one.
    if url.lower().startswith(("http://", "https://")):
        return url
    # "//host/path" only lacks the scheme name, not the "//" separator.
    if url.startswith("//"):
        return "https:" + url
    return "https://" + url
def extract_internal_links(html_content, base_url):
    """Collect the same-host links found in an HTML document.

    Each ``<a href=...>`` value is resolved against *base_url* and kept
    only when the result is an ``http``/``https`` URL whose host (netloc)
    equals that of *base_url*. Resolution follows normal URL-joining
    rules, so ``page.html``, ``../x``, ``/x``, ``#frag`` and
    ``//host/x`` are all handled; ``mailto:``, ``javascript:``, ``tel:``
    and similar non-web links are skipped.

    Args:
        html_content: HTML markup to scan.
        base_url: Absolute URL of the page the markup came from. It is
            used to resolve relative links and to decide which host
            counts as internal.

    Returns:
        A sorted list of unique absolute URLs.
    """
    # Function-scope import keeps this block self-contained; urlparse and
    # BeautifulSoup come from the module-level imports.
    from urllib.parse import urljoin

    soup = BeautifulSoup(html_content, "html.parser")
    # Host names are case-insensitive, so compare them lowercased.
    base_domain = urlparse(base_url).netloc.lower()
    links = set()
    for tag in soup.find_all("a", href=True):
        href = tag["href"].strip()
        try:
            full_url = urljoin(base_url, href)
            parsed = urlparse(full_url)
        except ValueError:
            # Malformed href (e.g. a broken IPv6 literal): ignore the link
            # rather than abort the whole extraction.
            continue
        if parsed.scheme not in ("http", "https"):
            continue
        if parsed.netloc.lower() == base_domain:
            links.add(full_url)
    # Sorted so that repeated runs over the same page give the same order.
    return sorted(links)
def crawl_documentation(url):
    """Fetch *url* over HTTP and return the response body as text.

    The request uses a 10-second timeout and treats HTTP error statuses
    as failures. Failures are not raised: any exception is converted into
    a string of the form ``"Error fetching page: <exception>"`` and that
    string is returned in place of the page text.

    Args:
        url: Address of the page to download.

    Returns:
        The page text on success, otherwise the error message string.
    """
    # Imported here rather than at module level, as in the original code.
    import requests

    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
        html = page.text
    except Exception as exc:
        return f"Error fetching page: {exc}"
    return html
def get_voice_prompt_style(voice):
    """Return the tone description associated with a voice persona.

    The persona name is matched case-insensitively and with surrounding
    whitespace ignored, so ``"grandma GG"``, ``"Grandma gg"`` and
    ``" grandma gg "`` all select the same entry.

    Args:
        voice: Persona name, e.g. ``"tech wizard"``.

    Returns:
        The tone description string, or ``"neutral"`` when *voice* is not
        a string or does not name a known persona.
    """
    # NOTE(review): the "β" in these descriptions looks like a
    # mis-encoded dash; the text is kept exactly as found. Confirm the
    # intended character before changing it.
    tone = {
        'grandma GG': 'dry, witty, and brutally honest β will roast you if you mess up.',
        'tech wizard': 'cryptic, snarky, and a prodigy with code β speaks in digital spells.',
        'perky sidekick': 'energetic, cheerful, and endlessly supportive β like a high-five machine.',
        'bill the newscaster': 'polished, confident, and composed β delivers everything like breaking news.',
        'spunky charlie': 'wildly curious, playful, and full of devil-may-care energy.',
        'sassy teen': 'sarcastic, sharp-tongued, and too cool to care β flexes brainpower with attitude.',
    }
    if not isinstance(voice, str):
        return "neutral"
    wanted = voice.strip().lower()
    # Lowercase both sides: the table has a mixed-case key ('grandma GG')
    # that a lowercased lookup key could otherwise never match.
    for name, style in tone.items():
        if name.lower() == wanted:
            return style
    return "neutral"
def save_audio_file(audio_path, content):
    """Write binary audio data to *audio_path*.

    The ``AUDIO_DIR`` directory is created if missing, and so is the
    parent directory of *audio_path*, so the write also succeeds when the
    target lies in a sub-folder or outside ``AUDIO_DIR``.

    Args:
        audio_path: Destination file path; an existing file is overwritten.
        content: Bytes to write.
    """
    os.makedirs(AUDIO_DIR, exist_ok=True)
    # A bare file name has an empty dirname, and makedirs("") would raise.
    parent = os.path.dirname(audio_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(audio_path, "wb") as f:
        f.write(content)
# Names exported by ``from <this module> import *``.
__all__ = [
    # helper functions
    "sanitize_url",
    "extract_internal_links",
    "crawl_documentation",
    "get_voice_prompt_style",
    "save_audio_file",
    # module-level constants
    "voice_map",
    "AUDIO_DIR",
]