Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| import string | |
# Fetch the NLTK data this app needs at startup. This runs on every start;
# nltk.download() does not re-download packages that are already up to date.
# "punkt_tab" is what word_tokenize() loads on recent NLTK releases (3.9+);
# "punkt" is kept so older NLTK releases keep working.
for _resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_resource)
def fetch_text(url):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the paragraph text
        joined with single spaces and ``error`` is ``None``; on failure
        ``text`` is ``None`` and ``error`` is a human-readable message.
    """
    if isinstance(url, str):
        url = url.strip()
    if not url:
        # Fail fast with a clear message instead of a library-level error.
        return None, "No URL provided"
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
        # requests falls back to ISO-8859-1 for text/* responses whose
        # Content-Type header carries no charset, which garbles UTF-8 pages.
        # In that case hand BeautifulSoup the raw bytes so it can detect the
        # encoding itself (e.g. from a <meta charset> declaration).
        content_type = response.headers.get("Content-Type", "")
        if "charset" in content_type.lower():
            markup = response.text
        else:
            markup = response.content
        soup = BeautifulSoup(markup, 'html.parser')
        # Only extract <p> tag text for speed & relevance
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        if not text.strip():
            return None, f"No readable <p> content found at {url}"
        return text, None
    except Exception as e:
        # Deliberately broad: any network or parsing failure is reported to
        # the caller as a message rather than raised.
        return None, f"Error fetching {url}: {e}"
def extract_keywords(text):
    """Return the set of distinct keywords found in ``text``.

    The text is lower-cased and tokenized with NLTK's ``word_tokenize``. A
    token is kept when it is purely alphanumeric, longer than two characters
    and not an English stop word.

    Args:
        text: Raw text to analyse. ``None`` or blank text yields an empty set.

    Returns:
        A ``set`` of lower-case keyword strings.
    """
    if not text or not text.strip():
        # Nothing to tokenize; skip the NLTK calls entirely.
        return set()
    stop_words = set(stopwords.words('english'))
    return {
        token
        for token in word_tokenize(text.lower())
        if token.isalnum() and len(token) > 2 and token not in stop_words
    }
def compare_keywords(url_a, url_b):
    """Report the keywords that page B uses and page A does not.

    Both pages are fetched before any error is reported, so a failure on
    either side (or both) is shown in a single message.

    Args:
        url_a: URL of the reference page (A).
        url_b: URL of the page to compare against (B).

    Returns:
        A display string: the fetch errors, a notice that B has no extra
        keywords, or the sorted extra keywords one per line.
    """
    page_a, problem_a = fetch_text(url_a)
    page_b, problem_b = fetch_text(url_b)

    if problem_a or problem_b:
        first = problem_a or ''
        second = problem_b or ''
        return f"β Errors:\n\n{first}\n{second}"

    words_a = extract_keywords(page_a)
    words_b = extract_keywords(page_b)
    gap = words_b - words_a
    if gap:
        return "π Keywords in B but not A:\n\n" + "\n".join(sorted(gap))
    return "β No unique keywords found in B that are missing in A."
# Gradio interface: two URL fields in, one plain-text report out.
_url_inputs = [
    gr.Textbox(label="Your Website (A)"),
    gr.Textbox(label="Competitor Website (B)"),
]
demo = gr.Interface(
    fn=compare_keywords,
    inputs=_url_inputs,
    outputs="text",
    title="π Website Keyword Gap Finder",
    description="Enter two URLs. See what keywords your competitor uses that you don't.",
)
demo.launch()