"""Gradio app that lists keywords a competitor's page uses but yours does not.

The app fetches two URLs, extracts the text of their ``<p>`` elements,
reduces each text to a set of lower-cased alphanumeric keywords (English
stop words and words shorter than three characters removed), and reports
the keywords present on page B but absent from page A.
"""

import string
from functools import lru_cache
from typing import FrozenSet, Optional, Set, Tuple

import gradio as gr
import nltk
import requests
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download once. Newer NLTK releases make word_tokenize() look up
# "punkt_tab" instead of "punkt"; requesting both keeps the app working on
# either side of that change (an unknown package id is reported by
# nltk.download() via its return value, not by raising).
for _resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_resource, quiet=True)

# Seconds to wait for a page before giving up.
REQUEST_TIMEOUT_SECONDS = 5

# Tokens shorter than this are not considered keywords.
_MIN_KEYWORD_LENGTH = 3


def fetch_text(url: str) -> Tuple[Optional[str], Optional[str]]:
    """Download *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.

    Returns:
        ``(text, None)`` on success, or ``(None, error_message)`` when the
        URL is empty, the request or parsing fails, or the page contains no
        paragraph text. This function does not raise; failures are reported
        through the second tuple element.
    """
    url = (url or "").strip()
    if not url:
        return None, "Error fetching URL: no URL was provided"

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(
            url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # Only extract <p> tag text for speed & relevance
        text = " ".join(p.get_text() for p in soup.find_all("p"))
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary whose
        # contract is to hand every failure back to the UI as a message.
        return None, f"Error fetching {url}: {e}"

    if not text.strip():
        return None, f"No readable content found at {url}"
    return text, None


@lru_cache(maxsize=None)
def _english_stop_words() -> FrozenSet[str]:
    """Return NLTK's English stop-word list, loaded once and then cached."""
    return frozenset(stopwords.words("english"))


def extract_keywords(text: str) -> Set[str]:
    """Return the set of distinct keywords found in *text*.

    The text is lower-cased and tokenized with ``word_tokenize``. A token is
    kept when it is purely alphanumeric, at least three characters long, and
    not an English stop word.
    """
    stop_words = _english_stop_words()
    return {
        token
        for token in word_tokenize(text.lower())
        if token.isalnum()
        and len(token) >= _MIN_KEYWORD_LENGTH
        and token not in stop_words
    }


def compare_keywords(url_a: str, url_b: str) -> str:
    """Report the keywords used on page B that do not appear on page A.

    Args:
        url_a: URL of your own page.
        url_b: URL of the competitor's page.

    Returns:
        A human-readable report: the fetch errors if either page could not
        be read, a "no gap" message if B has no keywords missing from A, or
        the alphabetically sorted missing keywords, one per line.
    """
    text_a, error_a = fetch_text(url_a)
    text_b, error_b = fetch_text(url_b)

    # List only the errors that actually occurred, so a single failure does
    # not leave stray blank lines in the report.
    errors = [err for err in (error_a, error_b) if err]
    if errors:
        return "❌ Errors:\n\n" + "\n".join(errors)

    missing = sorted(extract_keywords(text_b) - extract_keywords(text_a))
    if not missing:
        return "✅ No unique keywords found in B that are missing in A."
    return "🔍 Keywords in B but not A:\n\n" + "\n".join(missing)


# Gradio interface
demo = gr.Interface(
    fn=compare_keywords,
    inputs=[
        gr.Textbox(label="Your Website (A)"),
        gr.Textbox(label="Competitor Website (B)"),
    ],
    outputs="text",
    title="🔑 Website Keyword Gap Finder",
    description="Enter two URLs. See what keywords your competitor uses that you don't.",
)

# Launched at module level, as in the original script.
demo.launch()