# compcomp / app.py
# engrjamalakram's picture
# Create app.py
# 2f7b0d4 verified
import gradio as gr
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
# Fetch the NLTK data this module relies on at import time.
# 'punkt_tab' is the tokenizer resource word_tokenize loads on newer NLTK
# releases (3.9+); 'punkt' is kept for older installations.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
def fetch_text(url):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is
            ignored, and ``https://`` is assumed when the value contains
            no ``://`` scheme separator.

    Returns:
        A ``(text, error)`` pair. On success ``text`` is the paragraph text
        joined with single spaces and ``error`` is ``None``. On failure
        ``text`` is ``None`` and ``error`` is a human-readable message;
        request and parsing failures are reported this way instead of
        being raised.
    """
    url = (url or "").strip()
    if not url:
        # An empty textbox would otherwise surface as a cryptic
        # "Invalid URL ''" message from requests.
        return None, "No URL provided."
    if "://" not in url:
        # Users commonly type bare domains such as "example.com".
        url = f"https://{url}"

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Only extract <p> tag text for speed & relevance
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
    except Exception as e:
        # Deliberately broad: the caller shows the message to the user
        # rather than handling exceptions itself.
        return None, f"Error fetching {url}: {str(e)}"

    if not text.strip():
        return None, f"No readable <p> content found at {url}"
    return text, None
def extract_keywords(text):
    """Return the set of distinct keywords found in *text*.

    The text is lower-cased and split with NLTK's ``word_tokenize``. A token
    is kept when it is purely alphanumeric, longer than two characters, and
    not in NLTK's English stop-word list.
    """
    tokens = word_tokenize(text.lower())
    english_stops = set(stopwords.words('english'))

    keywords = set()
    for token in tokens:
        if not token.isalnum():
            # Drops punctuation and tokens containing non-alphanumerics.
            continue
        if len(token) <= 2 or token in english_stops:
            continue
        keywords.add(token)
    return keywords
def compare_keywords(url_a, url_b):
    """Report keywords used on page B that do not appear on page A.

    Args:
        url_a: URL of the reference page (A).
        url_b: URL of the page to compare against (B).

    Returns:
        A display string: the fetch error(s) if either page could not be
        read, a notice that B has no keywords missing from A, or the
        sorted, newline-separated keywords found in B but not in A.
    """
    text_a, error_a = fetch_text(url_a)
    text_b, error_b = fetch_text(url_b)

    # Both pages are fetched before checking so that a single response can
    # report every failure; absent errors are skipped to avoid blank lines.
    errors = [err for err in (error_a, error_b) if err]
    if errors:
        return "❌ Errors:\n\n" + "\n".join(errors)

    missing = sorted(extract_keywords(text_b) - extract_keywords(text_a))
    if not missing:
        return "✅ No unique keywords found in B that are missing in A."
    return "🔍 Keywords in B but not A:\n\n" + "\n".join(missing)
# Gradio interface: two URL text boxes in, one plain-text report out.
demo = gr.Interface(
    fn=compare_keywords,
    inputs=[
        gr.Textbox(label="Your Website (A)"),
        gr.Textbox(label="Competitor Website (B)"),
    ],
    outputs="text",
    # The key emoji was mis-encoded in the original title and is restored.
    title="🔑 Website Keyword Gap Finder",
    description="Enter two URLs. See what keywords your competitor uses that you don't.",
)
demo.launch()