# EmDashCounter / app.py
# Author: silveroxides
# Last change (6c0453b): Add support for a single file link or an upload of
# PDF/TXT files for the em dash check.
import gradio as gr
import requests
import zipfile
import io
import os
from pypdf import PdfReader
# Video URL of The Count; used below as the initial value of the gr.Video
# component.
# NOTE(review): this link goes through Discord's external media proxy
# (images-ext-1.discordapp.net) and wraps a media.tenor.com .mp4 path.
# Proxy links like this may not be stable -- confirm, and consider linking the
# underlying media URL directly.
VIDEO_URL = "https://images-ext-1.discordapp.net/external/PD3SdBHrOGxZtFU8zypW2PjGXabBG-OwY3rXB8XWG7Y/https/media.tenor.com/1nFPylOyXZIAAAPo/sesame-street-the-count.mp4"
def get_count_response(count):
    """Build The Count's spoken reply for a tally of ``count`` em dashes.

    Zero and one get fixed phrases, values up to 20 are counted out number by
    number ("1! 2! 3! ..."), and anything larger is abbreviated.

    Args:
        count: Number of em dashes found.

    Returns:
        The reply sentence as a string.
    """
    if count == 0:
        return "Zero! Zero em dashes! Ah ah ah!"
    if count == 1:
        return "One! One em dash! Ah ah ah!"
    if count > 20:
        # Too many to recite one by one; skip ahead to the total.
        return f"One! Two! Three! ... {count}! {count} em dashes! Ah ah ah!"

    recited = " ".join(map("{}!".format, range(1, count + 1)))
    return f"{recited} {count} em dashes! Ah ah ah!"
def count_em_dashes_in_bytes(content_bytes, filename):
    """Count em dashes (U+2014) in the raw bytes of one file.

    Files whose name ends in ``.pdf`` (case-insensitive) are parsed with
    ``PdfReader`` and the text extracted from each page is searched. Every
    other file is decoded as UTF-8, dropping undecodable bytes, and searched
    as plain text.

    Args:
        content_bytes: Raw file contents as ``bytes``.
        filename: File name or path; only its extension is used, to choose
            between PDF parsing and plain-text decoding.

    Returns:
        The number of em dashes found. For a PDF that fails to parse, the
        count accumulated before the failure (0 if none).
    """
    # Spelled as an escape so the search character cannot be corrupted when
    # this source file is re-encoded (a mis-decoded literal matches nothing).
    em_dash = "\u2014"

    ext = os.path.splitext(filename.lower())[1]
    if ext != '.pdf':
        # errors='ignore' drops invalid byte sequences, so decoding bytes
        # cannot raise here.
        return content_bytes.decode('utf-8', errors='ignore').count(em_dash)

    count = 0
    try:
        reader = PdfReader(io.BytesIO(content_bytes))
        for page in reader.pages:
            text = page.extract_text()
            if text:
                count += text.count(em_dash)
    except Exception:
        # Deliberate best effort: a malformed or unreadable PDF contributes
        # whatever was counted so far instead of failing the whole request.
        pass
    return count
# URL paths ending in one of these are fetched as a single file even when the
# host is github.com.
_DIRECT_FILE_EXTENSIONS = ('.pdf', '.txt', '.md', '.py', '.js')

# Members of a downloaded repository archive are scanned only when their name
# ends in one of these extensions.
_REPO_TEXT_EXTENSIONS = (
    '.py', '.md', '.txt', '.js', '.ts', '.html', '.css', '.c', '.cpp', '.h',
    '.java', '.rs', '.go', '.json', '.yml', '.yaml',
)


def _strip_query(url):
    """Return ``url`` with any ``?query`` and ``#fragment`` part removed."""
    return url.split('#', 1)[0].split('?', 1)[0]


def _looks_like_github_repo(url):
    """Return True when ``url`` should be treated as a GitHub repository."""
    # The extension is checked on the path only, so "file.pdf?raw=1" is still
    # recognised as a direct file link.
    path = _strip_query(url).lower()
    return (
        "github.com" in url
        and "/archive/" not in url
        and not path.endswith(_DIRECT_FILE_EXTENSIONS)
    )


def _count_in_github_repo(url):
    """Count em dashes in the text files of a GitHub repository archive.

    Tries the ``main`` and then the ``master`` branch archive. Returns the
    count, or ``None`` when no archive could be downloaded or read.
    """
    base_url = _strip_query(url).rstrip('/')
    if base_url.endswith('.git'):
        base_url = base_url[:-4]

    archive = None
    for branch in ('main', 'master'):
        try:
            response = requests.get(
                f"{base_url}/archive/refs/heads/{branch}.zip", timeout=20
            )
        except requests.RequestException:
            # Network failure for this branch; try the next candidate.
            continue
        if response.status_code == 200:
            archive = response.content
            break
    if archive is None:
        return None

    total = 0
    try:
        with zipfile.ZipFile(io.BytesIO(archive)) as z:
            for member in z.namelist():
                if member.endswith('/'):
                    continue  # directory entry
                if member.lower().endswith(_REPO_TEXT_EXTENSIONS):
                    with z.open(member) as f:
                        total += count_em_dashes_in_bytes(f.read(), member)
    except Exception:
        # Deliberate best effort: a corrupt archive can fail with several
        # unrelated exception types, and none of them should crash the UI
        # handler.
        return None
    return total


def _count_in_single_file(url):
    """Download one file and count its em dashes; ``None`` if the fetch fails."""
    try:
        response = requests.get(url, timeout=20)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # Take the name from the path only, so a query string cannot hide the
    # extension (e.g. "paper.pdf?download=1" must still be parsed as a PDF).
    filename = _strip_query(url).split('/')[-1] or "file.txt"
    return count_em_dashes_in_bytes(response.content, filename)


def process_input(input_url, uploaded_file):
    """Count em dashes in an uploaded file and/or the content behind a URL.

    Both inputs are optional and their counts are added together. A URL that
    looks like a GitHub repository is downloaded as a branch archive and its
    text files are scanned; any other URL is fetched as a single file.

    Args:
        input_url: URL string from the text box; may be empty or ``None``.
        uploaded_file: Uploaded file object exposing its local path as
            ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        A ``(message, video_update)`` tuple: The Count's reply plus a
        ``gr.update`` that shows the video when something was counted and
        hides it when no input could be processed.
    """
    total_count = 0
    processed = False

    # Uploaded file
    if uploaded_file is not None:
        with open(uploaded_file.name, "rb") as f:
            file_bytes = f.read()
        total_count += count_em_dashes_in_bytes(file_bytes, uploaded_file.name)
        processed = True

    # URL (repository or single file)
    url = input_url.strip() if input_url else ""
    if url:
        if _looks_like_github_repo(url):
            url_count = _count_in_github_repo(url)
        else:
            url_count = _count_in_single_file(url)
        if url_count is not None:
            total_count += url_count
            processed = True

    if not processed:
        return "I could not find anything to count! Provide a valid URL or upload a file! Ah ah ah!", gr.update(visible=False)
    return get_count_response(total_count), gr.update(visible=True)
# Custom CSS for a Sesame Street / The Count theme (black background, purple
# panels, green accents). It is passed to gr.Blocks(css=...) below.
# The selectors line up with identifiers assigned to components in the layout:
#   #large-input   -> URL textbox        (elem_id="large-input")
#   #large-output  -> result textbox     (elem_id="large-output")
#   #large-button  -> count button       (elem_id="large-button")
#   .file-upload   -> file upload widget (elem_classes="file-upload")
custom_css = """
body, .gradio-container { background-color: #000000 !important; color: #e0e0e0 !important; font-family: 'Georgia', serif !important; }
.gr-box { background-color: #1a0633 !important; border: 2px solid #4b0082 !important; }
#large-input textarea, #large-input input {
background-color: #2b0b4d !important;
color: #ffffff !important;
font-size: 1.5rem !important;
border: 2px solid #9932cc !important;
}
#large-output textarea, #large-output input {
background-color: #000000 !important;
color: #32cd32 !important;
font-size: 1.8rem !important;
font-weight: bold !important;
border: 3px solid #32cd32 !important;
text-shadow: 2px 2px #1a0633;
}
#large-button {
background-color: #4b0082 !important;
color: #32cd32 !important;
font-size: 1.6rem !important;
font-weight: bold !important;
border: 4px solid #32cd32 !important;
height: 80px !important;
box-shadow: 0 0 10px #4b0082;
transition: all 0.3s ease;
cursor: pointer;
}
#large-button:hover {
background-color: #9932cc !important;
color: #ffffff !important;
box-shadow: 0 0 20px #32cd32;
transform: scale(1.02);
}
.gr-form label span {
font-size: 1.4rem !important;
color: #9932cc !important;
font-weight: bold !important;
text-transform: uppercase;
letter-spacing: 2px;
}
h1 { color: #9932cc !important; text-shadow: 2px 2px #000000 !important; font-size: 3rem !important; text-align: center !important; }
h3 { color: #e0e0e0 !important; text-align: center !important; margin-bottom: 2rem !important; }
.file-upload { background-color: #2b0b4d !important; border: 2px dashed #9932cc !important; }
"""
# Build the UI: inputs, button and result on the left, The Count's video on
# the right.
with gr.Blocks(title="The Count's Em Dash Counter", css=custom_css) as demo:
    # Non-ASCII characters are written as escapes so they cannot be corrupted
    # when this file is re-encoded:
    #   \U0001F9DB\u200D\u2642\uFE0F -> "man vampire" emoji sequence
    #   \u2014                       -> em dash
    gr.Markdown("# \U0001F9DB\u200D\u2642\uFE0F The Count's Em Dash Counter")
    gr.Markdown("### Provide a GitHub repo, a file URL, or upload documents to count em dashes (\u2014)! Ah ah ah!")

    with gr.Row():
        with gr.Column(scale=4):
            repo_url = gr.Textbox(
                label="GitHub or File URL",
                placeholder="https://github.com/... OR https://example.com/file.pdf",
                lines=1,
                elem_id="large-input",
            )
            file_upload = gr.File(
                label="Upload Documents (PDF, TXT, MD...)",
                file_types=[".pdf", ".txt", ".md", ".py", ".js", ".ts", ".html", ".css", ".json"],
                elem_classes="file-upload",
            )
            count_btn = gr.Button("Count them! Ah ah ah!", variant="primary", elem_id="large-button")
            result_text = gr.Textbox(
                label="The Count Says:",
                interactive=False,
                elem_id="large-output",
                lines=3,
            )
        with gr.Column(scale=5):
            # Starts hidden; the second output of process_input toggles its
            # visibility after each click.
            video = gr.Video(
                value=VIDEO_URL,
                label="The Count",
                autoplay=True,
                loop=True,
                show_label=False,
                interactive=False,
                visible=False,
            )

    # process_input returns (message, video visibility update), in that order.
    count_btn.click(
        fn=process_input,
        inputs=[repo_url, file_upload],
        outputs=[result_text, video],
    )

if __name__ == "__main__":
    demo.launch()