"""Gradio app in which The Count tallies em dashes in uploads, URLs and GitHub repos."""

import io
import logging
import os
import zipfile
from urllib.parse import urlparse

import gradio as gr
import requests
from pypdf import PdfReader

logger = logging.getLogger(__name__)

# Video URL of The Count
VIDEO_URL = "https://images-ext-1.discordapp.net/external/PD3SdBHrOGxZtFU8zypW2PjGXabBG-OwY3rXB8XWG7Y/https/media.tenor.com/1nFPylOyXZIAAAPo/sesame-street-the-count.mp4"

# The character being counted (U+2014 EM DASH).
EM_DASH = "\u2014"

# Seconds to wait for any single HTTP request.
REQUEST_TIMEOUT = 20

# Branch names tried, in order, when downloading a GitHub repository archive.
GITHUB_BRANCHES = ("main", "master")

# Hostnames treated as GitHub when deciding whether a URL names a repository.
GITHUB_HOSTS = ("github.com", "www.github.com")

# A GitHub URL whose path ends in one of these is fetched as a single file
# instead of being treated as a repository.
SINGLE_FILE_EXTENSIONS = (".pdf", ".txt", ".md", ".py", ".js")

# Archive members with these extensions are counted; everything else is skipped.
TEXT_EXTENSIONS = (
    ".py", ".md", ".txt", ".js", ".ts", ".html", ".css", ".c", ".cpp", ".h",
    ".java", ".rs", ".go", ".json", ".yml", ".yaml",
)


def get_count_response(count):
    """Return The Count's spoken tally for ``count`` em dashes.

    Counts up to 20 are enumerated one by one ("1! 2! 3! ..."); larger counts
    use an abbreviated lead-in.
    """
    if count == 0:
        return "Zero! Zero em dashes! Ah ah ah!"
    if count == 1:
        return "One! One em dash! Ah ah ah!"
    if count <= 20:
        counting = " ".join(f"{i}!" for i in range(1, count + 1))
        return f"{counting} {count} em dashes! Ah ah ah!"
    return f"One! Two! Three! ... {count}! {count} em dashes! Ah ah ah!"


def count_em_dashes_in_bytes(content_bytes, filename):
    """Count em dashes in a file's raw bytes.

    Args:
        content_bytes: The raw file content.
        filename: Name used only to pick the parser; a ``.pdf`` extension
            (case-insensitive) selects PDF text extraction, anything else is
            decoded as UTF-8 text.

    Returns:
        The number of em dashes found. For a PDF that fails part-way through,
        the count accumulated before the failure is returned.
    """
    ext = os.path.splitext(filename.lower())[1]
    if ext != ".pdf":
        # errors="ignore" drops undecodable bytes, so decoding cannot raise.
        return content_bytes.decode("utf-8", errors="ignore").count(EM_DASH)

    count = 0
    try:
        reader = PdfReader(io.BytesIO(content_bytes))
        for page in reader.pages:
            text = page.extract_text()
            if text:
                count += text.count(EM_DASH)
    except Exception:
        # Best effort: PDF parsing can fail in many ways on damaged input.
        # Keep whatever was counted so far, but leave a trace in the log.
        logger.warning("Could not fully read PDF %r", filename, exc_info=True)
    return count


def _is_github_repo_url(parsed):
    """Return True when a parsed URL looks like a GitHub repository page."""
    host = (parsed.hostname or "").lower()
    if host not in GITHUB_HOSTS:
        return False
    path = parsed.path.lower()
    if "/archive/" in path:
        return False
    return not path.endswith(SINGLE_FILE_EXTENSIONS)


def _download_github_archive(repo_url):
    """Return the zip bytes of the first default branch that downloads, else None."""
    base_url = repo_url.rstrip("/")
    if base_url.endswith(".git"):
        base_url = base_url[:-4]

    for branch in GITHUB_BRANCHES:
        archive_url = f"{base_url}/archive/refs/heads/{branch}.zip"
        try:
            response = requests.get(archive_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            logger.warning("Could not download %s", archive_url, exc_info=True)
            continue
        if response.status_code == 200:
            return response.content
    return None


def _count_em_dashes_in_zip(archive_bytes):
    """Count em dashes in the text files of a zip; return ``(count, processed)``."""
    count = 0
    processed = False
    try:
        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
            for member in archive.namelist():
                if member.endswith("/"):
                    continue  # directory entry
                if not member.lower().endswith(TEXT_EXTENSIONS):
                    continue
                with archive.open(member) as handle:
                    count += count_em_dashes_in_bytes(handle.read(), member)
                processed = True
    except Exception:
        # Best effort: the archive is untrusted and can fail in many ways
        # (corrupt data, unsupported compression, ...). Keep the partial count.
        logger.warning("Could not fully read repository archive", exc_info=True)
    return count, processed


def _count_em_dashes_in_file_url(url, url_path):
    """Fetch one file over HTTP and count its em dashes; return ``(count, processed)``."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        logger.warning("Could not download %s", url, exc_info=True)
        return 0, False
    if response.status_code != 200:
        return 0, False

    # Use the URL path, not the raw URL, so a query string or fragment
    # ("report.pdf?download=1") does not hide the file extension.
    filename = url_path.rsplit("/", 1)[-1] or "file.txt"
    return count_em_dashes_in_bytes(response.content, filename), True


def _count_em_dashes_from_url(url):
    """Count em dashes behind a repo or file URL; return ``(count, processed)``."""
    try:
        parsed = urlparse(url)
    except ValueError:
        logger.warning("Ignoring malformed URL %r", url)
        return 0, False

    if not _is_github_repo_url(parsed):
        return _count_em_dashes_in_file_url(url, parsed.path)

    # Rebuild the URL without query/fragment so the archive path is appended
    # to the repository path itself.
    repo_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
    archive_bytes = _download_github_archive(repo_url)
    if archive_bytes is None:
        return 0, False
    return _count_em_dashes_in_zip(archive_bytes)


def process_input(input_url, uploaded_file):
    """Gradio click handler: count em dashes in the upload and/or the URL.

    Args:
        input_url: Text from the URL box; may be empty or None.
        uploaded_file: Value of the file component, or None when nothing was
            uploaded.

    Returns:
        A ``(message, video_update)`` pair: The Count's response and a
        ``gr.update`` that shows the video only when something was counted.
    """
    total_count = 0
    processed = False

    # Handle Uploaded File
    if uploaded_file is not None:
        # Accept either an object exposing the temp path as ``.name`` or a
        # plain path string.
        # NOTE(review): which one Gradio passes depends on its version - confirm.
        upload_path = getattr(uploaded_file, "name", uploaded_file)
        with open(upload_path, "rb") as f:
            file_bytes = f.read()
        total_count += count_em_dashes_in_bytes(file_bytes, upload_path)
        processed = True

    # Handle URL
    url = input_url.strip() if input_url else ""
    if url:
        url_count, url_processed = _count_em_dashes_from_url(url)
        total_count += url_count
        processed = processed or url_processed

    if not processed:
        return (
            "I could not find anything to count! Provide a valid URL or upload a file! Ah ah ah!",
            gr.update(visible=False),
        )

    return get_count_response(total_count), gr.update(visible=True)


# Define custom CSS for a Sesame Street / The Count theme
custom_css = """
body, .gradio-container {
    background-color: #000000 !important;
    color: #e0e0e0 !important;
    font-family: 'Georgia', serif !important;
}
.gr-box {
    background-color: #1a0633 !important;
    border: 2px solid #4b0082 !important;
}
#large-input textarea, #large-input input {
    background-color: #2b0b4d !important;
    color: #ffffff !important;
    font-size: 1.5rem !important;
    border: 2px solid #9932cc !important;
}
#large-output textarea, #large-output input {
    background-color: #000000 !important;
    color: #32cd32 !important;
    font-size: 1.8rem !important;
    font-weight: bold !important;
    border: 3px solid #32cd32 !important;
    text-shadow: 2px 2px #1a0633;
}
#large-button {
    background-color: #4b0082 !important;
    color: #32cd32 !important;
    font-size: 1.6rem !important;
    font-weight: bold !important;
    border: 4px solid #32cd32 !important;
    height: 80px !important;
    box-shadow: 0 0 10px #4b0082;
    transition: all 0.3s ease;
    cursor: pointer;
}
#large-button:hover {
    background-color: #9932cc !important;
    color: #ffffff !important;
    box-shadow: 0 0 20px #32cd32;
    transform: scale(1.02);
}
.gr-form label span {
    font-size: 1.4rem !important;
    color: #9932cc !important;
    font-weight: bold !important;
    text-transform: uppercase;
    letter-spacing: 2px;
}
h1 {
    color: #9932cc !important;
    text-shadow: 2px 2px #000000 !important;
    font-size: 3rem !important;
    text-align: center !important;
}
h3 {
    color: #e0e0e0 !important;
    text-align: center !important;
    margin-bottom: 2rem !important;
}
.file-upload {
    background-color: #2b0b4d !important;
    border: 2px dashed #9932cc !important;
}
"""

with gr.Blocks(title="The Count's Em Dash Counter", css=custom_css) as demo:
    gr.Markdown("# 🧛‍♂️ The Count's Em Dash Counter")
    gr.Markdown("### Provide a GitHub repo, a file URL, or upload documents to count em dashes (—)! Ah ah ah!")

    with gr.Row():
        # Left column: inputs, the trigger button, and the textual result.
        with gr.Column(scale=4):
            repo_url = gr.Textbox(
                label="GitHub or File URL",
                placeholder="https://github.com/... OR https://example.com/file.pdf",
                lines=1,
                elem_id="large-input",
            )
            file_upload = gr.File(
                label="Upload Documents (PDF, TXT, MD...)",
                file_types=[".pdf", ".txt", ".md", ".py", ".js", ".ts", ".html", ".css", ".json"],
                elem_classes="file-upload",
            )
            count_btn = gr.Button("Count them! Ah ah ah!", variant="primary", elem_id="large-button")
            result_text = gr.Textbox(
                label="The Count Says:",
                interactive=False,
                elem_id="large-output",
                lines=3,
            )

        # Right column: the video starts hidden; process_input reveals it
        # once something has been counted.
        with gr.Column(scale=5):
            video = gr.Video(
                value=VIDEO_URL,
                label="The Count",
                autoplay=True,
                loop=True,
                show_label=False,
                interactive=False,
                visible=False,
            )

    count_btn.click(
        fn=process_input,
        inputs=[repo_url, file_upload],
        outputs=[result_text, video],
    )

if __name__ == "__main__":
    demo.launch()