File size: 7,373 Bytes
b2cf356
 
 
 
 
6c0453b
b2cf356
 
 
 
6c0453b
 
 
 
 
 
 
 
 
 
b2cf356
6c0453b
 
 
b2cf356
6c0453b
b2cf356
6c0453b
 
 
 
 
b2cf356
6c0453b
 
 
 
 
 
 
 
 
b2cf356
6c0453b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2cf356
6c0453b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2cf356
6c0453b
 
 
 
b2cf356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c0453b
b2cf356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c0453b
b2cf356
 
 
 
6c0453b
b2cf356
 
 
 
6c0453b
 
b2cf356
 
 
6c0453b
 
 
 
 
 
 
b2cf356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c0453b
 
b2cf356
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import gradio as gr
import requests
import zipfile
import io
import os
from pypdf import PdfReader

# Clip of The Count (Sesame Street) used as the value of the video component,
# which stays hidden until a count has been produced.
# The link goes through a Discord external-media proxy in front of a
# Tenor-hosted .mp4.
# NOTE(review): proxied links like this may not be permanent — confirm the URL
# is still reachable, or point at the media.tenor.com file directly.
VIDEO_URL = "https://images-ext-1.discordapp.net/external/PD3SdBHrOGxZtFU8zypW2PjGXabBG-OwY3rXB8XWG7Y/https/media.tenor.com/1nFPylOyXZIAAAPo/sesame-street-the-count.mp4"

def get_count_response(count):
    """Return The Count's spoken announcement for ``count`` em dashes.

    Zero and one get dedicated sentences. Counts up to 20 are recited
    number by number ("1! 2! ..."); larger counts use an abbreviated
    "One! Two! Three! ..." lead-in followed by the total.
    """
    if count == 0:
        return "Zero! Zero em dashes! Ah ah ah!"

    if count == 1:
        return "One! One em dash! Ah ah ah!"

    if count <= 20:
        # Recite every number from 1 up to and including the total.
        recited = " ".join(f"{number}!" for number in range(1, count + 1))
        return f"{recited} {count} em dashes! Ah ah ah!"

    # Too many to recite one by one: abbreviate the build-up.
    return f"One! Two! Three! ... {count}! {count} em dashes! Ah ah ah!"

def count_em_dashes_in_bytes(content_bytes, filename):
    """Count em dashes (U+2014) in the raw bytes of one file.

    Files whose name ends in ``.pdf`` (case-insensitive) are parsed with
    ``PdfReader`` and the text extracted from each page is searched.
    Every other file is decoded as UTF-8, dropping undecodable bytes,
    and searched as plain text.

    Args:
        content_bytes: The complete file content as ``bytes``.
        filename: Name or path of the file; only its extension is used,
            to choose between PDF and plain-text handling.

    Returns:
        The number of em dash characters found. Counting is best effort:
        if parsing or decoding fails, whatever was counted before the
        failure (possibly 0) is returned instead of raising.
    """
    # Written as an escape so the search string cannot be corrupted by a
    # wrong source-file encoding. The previous literal was the mojibake of
    # the UTF-8 bytes E2 80 94, which never matches a real em dash.
    em_dash = "\u2014"

    count = 0
    ext = os.path.splitext(filename.lower())[1]

    if ext == '.pdf':
        try:
            reader = PdfReader(io.BytesIO(content_bytes))
            for page in reader.pages:
                text = page.extract_text()
                # Skip pages for which no text was extracted.
                if text:
                    count += text.count(em_dash)
        except Exception:
            # Deliberate best effort: a PDF that cannot be parsed must not
            # abort the whole run. Pages counted so far are kept.
            pass
    else:
        # Assume text-based content for every other extension.
        try:
            content = content_bytes.decode('utf-8', errors='ignore')
            count = content.count(em_dash)
        except Exception:
            # Deliberate best effort: input that cannot be decoded (for
            # example a non-bytes object) contributes zero.
            pass
    return count

def process_input(input_url, uploaded_file):
    """Count em dashes in an uploaded file and/or the content behind a URL.

    Both inputs are optional and their counts are added together. A URL
    is treated as a GitHub repository when it contains ``github.com``,
    does not contain ``/archive/`` and does not end in one of a few known
    single-file extensions; the repository's ``main`` (then ``master``)
    branch archive is downloaded and every text-like file in it is
    scanned. Any other URL is downloaded and scanned as a single file.

    Args:
        input_url: Text from the URL box; may be empty or ``None``.
        uploaded_file: The uploaded file object (its ``.name`` is opened
            as a path), or ``None`` when nothing was uploaded.

    Returns:
        A ``(message, update)`` pair: the sentence to display and a
        ``gr.update`` that shows the video when something was counted and
        hides it when nothing could be processed.
    """
    # Extensions scanned inside a repository archive. Built once here rather
    # than once per archive entry; a tuple so it can go to str.endswith.
    repo_text_extensions = (
        '.py', '.md', '.txt', '.js', '.ts', '.html', '.css', '.c', '.cpp',
        '.h', '.java', '.rs', '.go', '.json', '.yml', '.yaml',
    )
    # A github.com URL ending in one of these is a single file, not a repo.
    single_file_extensions = ('.pdf', '.txt', '.md', '.py', '.js')

    total_count = 0
    processed = False

    # Handle uploaded file
    if uploaded_file is not None:
        with open(uploaded_file.name, "rb") as f:
            file_bytes = f.read()
        total_count += count_em_dashes_in_bytes(file_bytes, uploaded_file.name)
        processed = True

    # Handle URL
    if input_url and input_url.strip():
        url = input_url.strip()

        # Drop the query string and fragment before looking at the extension
        # or file name. Otherwise "file.pdf?download=1" would be scanned as
        # plain text and "github.com/u/r#readme" would yield a broken archive
        # URL. The download itself still uses the full URL.
        url_path = url.split('#', 1)[0].split('?', 1)[0]

        looks_like_repo = (
            "github.com" in url
            and "/archive/" not in url
            and not url_path.lower().endswith(single_file_extensions)
        )

        if looks_like_repo:
            # Normalize the repository URL
            base_url = url_path.rstrip('/')
            if base_url.endswith('.git'):
                base_url = base_url[:-4]

            # Try the two common default branch names; first hit wins.
            archive_response = None
            for branch in ('main', 'master'):
                test_url = f"{base_url}/archive/refs/heads/{branch}.zip"
                try:
                    response = requests.get(test_url, timeout=20)
                except Exception:
                    # Best effort: a network failure on one branch just
                    # moves on to the next candidate.
                    continue
                if response.status_code == 200:
                    archive_response = response
                    break

            if archive_response is not None:
                try:
                    with zipfile.ZipFile(io.BytesIO(archive_response.content)) as z:
                        for filename in z.namelist():
                            # Entries ending in "/" are directories.
                            if filename.endswith('/'):
                                continue
                            if filename.lower().endswith(repo_text_extensions):
                                with z.open(filename) as f:
                                    total_count += count_em_dashes_in_bytes(f.read(), filename)
                    processed = True
                except Exception:
                    # Best effort: a corrupt or unreadable archive is
                    # treated as "nothing processed" rather than a crash.
                    pass
        else:
            # Handle as single file URL
            try:
                response = requests.get(url, timeout=20)
                if response.status_code == 200:
                    filename = url_path.split('/')[-1] or "file.txt"
                    total_count += count_em_dashes_in_bytes(response.content, filename)
                    processed = True
            except Exception:
                # Best effort: an unreachable or invalid URL falls through
                # to the "could not find anything" message below.
                pass

    if not processed:
        return "I could not find anything to count! Provide a valid URL or upload a file! Ah ah ah!", gr.update(visible=False)

    return get_count_response(total_count), gr.update(visible=True)

# Custom CSS for a Sesame Street / The Count theme (black background, purple
# panels, green accent text). It is passed to gr.Blocks(css=...) below.
# The #large-input, #large-output and #large-button selectors match the
# elem_id values given to the URL box, result box and button; .file-upload
# matches the elem_classes of the file component.
# NOTE(review): .gr-box and .gr-form are presumably Gradio-internal class
# names — verify they still exist in the installed Gradio version.
custom_css = """
body, .gradio-container { background-color: #000000 !important; color: #e0e0e0 !important; font-family: 'Georgia', serif !important; }
.gr-box { background-color: #1a0633 !important; border: 2px solid #4b0082 !important; }
#large-input textarea, #large-input input { 
    background-color: #2b0b4d !important; 
    color: #ffffff !important; 
    font-size: 1.5rem !important; 
    border: 2px solid #9932cc !important;
}
#large-output textarea, #large-output input { 
    background-color: #000000 !important; 
    color: #32cd32 !important; 
    font-size: 1.8rem !important; 
    font-weight: bold !important; 
    border: 3px solid #32cd32 !important;
    text-shadow: 2px 2px #1a0633;
}
#large-button { 
    background-color: #4b0082 !important; 
    color: #32cd32 !important; 
    font-size: 1.6rem !important; 
    font-weight: bold !important; 
    border: 4px solid #32cd32 !important; 
    height: 80px !important;
    box-shadow: 0 0 10px #4b0082;
    transition: all 0.3s ease;
    cursor: pointer;
}
#large-button:hover {
    background-color: #9932cc !important;
    color: #ffffff !important;
    box-shadow: 0 0 20px #32cd32;
    transform: scale(1.02);
}
.gr-form label span { 
    font-size: 1.4rem !important; 
    color: #9932cc !important; 
    font-weight: bold !important;
    text-transform: uppercase;
    letter-spacing: 2px;
}
h1 { color: #9932cc !important; text-shadow: 2px 2px #000000 !important; font-size: 3rem !important; text-align: center !important; }
h3 { color: #e0e0e0 !important; text-align: center !important; margin-bottom: 2rem !important; }
.file-upload { background-color: #2b0b4d !important; border: 2px dashed #9932cc !important; }
"""

# Build the page: inputs, button and result on the left, the (initially
# hidden) video of The Count on the right.
with gr.Blocks(title="The Count's Em Dash Counter", css=custom_css) as demo:
    # Non-ASCII characters are written as escapes so the headings cannot be
    # corrupted by a wrong source-file encoding (they were mojibake before):
    # U+1F9DB U+200D U+2642 U+FE0F is the "man vampire" emoji and U+2014 is
    # the em dash.
    gr.Markdown("# \U0001F9DB\u200D\u2642\uFE0F The Count's Em Dash Counter")
    gr.Markdown("### Provide a GitHub repo, a file URL, or upload documents to count em dashes (\u2014)! Ah ah ah!")

    with gr.Row():
        with gr.Column(scale=4):
            repo_url = gr.Textbox(
                label="GitHub or File URL",
                placeholder="https://github.com/... OR https://example.com/file.pdf",
                lines=1,
                elem_id="large-input"
            )

            file_upload = gr.File(
                label="Upload Documents (PDF, TXT, MD...)",
                file_types=[".pdf", ".txt", ".md", ".py", ".js", ".ts", ".html", ".css", ".json"],
                elem_classes="file-upload"
            )

            count_btn = gr.Button("Count them! Ah ah ah!", variant="primary", elem_id="large-button")
            result_text = gr.Textbox(
                label="The Count Says:",
                interactive=False,
                elem_id="large-output",
                lines=3
            )

        with gr.Column(scale=5):
            # Hidden until process_input returns an update making it visible.
            video = gr.Video(
                value=VIDEO_URL,
                label="The Count",
                autoplay=True,
                loop=True,
                show_label=False,
                interactive=False,
                visible=False
            )

    # One click handler feeds both inputs to process_input, which returns the
    # message for the result box and a visibility update for the video.
    count_btn.click(
        fn=process_input,
        inputs=[repo_url, file_upload],
        outputs=[result_text, video]
    )

# Start the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()