Spaces:
Sleeping
Sleeping
Initial deployment of The Daily Snorter
Browse files
- .gitattributes +3 -0
- Dockerfile +16 -0
- README.md +14 -5
- corpus.db +3 -0
- curated_jokes.py +440 -0
- requirements.txt +20 -0
- scraper/__init__.py +0 -0
- scraper/__main__.py +4 -0
- scraper/cli.py +427 -0
- scraper/config.py +65 -0
- scraper/db.py +522 -0
- scraper/extract_jokes.py +207 -0
- scraper/platforms/__init__.py +0 -0
- scraper/platforms/bluesky.py +89 -0
- scraper/platforms/guardian.py +88 -0
- scraper/platforms/instagram.py +463 -0
- scraper/platforms/reddit.py +114 -0
- scraper/platforms/rss.py +76 -0
- scraper/platforms/x_twitter.py +120 -0
- scraper/platforms/youtube.py +171 -0
- scraper/scoring.py +112 -0
- scraper/static/__init__.py +0 -0
- scraper/static/import_datasets.py +161 -0
- scraper/utils.py +67 -0
- static/snorter-hero.png +3 -0
- static/snorter-logo.png +3 -0
- templates/index.html +1345 -0
- templates/tombola.html +1172 -0
- templates/worldcup.html +818 -0
- webapp.py +1231 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
corpus.db filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
static/snorter-hero.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
static/snorter-logo.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install dependencies
|
| 6 |
+
COPY requirements.txt .
|
| 7 |
+
RUN pip install --no-cache-dir -r requirements.txt gunicorn
|
| 8 |
+
|
| 9 |
+
# Copy application
|
| 10 |
+
COPY . .
|
| 11 |
+
|
| 12 |
+
# HuggingFace Spaces expects port 7860
|
| 13 |
+
EXPOSE 7860
|
| 14 |
+
|
| 15 |
+
# Run with gunicorn for production
|
| 16 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120", "webapp:app"]
|
README.md
CHANGED
|
@@ -1,10 +1,19 @@
|
|
| 1 |
---
|
| 2 |
-
title: Daily Snorter
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: The Daily Snorter
|
| 3 |
+
emoji: 🐽
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# The Daily Snorter
|
| 12 |
+
|
| 13 |
+
A curated comedy discovery app. Swipe through jokes across three categories:
|
| 14 |
+
|
| 15 |
+
- **Classics** — Timeless one-liners and proven comedy gold
|
| 16 |
+
- **Dad Jokes** — Wholesome groaners for the whole family
|
| 17 |
+
- **Fresh** — Latest comedy from social media, engagement-filtered and quality-scored
|
| 18 |
+
|
| 19 |
+
Built with engagement-weighted scoring, statistical quality tiers, and behavioural profiling to serve you better jokes the more you use it.
|
corpus.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af777093a62d846b0a907d6f634ded598a2d17481d8ab41c7a7bbf67a2a6c9b6
|
| 3 |
+
size 83984384
|
curated_jokes.py
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Curated collection of classic, short, quotable jokes from famous humorists and comedians.
|
| 3 |
+
Compiled from well-known published quotes, stand-up specials, and award-winning material.
|
| 4 |
+
|
| 5 |
+
Each entry contains:
|
| 6 |
+
- text: The joke text
|
| 7 |
+
- author: The comedian/humorist's name
|
| 8 |
+
- style: Category (one-liner, observational, dark, wordplay, deadpan, absurd, wit, self-deprecating)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
JOKES = [
|
| 12 |
+
|
| 13 |
+
# =========================================================================
|
| 14 |
+
# LITERARY / CLASSIC WITS
|
| 15 |
+
# =========================================================================
|
| 16 |
+
|
| 17 |
+
# --- Mark Twain ---
|
| 18 |
+
{"text": "Suppose you were an idiot, and suppose you were a member of Congress; but I repeat myself.", "author": "Mark Twain", "style": "wit"},
|
| 19 |
+
{"text": "Clothes make the man. Naked people have little or no influence on society.", "author": "Mark Twain", "style": "wit"},
|
| 20 |
+
{"text": "Never argue with stupid people, they will drag you down to their level and then beat you with experience.", "author": "Mark Twain", "style": "wit"},
|
| 21 |
+
{"text": "I am an old man and have known a great many troubles, but most of them never happened.", "author": "Mark Twain", "style": "wit"},
|
| 22 |
+
{"text": "Never put off till tomorrow what may be done day after tomorrow just as well.", "author": "Mark Twain", "style": "wit"},
|
| 23 |
+
{"text": "The secret of getting ahead is getting started.", "author": "Mark Twain", "style": "wit"},
|
| 24 |
+
{"text": "A clear conscience is the sure sign of a bad memory.", "author": "Mark Twain", "style": "wit"},
|
| 25 |
+
{"text": "The difference between the right word and the almost right word is the difference between lightning and a lightning bug.", "author": "Mark Twain", "style": "wit"},
|
| 26 |
+
{"text": "If you don't read the newspaper, you're uninformed. If you read the newspaper, you're misinformed.", "author": "Mark Twain", "style": "observational"},
|
| 27 |
+
{"text": "What would men be without women? Scarce, sir... mighty scarce.", "author": "Mark Twain", "style": "wit"},
|
| 28 |
+
|
| 29 |
+
# --- Oscar Wilde ---
|
| 30 |
+
{"text": "I can resist everything except temptation.", "author": "Oscar Wilde", "style": "wit"},
|
| 31 |
+
{"text": "Always forgive your enemies; nothing annoys them so much.", "author": "Oscar Wilde", "style": "wit"},
|
| 32 |
+
{"text": "Some cause happiness wherever they go; others whenever they go.", "author": "Oscar Wilde", "style": "wit"},
|
| 33 |
+
{"text": "I am not young enough to know everything.", "author": "Oscar Wilde", "style": "wit"},
|
| 34 |
+
{"text": "The play was a great success, but the audience was a total failure.", "author": "Oscar Wilde", "style": "wit"},
|
| 35 |
+
{"text": "I have nothing to declare except my genius.", "author": "Oscar Wilde", "style": "wit"},
|
| 36 |
+
{"text": "Work is the curse of the drinking classes.", "author": "Oscar Wilde", "style": "wit"},
|
| 37 |
+
{"text": "Bigamy is having one wife too many. Monogamy is the same.", "author": "Oscar Wilde", "style": "wit"},
|
| 38 |
+
{"text": "Experience is simply the name we give our mistakes.", "author": "Oscar Wilde", "style": "wit"},
|
| 39 |
+
{"text": "This wallpaper and I are fighting a duel to the death. Either it goes or I do.", "author": "Oscar Wilde", "style": "wit"},
|
| 40 |
+
|
| 41 |
+
# --- Dorothy Parker ---
|
| 42 |
+
{"text": "This is not a novel to be tossed aside lightly. It should be thrown with great force.", "author": "Dorothy Parker", "style": "wit"},
|
| 43 |
+
{"text": "The first thing I do in the morning is brush my teeth and sharpen my tongue.", "author": "Dorothy Parker", "style": "wit"},
|
| 44 |
+
{"text": "I don't know much about being a millionaire, but I'll bet I'd be darling at it.", "author": "Dorothy Parker", "style": "wit"},
|
| 45 |
+
{"text": "I require only three things of a man: he must be handsome, ruthless, and stupid.", "author": "Dorothy Parker", "style": "wit"},
|
| 46 |
+
{"text": "What fresh hell is this?", "author": "Dorothy Parker", "style": "wit"},
|
| 47 |
+
{"text": "Beauty is only skin deep, but ugly goes clean to the bone.", "author": "Dorothy Parker", "style": "wit"},
|
| 48 |
+
{"text": "If you want to know what God thinks of money, just look at the people he gave it to.", "author": "Dorothy Parker", "style": "wit"},
|
| 49 |
+
{"text": "You can lead a horticulture, but you can't make her think.", "author": "Dorothy Parker", "style": "wordplay"},
|
| 50 |
+
{"text": "I like to have a martini, two at the very most. After three I'm under the table, after four I'm under my host.", "author": "Dorothy Parker", "style": "wit"},
|
| 51 |
+
{"text": "That woman speaks eighteen languages, and can't say 'No' in any of them.", "author": "Dorothy Parker", "style": "wit"},
|
| 52 |
+
|
| 53 |
+
# --- Groucho Marx ---
|
| 54 |
+
{"text": "I refuse to join any club that would have me as a member.", "author": "Groucho Marx", "style": "one-liner"},
|
| 55 |
+
{"text": "Outside of a dog, a book is a man's best friend. Inside of a dog it's too dark to read.", "author": "Groucho Marx", "style": "wordplay"},
|
| 56 |
+
{"text": "One morning I shot an elephant in my pajamas. How he got into my pajamas I'll never know.", "author": "Groucho Marx", "style": "wordplay"},
|
| 57 |
+
{"text": "I never forget a face, but in your case I'll be glad to make an exception.", "author": "Groucho Marx", "style": "wit"},
|
| 58 |
+
{"text": "I've had a perfectly wonderful evening, but this wasn't it.", "author": "Groucho Marx", "style": "wit"},
|
| 59 |
+
{"text": "Those are my principles, and if you don't like them... well, I have others.", "author": "Groucho Marx", "style": "wit"},
|
| 60 |
+
{"text": "The secret of life is honesty and fair dealing. If you can fake that, you've got it made.", "author": "Groucho Marx", "style": "wit"},
|
| 61 |
+
{"text": "Marriage is a wonderful institution, but who wants to live in an institution?", "author": "Groucho Marx", "style": "one-liner"},
|
| 62 |
+
{"text": "I find television very educating. Every time somebody turns on the set, I go into the other room and read a book.", "author": "Groucho Marx", "style": "wit"},
|
| 63 |
+
{"text": "A child of five would understand this. Send someone to fetch a child of five.", "author": "Groucho Marx", "style": "absurd"},
|
| 64 |
+
|
| 65 |
+
# --- W.C. Fields ---
|
| 66 |
+
{"text": "I cook with wine, sometimes I even add it to the food.", "author": "W.C. Fields", "style": "one-liner"},
|
| 67 |
+
{"text": "I never drink water because of the disgusting things that fish do in it.", "author": "W.C. Fields", "style": "one-liner"},
|
| 68 |
+
{"text": "I distrust camels, and anyone else who can go a week without a drink.", "author": "W.C. Fields", "style": "one-liner"},
|
| 69 |
+
{"text": "Start every day off with a smile and get it over with.", "author": "W.C. Fields", "style": "one-liner"},
|
| 70 |
+
{"text": "A woman drove me to drink and I didn't even have the decency to thank her.", "author": "W.C. Fields", "style": "one-liner"},
|
| 71 |
+
{"text": "I am free of all prejudice. I hate everyone equally.", "author": "W.C. Fields", "style": "one-liner"},
|
| 72 |
+
{"text": "It ain't what they call you, it's what you answer to.", "author": "W.C. Fields", "style": "wit"},
|
| 73 |
+
{"text": "Horse sense is the thing a horse has which keeps it from betting on people.", "author": "W.C. Fields", "style": "wit"},
|
| 74 |
+
|
| 75 |
+
# --- Spike Milligan ---
|
| 76 |
+
{"text": "All I ask is the chance to prove that money can't make me happy.", "author": "Spike Milligan", "style": "one-liner"},
|
| 77 |
+
{"text": "I have the body of an eighteen year old. I keep it in the fridge.", "author": "Spike Milligan", "style": "dark"},
|
| 78 |
+
{"text": "I thought I'd begin by reading a poem by Shakespeare, but then I thought, why should I? He never reads any of mine.", "author": "Spike Milligan", "style": "absurd"},
|
| 79 |
+
{"text": "And God said, 'Let there be light' and there was light, but the Electricity Board said He would have to wait until Thursday to be connected.", "author": "Spike Milligan", "style": "absurd"},
|
| 80 |
+
{"text": "Said Hamlet to Ophelia, I'll draw a sketch of thee. What kind of pencil shall I use? 2B or not 2B?", "author": "Spike Milligan", "style": "wordplay"},
|
| 81 |
+
{"text": "Is there anything worn under the kilt? No, it's all in perfect working order.", "author": "Spike Milligan", "style": "wordplay"},
|
| 82 |
+
{"text": "My father had a profound influence on me. He was a lunatic.", "author": "Spike Milligan", "style": "one-liner"},
|
| 83 |
+
{"text": "Money can't buy you happiness but it does bring you a more pleasant form of misery.", "author": "Spike Milligan", "style": "wit"},
|
| 84 |
+
|
| 85 |
+
# --- P.G. Wodehouse ---
|
| 86 |
+
{"text": "He had just about enough intelligence to open his mouth when he wanted to eat, but certainly no more.", "author": "P.G. Wodehouse", "style": "wit"},
|
| 87 |
+
{"text": "He had the look of one who had drunk the cup of life and found a dead beetle at the bottom.", "author": "P.G. Wodehouse", "style": "wit"},
|
| 88 |
+
{"text": "He was a tubby little chap who looked as if he had been poured into his clothes and had forgotten to say 'When!'", "author": "P.G. Wodehouse", "style": "wit"},
|
| 89 |
+
{"text": "There is only one cure for grey hair. It was invented by a Frenchman. It is called the guillotine.", "author": "P.G. Wodehouse", "style": "dark"},
|
| 90 |
+
{"text": "I could see that, if not actually disgruntled, he was far from being gruntled.", "author": "P.G. Wodehouse", "style": "wordplay"},
|
| 91 |
+
{"text": "She had a penetrating sort of laugh. Rather like a train going into a tunnel.", "author": "P.G. Wodehouse", "style": "wit"},
|
| 92 |
+
{"text": "The fascination of shooting as a sport depends almost wholly on whether you are at the right or wrong end of the gun.", "author": "P.G. Wodehouse", "style": "wit"},
|
| 93 |
+
{"text": "It is a good rule in life never to apologize. The right sort of people do not want apologies, and the wrong sort take a mean advantage of them.", "author": "P.G. Wodehouse", "style": "wit"},
|
| 94 |
+
|
| 95 |
+
# =========================================================================
|
| 96 |
+
# STAND-UP LEGENDS
|
| 97 |
+
# =========================================================================
|
| 98 |
+
|
| 99 |
+
# --- Bill Hicks ---
|
| 100 |
+
{"text": "It's always funny until someone gets hurt. Then it's just hilarious.", "author": "Bill Hicks", "style": "dark"},
|
| 101 |
+
{"text": "I don't mean to sound bitter, cold, or cruel, but I am, so that's how it comes out.", "author": "Bill Hicks", "style": "one-liner"},
|
| 102 |
+
{"text": "Children are smarter than any of us. You know how I know that? I don't know one child with a full time job and children.", "author": "Bill Hicks", "style": "observational"},
|
| 103 |
+
{"text": "I believe God created me in one day. Yeah, looks like He rushed it.", "author": "Bill Hicks", "style": "self-deprecating"},
|
| 104 |
+
{"text": "I never got along with my dad. Kids used to come up to me and say, 'My dad can beat up your dad.' I'd say, 'Yeah? When?'", "author": "Bill Hicks", "style": "dark"},
|
| 105 |
+
{"text": "We all pay for life with death, so everything in between should be free.", "author": "Bill Hicks", "style": "observational"},
|
| 106 |
+
{"text": "When two or more people agree on an issue, I form on the other side.", "author": "Bill Hicks", "style": "one-liner"},
|
| 107 |
+
{"text": "People say 'Iraq had the fourth largest army in the world.' Yeah, maybe, but you know what, after the first three largest armies, there's a real big drop-off.", "author": "Bill Hicks", "style": "observational"},
|
| 108 |
+
|
| 109 |
+
# --- Mitch Hedberg ---
|
| 110 |
+
{"text": "I used to do drugs. I still do, but I used to, too.", "author": "Mitch Hedberg", "style": "one-liner"},
|
| 111 |
+
{"text": "My fake plants died because I did not pretend to water them.", "author": "Mitch Hedberg", "style": "deadpan"},
|
| 112 |
+
{"text": "An escalator can never break: it can only become stairs. You should never see an 'Escalator Temporarily Out Of Order' sign, just 'Escalator Temporarily Stairs. Sorry for the convenience.'", "author": "Mitch Hedberg", "style": "observational"},
|
| 113 |
+
{"text": "I'm against picketing, but I don't know how to show it.", "author": "Mitch Hedberg", "style": "deadpan"},
|
| 114 |
+
{"text": "I wear a necklace, cause I wanna know when I'm upside down.", "author": "Mitch Hedberg", "style": "absurd"},
|
| 115 |
+
{"text": "Dogs are forever in the push-up position.", "author": "Mitch Hedberg", "style": "observational"},
|
| 116 |
+
{"text": "A burrito is a sleeping bag for ground beef.", "author": "Mitch Hedberg", "style": "one-liner"},
|
| 117 |
+
{"text": "Fettuccine alfredo is macaroni and cheese for adults.", "author": "Mitch Hedberg", "style": "observational"},
|
| 118 |
+
{"text": "Every book is a children's book if the kid can read.", "author": "Mitch Hedberg", "style": "deadpan"},
|
| 119 |
+
{"text": "Rice is great if you're really hungry and want to eat two thousand of something.", "author": "Mitch Hedberg", "style": "deadpan"},
|
| 120 |
+
|
| 121 |
+
# --- Steven Wright ---
|
| 122 |
+
{"text": "I intend to live forever. So far, so good.", "author": "Steven Wright", "style": "deadpan"},
|
| 123 |
+
{"text": "A lot of people are afraid of heights. Not me, I'm afraid of widths.", "author": "Steven Wright", "style": "deadpan"},
|
| 124 |
+
{"text": "I went to a restaurant that serves 'breakfast at any time.' So I ordered French Toast during the Renaissance.", "author": "Steven Wright", "style": "absurd"},
|
| 125 |
+
{"text": "I bought some batteries, but they weren't included.", "author": "Steven Wright", "style": "deadpan"},
|
| 126 |
+
{"text": "Everywhere is within walking distance if you have the time.", "author": "Steven Wright", "style": "deadpan"},
|
| 127 |
+
{"text": "Right now I'm having amnesia and deja vu at the same time. I think I've forgotten this before.", "author": "Steven Wright", "style": "deadpan"},
|
| 128 |
+
{"text": "I put instant coffee in a microwave oven and almost went back in time.", "author": "Steven Wright", "style": "absurd"},
|
| 129 |
+
{"text": "I bought a humidifier and a dehumidifier. I put them in the same room and let them fight it out.", "author": "Steven Wright", "style": "absurd"},
|
| 130 |
+
{"text": "There's a fine line between fishing and just standing on the shore like an idiot.", "author": "Steven Wright", "style": "deadpan"},
|
| 131 |
+
{"text": "Experience is something you don't get until just after you need it.", "author": "Steven Wright", "style": "deadpan"},
|
| 132 |
+
|
| 133 |
+
# --- Rodney Dangerfield ---
|
| 134 |
+
{"text": "I get no respect. The way my luck is running, if I was a politician I would be honest.", "author": "Rodney Dangerfield", "style": "self-deprecating"},
|
| 135 |
+
{"text": "I told my psychiatrist that everyone hates me. He said I was being ridiculous — everyone hasn't met me yet.", "author": "Rodney Dangerfield", "style": "self-deprecating"},
|
| 136 |
+
{"text": "I was such an ugly kid. When I played in the sandbox the cat kept covering me up.", "author": "Rodney Dangerfield", "style": "self-deprecating"},
|
| 137 |
+
{"text": "My wife made me join a bridge club. I jump off next Tuesday.", "author": "Rodney Dangerfield", "style": "dark"},
|
| 138 |
+
{"text": "I looked up my family tree and found three dogs using it.", "author": "Rodney Dangerfield", "style": "self-deprecating"},
|
| 139 |
+
{"text": "When I was born I was so ugly the doctor slapped my mother.", "author": "Rodney Dangerfield", "style": "self-deprecating"},
|
| 140 |
+
{"text": "My wife and I were happy for twenty years. Then we met.", "author": "Rodney Dangerfield", "style": "one-liner"},
|
| 141 |
+
{"text": "I could tell that my parents hated me. My bath toys were a toaster and a radio.", "author": "Rodney Dangerfield", "style": "dark"},
|
| 142 |
+
{"text": "I went to a fight the other night, and a hockey game broke out.", "author": "Rodney Dangerfield", "style": "one-liner"},
|
| 143 |
+
|
| 144 |
+
# --- Henny Youngman ---
|
| 145 |
+
{"text": "Take my wife — please.", "author": "Henny Youngman", "style": "one-liner"},
|
| 146 |
+
{"text": "A doctor gave a man six months to live. The man couldn't pay his bill, so the doctor gave him another six months.", "author": "Henny Youngman", "style": "one-liner"},
|
| 147 |
+
{"text": "I take my wife everywhere, but she keeps finding her way back.", "author": "Henny Youngman", "style": "one-liner"},
|
| 148 |
+
{"text": "My wife's cooking is so bad, the flies pitched in to fix the screen door.", "author": "Henny Youngman", "style": "one-liner"},
|
| 149 |
+
{"text": "My wife said to me, 'For our anniversary I want to go somewhere I've never been before.' I said, 'Try the kitchen.'", "author": "Henny Youngman", "style": "one-liner"},
|
| 150 |
+
{"text": "A man goes to a psychiatrist. The doctor says, 'You're crazy.' The man says, 'I want a second opinion.' The doctor says, 'Okay, you're ugly too.'", "author": "Henny Youngman", "style": "one-liner"},
|
| 151 |
+
{"text": "The secret of a happy marriage remains a secret.", "author": "Henny Youngman", "style": "one-liner"},
|
| 152 |
+
{"text": "When I read about the evils of drinking, I gave up reading.", "author": "Henny Youngman", "style": "one-liner"},
|
| 153 |
+
|
| 154 |
+
# --- Joan Rivers ---
|
| 155 |
+
{"text": "People say that money is not the key to happiness, but I always figured if you have enough money, you can have a key made.", "author": "Joan Rivers", "style": "one-liner"},
|
| 156 |
+
{"text": "I wish I had a twin, so I could know what I'd look like without plastic surgery.", "author": "Joan Rivers", "style": "self-deprecating"},
|
| 157 |
+
{"text": "I've had so much plastic surgery, when I die they will donate my body to Tupperware.", "author": "Joan Rivers", "style": "self-deprecating"},
|
| 158 |
+
{"text": "I blame my mother for my poor sex life. All she told me was 'the man goes on top and the woman underneath.' For three years my husband and I slept in bunk beds.", "author": "Joan Rivers", "style": "one-liner"},
|
| 159 |
+
{"text": "I don't exercise. If God had wanted me to bend over, he would have put diamonds on the floor.", "author": "Joan Rivers", "style": "one-liner"},
|
| 160 |
+
{"text": "At my funeral, I want Meryl Streep crying in five different accents.", "author": "Joan Rivers", "style": "one-liner"},
|
| 161 |
+
{"text": "My husband wanted to be cremated. I told him I'd scatter his ashes at Neiman Marcus — that way, I'd visit him every day.", "author": "Joan Rivers", "style": "dark"},
|
| 162 |
+
{"text": "I was so ugly that my parents sent my picture to Ripley's Believe It or Not — they sent it back and said, 'We don't believe it.'", "author": "Joan Rivers", "style": "self-deprecating"},
|
| 163 |
+
|
| 164 |
+
# --- George Carlin ---
|
| 165 |
+
{"text": "Have you ever noticed that anybody driving slower than you is an idiot, and anyone going faster than you is a maniac?", "author": "George Carlin", "style": "observational"},
|
| 166 |
+
{"text": "'I am' is reportedly the shortest sentence in the English language. Could it be that 'I do' is the longest sentence?", "author": "George Carlin", "style": "wordplay"},
|
| 167 |
+
{"text": "Don't sweat the petty things and don't pet the sweaty things.", "author": "George Carlin", "style": "wordplay"},
|
| 168 |
+
{"text": "I went to a bookstore and asked the saleswoman, 'Where's the self-help section?' She said if she told me, it would defeat the purpose.", "author": "George Carlin", "style": "observational"},
|
| 169 |
+
{"text": "The reason I talk to myself is that I'm the only one whose answers I accept.", "author": "George Carlin", "style": "one-liner"},
|
| 170 |
+
{"text": "Inside every cynical person, there is a disappointed idealist.", "author": "George Carlin", "style": "observational"},
|
| 171 |
+
{"text": "When you're born you get a ticket to the freak show. When you're born in America, you get a front row seat.", "author": "George Carlin", "style": "observational"},
|
| 172 |
+
{"text": "That's why they call it the American Dream, because you have to be asleep to believe it.", "author": "George Carlin", "style": "observational"},
|
| 173 |
+
|
| 174 |
+
# --- Richard Pryor ---
|
| 175 |
+
{"text": "I'm not addicted to cocaine. I just like the way it smells.", "author": "Richard Pryor", "style": "one-liner"},
|
| 176 |
+
{"text": "I had to stop drinking, 'cause I got tired of waking up in my car driving ninety.", "author": "Richard Pryor", "style": "dark"},
|
| 177 |
+
{"text": "Marriage is really tough because you have to deal with feelings... and lawyers.", "author": "Richard Pryor", "style": "one-liner"},
|
| 178 |
+
{"text": "Who you gonna believe, me or your lying eyes?", "author": "Richard Pryor", "style": "one-liner"},
|
| 179 |
+
{"text": "I went through every phone book in Africa, and I didn't find one goddamn Pryor!", "author": "Richard Pryor", "style": "observational"},
|
| 180 |
+
{"text": "When that fire hit your ass, that will sober your ass up quick! I saw something, I went 'Well, that's a pretty blue. You know what? THAT'S MY ARM!'", "author": "Richard Pryor", "style": "dark"},
|
| 181 |
+
|
| 182 |
+
# --- Robin Williams ---
|
| 183 |
+
{"text": "Never pick a fight with an ugly person, they've got nothing to lose.", "author": "Robin Williams", "style": "one-liner"},
|
| 184 |
+
{"text": "If it's the Psychic Network, why do they need a phone number?", "author": "Robin Williams", "style": "observational"},
|
| 185 |
+
{"text": "Ah, yes, divorce... from the Latin word meaning to rip out a man's genitals through his wallet.", "author": "Robin Williams", "style": "dark"},
|
| 186 |
+
{"text": "Politics: 'Poli' a Latin word meaning 'many'; and 'tics' meaning 'bloodsucking creatures'.", "author": "Robin Williams", "style": "wordplay"},
|
| 187 |
+
{"text": "Why do they call it rush hour when nothing moves?", "author": "Robin Williams", "style": "observational"},
|
| 188 |
+
{"text": "If women ran the world, we wouldn't have wars, just intense negotiations every 28 days.", "author": "Robin Williams", "style": "observational"},
|
| 189 |
+
{"text": "Cricket is basically baseball on valium.", "author": "Robin Williams", "style": "one-liner"},
|
| 190 |
+
{"text": "Spring is nature's way of saying, 'Let's party!'", "author": "Robin Williams", "style": "one-liner"},
|
| 191 |
+
|
| 192 |
+
# =========================================================================
|
| 193 |
+
# BRITISH / IRISH COMEDY
|
| 194 |
+
# =========================================================================
|
| 195 |
+
|
| 196 |
+
# --- Chic Murray ---
|
| 197 |
+
{"text": "It's a small world, but I wouldn't want to have to paint it.", "author": "Chic Murray", "style": "deadpan"},
|
| 198 |
+
{"text": "I met this cowboy with a brown paper hat, paper waistcoat and paper trousers. He was wanted for rustling.", "author": "Chic Murray", "style": "wordplay"},
|
| 199 |
+
{"text": "My mother was so house proud that when my father got up to sleepwalk she had the bed made by the time he got back.", "author": "Chic Murray", "style": "absurd"},
|
| 200 |
+
{"text": "I first met my wife in the tunnel of love. She was digging it at the time.", "author": "Chic Murray", "style": "deadpan"},
|
| 201 |
+
{"text": "My father was a simple man. My mother was a simple woman. You see the result standing in front of you — a simpleton.", "author": "Chic Murray", "style": "self-deprecating"},
|
| 202 |
+
{"text": "I rang the bell of a small bed and breakfast place. A lady appeared at an upstairs window. 'What do you want?' 'I want to stay here.' 'Well, stay there then,' and she shut the window.", "author": "Chic Murray", "style": "deadpan"},
|
| 203 |
+
{"text": "We were so poor, the ultimate luxury in our house was ashtrays without advertisements.", "author": "Chic Murray", "style": "deadpan"},
|
| 204 |
+
|
| 205 |
+
# --- Tommy Cooper ---
|
| 206 |
+
{"text": "I used to be indecisive but now I am not quite sure.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 207 |
+
{"text": "Last night I slept like a log. I woke up in the fireplace.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 208 |
+
{"text": "I went to buy some camouflage trousers the other day but I couldn't find any.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 209 |
+
{"text": "Last night I dreamt I ate a ten-pound marshmallow. When I woke up the pillow was gone.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 210 |
+
{"text": "Police arrested two kids yesterday, one was drinking battery acid, the other was eating fireworks. They charged one and let the other one off.", "author": "Tommy Cooper", "style": "wordplay"},
|
| 211 |
+
{"text": "Somebody complimented me on my driving today. They left a little note on the windscreen. It said 'Parking Fine.' So that was nice.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 212 |
+
{"text": "I'm on a whiskey diet. I've lost three days already.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 213 |
+
{"text": "I went window shopping today. I bought four windows.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 214 |
+
{"text": "Gambling has brought our family together. We had to move to a smaller house.", "author": "Tommy Cooper", "style": "one-liner"},
|
| 215 |
+
|
| 216 |
+
# --- Billy Connolly ---
|
| 217 |
+
{"text": "Never trust a man who, when left alone in a room with a tea cosy, doesn't try it on.", "author": "Billy Connolly", "style": "observational"},
|
| 218 |
+
{"text": "Marriage is a wonderful invention: then again, so is a bicycle repair kit.", "author": "Billy Connolly", "style": "one-liner"},
|
| 219 |
+
{"text": "There are two seasons in Scotland: June and Winter.", "author": "Billy Connolly", "style": "observational"},
|
| 220 |
+
{"text": "My definition of an intellectual is someone who can listen to the William Tell Overture without thinking of the Lone Ranger.", "author": "Billy Connolly", "style": "observational"},
|
| 221 |
+
{"text": "Who discovered we could get milk from cows, and what did he think he was doing at the time?", "author": "Billy Connolly", "style": "observational"},
|
| 222 |
+
{"text": "Before you judge a man, walk a mile in his shoes. After that who cares? He's a mile away and you've got his shoes!", "author": "Billy Connolly", "style": "one-liner"},
|
| 223 |
+
{"text": "A lot of people say it's a lack of vocabulary that makes you swear. Rubbish. I know thousands of words but I still prefer 'f***.'", "author": "Billy Connolly", "style": "observational"},
|
| 224 |
+
{"text": "If Jesus was a Jew, how come he has a Mexican first name?", "author": "Billy Connolly", "style": "observational"},
|
| 225 |
+
|
| 226 |
+
# --- Peter Kay ---
|
| 227 |
+
{"text": "A cement mixer collided with a prison van. Motorists are asked to be on the lookout for sixteen hardened criminals.", "author": "Peter Kay", "style": "wordplay"},
|
| 228 |
+
{"text": "I think animal testing is a terrible idea; they get all nervous and give the wrong answers.", "author": "Peter Kay", "style": "wordplay"},
|
| 229 |
+
{"text": "So this bloke says to me, 'Can I come in your house and talk about your carpets?' I thought, 'That's all I need — a Jehoover's Witness.'", "author": "Peter Kay", "style": "wordplay"},
|
| 230 |
+
{"text": "It's impossible to look cool whilst picking up a Frisbee.", "author": "Peter Kay", "style": "observational"},
|
| 231 |
+
{"text": "Garlic bread — it's the future, I've tasted it.", "author": "Peter Kay", "style": "observational"},
|
| 232 |
+
{"text": "The most painful household accident is wearing socks and stepping on an upturned plug.", "author": "Peter Kay", "style": "observational"},
|
| 233 |
+
{"text": "Do you know that look women get when they want sex? Me neither.", "author": "Peter Kay", "style": "self-deprecating"},
|
| 234 |
+
|
| 235 |
+
# --- Tim Vine ---
|
| 236 |
+
{"text": "I've just been on a once-in-a-lifetime holiday. I'll tell you what, never again.", "author": "Tim Vine", "style": "wordplay"},
|
| 237 |
+
{"text": "Crime in multi-storey car parks. That is wrong on so many different levels.", "author": "Tim Vine", "style": "wordplay"},
|
| 238 |
+
{"text": "Velcro: what a rip-off.", "author": "Tim Vine", "style": "wordplay"},
|
| 239 |
+
{"text": "I've decided to sell my Hoover... well, it was just collecting dust.", "author": "Tim Vine", "style": "wordplay"},
|
| 240 |
+
{"text": "I was reading a book — 'The History of Glue' — I couldn't put it down.", "author": "Tim Vine", "style": "wordplay"},
|
| 241 |
+
{"text": "'Can you teach me to do the splits?' 'How flexible are you?' 'I can't make Tuesdays.'", "author": "Tim Vine", "style": "wordplay"},
|
| 242 |
+
{"text": "I went out on a date with Simile. I don't know what I metaphor.", "author": "Tim Vine", "style": "wordplay"},
|
| 243 |
+
{"text": "My next-door neighbour worships exhaust pipes, he's a catholic converter.", "author": "Tim Vine", "style": "wordplay"},
|
| 244 |
+
{"text": "Exit signs — they're on the way out.", "author": "Tim Vine", "style": "wordplay"},
|
| 245 |
+
{"text": "So I rang up my local swimming baths. I said, 'Is that the local swimming baths?' He said, 'It depends where you're calling from.'", "author": "Tim Vine", "style": "wordplay"},
|
| 246 |
+
|
| 247 |
+
# --- Milton Jones ---
|
| 248 |
+
{"text": "To the man on crutches, dressed in camouflage, who stole my wallet: you can hide but you can't run.", "author": "Milton Jones", "style": "wordplay"},
|
| 249 |
+
{"text": "About a month before he died, my grandfather, we covered his back full of lard. After that he went downhill very quickly.", "author": "Milton Jones", "style": "wordplay"},
|
| 250 |
+
{"text": "The worst job I ever had was as a forensicologist for the United Nations. One time I thought I'd come across the mass grave of a thousand snowmen. But it turned out it was just a field of carrots.", "author": "Milton Jones", "style": "absurd"},
|
| 251 |
+
{"text": "Years ago I used to supply Filofaxes for the Mafia. Yes, I was involved in very organised crime.", "author": "Milton Jones", "style": "wordplay"},
|
| 252 |
+
{"text": "Recently I've been attending meetings of Eavesdroppers Anonymous — not that they know!", "author": "Milton Jones", "style": "one-liner"},
|
| 253 |
+
{"text": "My grandfather invented the cold air balloon. It never really took off.", "author": "Milton Jones", "style": "wordplay"},
|
| 254 |
+
{"text": "On the other hand... you have different fingers.", "author": "Milton Jones", "style": "absurd"},
|
| 255 |
+
|
| 256 |
+
# --- Lee Mack ---
|
| 257 |
+
{"text": "I remember the last thing my nan said to me before she died. 'What are you doing here with that hammer?'", "author": "Lee Mack", "style": "dark"},
|
| 258 |
+
{"text": "I was in Waterstones the other day trying to buy a world map. The shop assistant said, 'Oh, we don't do them any more, try WH Smith.' I said, 'Blimey, how big is WH Smith?'", "author": "Lee Mack", "style": "wordplay"},
|
| 259 |
+
{"text": "I used to go out with an anesthetist — she was a local girl.", "author": "Lee Mack", "style": "wordplay"},
|
| 260 |
+
{"text": "A friend of mine always wanted to be run over by a steam train. When it happened, he was chuffed to bits.", "author": "Lee Mack", "style": "dark"},
|
| 261 |
+
{"text": "I found out my friend had a secret life as a priest. It's his altar ego.", "author": "Lee Mack", "style": "wordplay"},
|
| 262 |
+
{"text": "People who live in glass houses have to answer the door.", "author": "Lee Mack", "style": "one-liner"},
|
| 263 |
+
|
| 264 |
+
# --- Stewart Francis ---
|
| 265 |
+
{"text": "You know who really gives kids a bad name? Posh and Becks.", "author": "Stewart Francis", "style": "wordplay"},
|
| 266 |
+
{"text": "My dad has a weird hobby: he collects empty bottles — which sounds so much better than 'alcoholic.'", "author": "Stewart Francis", "style": "one-liner"},
|
| 267 |
+
{"text": "So what if I can't spell Armageddon? It's not the end of the world.", "author": "Stewart Francis", "style": "wordplay"},
|
| 268 |
+
{"text": "Even though I'm proud my dad invented the rear-view mirror, we're not as close as we appear.", "author": "Stewart Francis", "style": "wordplay"},
|
| 269 |
+
{"text": "My uncle was crushed by a piano. His funeral was very low key.", "author": "Stewart Francis", "style": "dark"},
|
| 270 |
+
{"text": "Have you ever imagined a world with no hypothetical situations?", "author": "Stewart Francis", "style": "deadpan"},
|
| 271 |
+
{"text": "I quit my job at the helium gas factory. I refused to be spoken to in that tone.", "author": "Stewart Francis", "style": "wordplay"},
|
| 272 |
+
|
| 273 |
+
# --- Gary Delaney ---
|
| 274 |
+
{"text": "I told my wife she was drawing her eyebrows too high. She looked surprised.", "author": "Gary Delaney", "style": "wordplay"},
|
| 275 |
+
{"text": "We can't even afford a garden, so when my wife bought us a trampoline I hit the roof.", "author": "Gary Delaney", "style": "wordplay"},
|
| 276 |
+
{"text": "I bought a ceiling fan. Complete waste of money. He just stood there clapping.", "author": "Gary Delaney", "style": "wordplay"},
|
| 277 |
+
{"text": "My therapist says I have a preoccupation with vengeance. We'll see about that.", "author": "Gary Delaney", "style": "dark"},
|
| 278 |
+
{"text": "I can give you the cause of anaphylactic shock in a nutshell.", "author": "Gary Delaney", "style": "wordplay"},
|
| 279 |
+
{"text": "Dave drowned. So at the funeral we got him a wreath in the shape of a lifebelt. Well, it's what he would have wanted.", "author": "Gary Delaney", "style": "dark"},
|
| 280 |
+
|
| 281 |
+
# --- Jimmy Carr ---
|
| 282 |
+
{"text": "Swimming is good for you, especially if you're drowning. Not only do you get a cardiovascular workout but also you don't die.", "author": "Jimmy Carr", "style": "dark"},
|
| 283 |
+
{"text": "I realized I was dyslexic when I went to a toga party dressed as a goat.", "author": "Jimmy Carr", "style": "wordplay"},
|
| 284 |
+
{"text": "When I was a kid, I used to have an imaginary friend. I used to think he went everywhere with me. Turns out he was just a kid with no friends following me around the playground.", "author": "Jimmy Carr", "style": "dark"},
|
| 285 |
+
{"text": "If only Africa had more mosquito nets, then every year we could save millions of mosquitoes from dying needlessly of AIDS.", "author": "Jimmy Carr", "style": "dark"},
|
| 286 |
+
{"text": "Throw acid on a woman's face, and no one bats an eye. Throw acid on the floor, and everyone loses their mind.", "author": "Jimmy Carr", "style": "dark"},
|
| 287 |
+
{"text": "My girlfriend said she wanted me to tease her, so I said 'Alright, fatty.'", "author": "Jimmy Carr", "style": "dark"},
|
| 288 |
+
{"text": "I say no to drugs, but they don't listen.", "author": "Jimmy Carr", "style": "one-liner"},
|
| 289 |
+
|
| 290 |
+
# =========================================================================
|
| 291 |
+
# EDINBURGH FRINGE JOKE AWARD WINNERS (Dave's Funniest Joke)
|
| 292 |
+
# =========================================================================
|
| 293 |
+
|
| 294 |
+
{"text": "I can't believe Amy Winehouse self-harms. She's so irritating she must be able to find someone to do it for her.", "author": "Zoe Lyons (Edinburgh Fringe Winner 2008)", "style": "dark"},
|
| 295 |
+
{"text": "Hedgehogs — why can't they just share the hedge?", "author": "Dan Antopolski (Edinburgh Fringe Winner 2009)", "style": "absurd"},
|
| 296 |
+
# Tim Vine 2010 — already included in Tim Vine section above
|
| 297 |
+
{"text": "I needed a password eight characters long so I picked Snow White and the Seven Dwarves.", "author": "Nick Helm (Edinburgh Fringe Winner 2011)", "style": "wordplay"},
|
| 298 |
+
# Stewart Francis 2012 — already included in Stewart Francis section above
|
| 299 |
+
{"text": "I heard a rumour that Cadbury is bringing out an oriental chocolate bar. Could be a Chinese Wispa.", "author": "Rob Auton (Edinburgh Fringe Winner 2013)", "style": "wordplay"},
|
| 300 |
+
# Tim Vine 2014 — already included in Tim Vine section above
|
| 301 |
+
{"text": "I just deleted all the German names off my phone. It's Hans free.", "author": "Darren Walsh (Edinburgh Fringe Winner 2015)", "style": "wordplay"},
|
| 302 |
+
{"text": "My dad has suggested that I register for a donor card. He's a man after my own heart.", "author": "Masai Graham (Edinburgh Fringe Winner 2016)", "style": "wordplay"},
|
| 303 |
+
{"text": "I'm not a fan of the new pound coin, but then again, I hate all change.", "author": "Ken Cheng (Edinburgh Fringe Winner 2017)", "style": "wordplay"},
|
| 304 |
+
{"text": "Working at the job centre has to be a tense job — knowing that if you get fired, you still have to come in the next day.", "author": "Adam Rowe (Edinburgh Fringe Winner 2018)", "style": "observational"},
|
| 305 |
+
{"text": "I keep randomly shouting out 'Broccoli' and 'Cauliflower' — I think I might have florets.", "author": "Olaf Falafel (Edinburgh Fringe Winner 2019)", "style": "wordplay"},
|
| 306 |
+
# 2020: No award (COVID-19 pandemic, Fringe cancelled)
|
| 307 |
+
{"text": "I thought the word 'Caesarean' began with the letter S, but when I looked in the dictionary, it was in the C section.", "author": "Masai Graham (Edinburgh Fringe Winner 2021)", "style": "wordplay"},
|
| 308 |
+
{"text": "I tried to steal spaghetti from the shop, but the female guard saw me and I couldn't get pasta.", "author": "Masai Graham (Edinburgh Fringe Winner 2022)", "style": "wordplay"},
|
| 309 |
+
{"text": "I started dating a zookeeper, but it turned out he was a cheetah.", "author": "Lorna Rose Treen (Edinburgh Fringe Winner 2023)", "style": "wordplay"},
|
| 310 |
+
{"text": "I was going to sail around the globe in the world's smallest ship but I bottled it.", "author": "Mark Simmons (Edinburgh Fringe Winner 2024)", "style": "wordplay"},
|
| 311 |
+
{"text": "I had to visit the trauma unit last weekend. He prefers the term dad.", "author": "Andy Gleeks (Edinburgh Fringe Winner 2025)", "style": "wordplay"},
|
| 312 |
+
|
| 313 |
+
# =========================================================================
|
| 314 |
+
# MODERN STAND-UP
|
| 315 |
+
# =========================================================================
|
| 316 |
+
|
| 317 |
+
# --- Demetri Martin ---
|
| 318 |
+
{"text": "Employee of the month is a good example of how somebody can be both a winner and a loser at the same time.", "author": "Demetri Martin", "style": "deadpan"},
|
| 319 |
+
{"text": "I wanna make a jigsaw puzzle that's 40,000 pieces. And when you finish it, it says 'go outside.'", "author": "Demetri Martin", "style": "deadpan"},
|
| 320 |
+
{"text": "I wrapped my Christmas presents early this year, but I used the wrong paper. See, the paper I used said 'Happy Birthday' on it. I didn't want to waste it so I just wrote 'Jesus' on it.", "author": "Demetri Martin", "style": "absurd"},
|
| 321 |
+
{"text": "I like parties, but I don't like pinatas because the pinata promotes violence against flamboyant animals.", "author": "Demetri Martin", "style": "absurd"},
|
| 322 |
+
{"text": "A lot of people don't like bumper stickers. I don't mind bumper stickers. To me, a bumper sticker is a shortcut. It's like a little sign that says 'Hey, let's never hang out.'", "author": "Demetri Martin", "style": "observational"},
|
| 323 |
+
{"text": "I think the worst time to have a heart attack is during a game of charades.", "author": "Demetri Martin", "style": "deadpan"},
|
| 324 |
+
{"text": "The digital camera is a great invention because it allows us to reminisce. Instantly.", "author": "Demetri Martin", "style": "deadpan"},
|
| 325 |
+
{"text": "Saying 'I'm sorry' is the same as saying 'I apologize.' Except at a funeral.", "author": "Demetri Martin", "style": "dark"},
|
| 326 |
+
|
| 327 |
+
# --- Anthony Jeselnik ---
|
| 328 |
+
{"text": "I've spent the past two years looking for my ex-girlfriend's killer... but no one will do it.", "author": "Anthony Jeselnik", "style": "dark"},
|
| 329 |
+
{"text": "Whenever I meet a pretty girl, the first thing I look for is intelligence; because if she doesn't have that, she's mine.", "author": "Anthony Jeselnik", "style": "dark"},
|
| 330 |
+
{"text": "My uncle killed himself playing Russian Roulette. But I choose to remember him as a great Russian Roulette player.", "author": "Anthony Jeselnik", "style": "dark"},
|
| 331 |
+
{"text": "I let my roommate pick the movie last night. And halfway through, I was like, 'Dude, Paul Blart: Mall Cop 2? Again?'", "author": "Anthony Jeselnik", "style": "deadpan"},
|
| 332 |
+
{"text": "My girlfriend's mad at me because I got her a t-shirt that says 'I'm With Stupid.' It doesn't help that I got it for her funeral.", "author": "Anthony Jeselnik", "style": "dark"},
|
| 333 |
+
{"text": "I told my friend that she was drawing her eyebrows too high. She looked surprised.", "author": "Anthony Jeselnik", "style": "wordplay"},
|
| 334 |
+
{"text": "My mom said that the best day of her life was the day I was born. I said, 'You must have had a terrible life.'", "author": "Anthony Jeselnik", "style": "dark"},
|
| 335 |
+
{"text": "When you are lonely, dimming the lights and playing soft jazz music isn't going to help. It just makes it weird for the pizza guy.", "author": "Anthony Jeselnik", "style": "dark"},
|
| 336 |
+
|
| 337 |
+
# --- Nate Bargatze ---
|
| 338 |
+
{"text": "In a marriage, one of you is a dreamer. 'Money's not real. Let's have fun.' And the other person hates fun. And that's how you make a marriage.", "author": "Nate Bargatze", "style": "observational"},
|
| 339 |
+
{"text": "My dad was a magician. Or as he liked to say, 'It goes clown, then magic. There's two steps. You can take them in either order.'", "author": "Nate Bargatze", "style": "deadpan"},
|
| 340 |
+
{"text": "What's easier than working out is just telling people you used to weigh 300 pounds. They don't know. I've never weighed 300 pounds, but I look pretty amazing for a guy who used to weigh 300.", "author": "Nate Bargatze", "style": "deadpan"},
|
| 341 |
+
{"text": "My wife and I don't have the same taste in anything. The only thing we've ever agreed on is getting married, and we were both wrong.", "author": "Nate Bargatze", "style": "deadpan"},
|
| 342 |
+
{"text": "Do you know the difference between a dentist and a sadist? Newer magazines.", "author": "Nate Bargatze", "style": "one-liner"},
|
| 343 |
+
{"text": "I don't even know how to be healthy. Every time I hear something's good for me, the next day they say it's bad for me. I just eat what I want and hope for the best.", "author": "Nate Bargatze", "style": "observational"},
|
| 344 |
+
|
| 345 |
+
# --- Hannibal Buress ---
|
| 346 |
+
{"text": "When people go through something rough in life, they say, 'I'm taking it one day at a time.' Yes, so is everybody. Because that's how time works.", "author": "Hannibal Buress", "style": "deadpan"},
|
| 347 |
+
{"text": "I don't like when people say, 'I'll pray for you.' You're going to pray for me? So basically, you're going to sit at home and do nothing?", "author": "Hannibal Buress", "style": "observational"},
|
| 348 |
+
{"text": "Whenever I eat at a restaurant I never put the napkin in my lap. People say, 'Hannibal, why don't you put the napkin in your lap?' Because I believe in myself.", "author": "Hannibal Buress", "style": "deadpan"},
|
| 349 |
+
{"text": "I'm in awful shape. My girlfriend gives me health tips sometimes, like, 'Hannibal, you're going to die.'", "author": "Hannibal Buress", "style": "deadpan"},
|
| 350 |
+
{"text": "You gotta be careful with compliments. One wrong word and suddenly you're the weird guy at the party. 'Hey, nice earlobes.' I just wanted to be nice!", "author": "Hannibal Buress", "style": "observational"},
|
| 351 |
+
{"text": "Why would I go to a club on my birthday? So I can be surrounded by people who don't know and don't care?", "author": "Hannibal Buress", "style": "deadpan"},
|
| 352 |
+
|
| 353 |
+
# --- John Mulaney ---
|
| 354 |
+
{"text": "You could pour soup in my lap and I'd probably apologize to you.", "author": "John Mulaney", "style": "self-deprecating"},
|
| 355 |
+
{"text": "College was like a four-year game show called 'Do My Friends Hate Me or Do I Just Need to Go to Sleep?'", "author": "John Mulaney", "style": "observational"},
|
| 356 |
+
{"text": "In terms of instant relief, cancelling plans is like heroin.", "author": "John Mulaney", "style": "observational"},
|
| 357 |
+
{"text": "I don't look older, I just look worse.", "author": "John Mulaney", "style": "self-deprecating"},
|
| 358 |
+
{"text": "You are gathered together as a school, and you are told never to talk to an adult you don't know. And you are told this by an adult you don't know.", "author": "John Mulaney", "style": "observational"},
|
| 359 |
+
{"text": "I always thought quicksand was going to be a much bigger problem than it turned out to be.", "author": "John Mulaney", "style": "observational"},
|
| 360 |
+
{"text": "The one thing you can't replace is your reputation. Unless you never had one.", "author": "John Mulaney", "style": "deadpan"},
|
| 361 |
+
|
| 362 |
+
# --- Bo Burnham ---
|
| 363 |
+
{"text": "Laughter is the best medicine, you know, besides medicine.", "author": "Bo Burnham", "style": "deadpan"},
|
| 364 |
+
{"text": "He believed you should fight fire with fire. Which is a terrible way to live your life, because he was a firefighter.", "author": "Bo Burnham", "style": "dark"},
|
| 365 |
+
{"text": "If I had a dime for every time a homeless guy asked me for change, I'd still say no.", "author": "Bo Burnham", "style": "dark"},
|
| 366 |
+
{"text": "What do you call a kid with no arms and an eye patch? Names.", "author": "Bo Burnham", "style": "dark"},
|
| 367 |
+
{"text": "The average person has one Fallopian tube.", "author": "Bo Burnham", "style": "deadpan"},
|
| 368 |
+
{"text": "I know very little about anything, but I do know this: if you can live your life without an audience, you should do it.", "author": "Bo Burnham", "style": "observational"},
|
| 369 |
+
{"text": "My whole family thinks I'm gay. I keep telling them 'I'm not gay! I'm not gay!' And this isn't helping because I can't stop having sex with dudes.", "author": "Bo Burnham", "style": "dark"},
|
| 370 |
+
|
| 371 |
+
# --- Taylor Tomlinson ---
|
| 372 |
+
{"text": "Having bangs feels exactly like being on mushrooms. The whole time you're looking at your friends asking, 'Do I look weird?'", "author": "Taylor Tomlinson", "style": "observational"},
|
| 373 |
+
{"text": "You have to work on yourself in your 20s because if you don't, then you'll turn 30 and all the shitty parts of your personality will solidify and that'll just be who you are now.", "author": "Taylor Tomlinson", "style": "observational"},
|
| 374 |
+
{"text": "Being bipolar is like not knowing how to swim. It might be embarrassing to tell people, and it might be hard to take you certain places.", "author": "Taylor Tomlinson", "style": "observational"},
|
| 375 |
+
{"text": "I had to break up with my last boyfriend because he didn't know what he wanted. And he was like, 'How can you say that?' And I was like, 'You told me that.'", "author": "Taylor Tomlinson", "style": "observational"},
|
| 376 |
+
{"text": "People think it's brave to have children. It's actually braver to not, because every day someone asks you why.", "author": "Taylor Tomlinson", "style": "observational"},
|
| 377 |
+
{"text": "I lost my virginity at 22. To a youth pastor. Some of those words are more concerning depending on what order you put them in.", "author": "Taylor Tomlinson", "style": "dark"},
|
| 378 |
+
|
| 379 |
+
# =========================================================================
|
| 380 |
+
# AUSTRALIAN / NEW ZEALAND
|
| 381 |
+
# =========================================================================
|
| 382 |
+
|
| 383 |
+
# --- Tim Minchin ---
|
| 384 |
+
{"text": "You know what they call alternative medicine that's been proved to work? Medicine.", "author": "Tim Minchin", "style": "observational"},
|
| 385 |
+
{"text": "It's quite hard being right in the face of people who are wrong without sounding like a complete dickhead.", "author": "Tim Minchin", "style": "observational"},
|
| 386 |
+
{"text": "Happiness is like an orgasm: if you think about it too much, it goes away.", "author": "Tim Minchin", "style": "observational"},
|
| 387 |
+
{"text": "Everything organic and natural is good, ignoring the fact that organic natural substances include arsenic, poo, and crocodiles.", "author": "Tim Minchin", "style": "observational"},
|
| 388 |
+
{"text": "Science adjusts its views based on what's observed. Faith is the denial of observation so that belief can be preserved.", "author": "Tim Minchin", "style": "observational"},
|
| 389 |
+
{"text": "Life is meaningless. And I love every second of it.", "author": "Tim Minchin", "style": "observational"},
|
| 390 |
+
|
| 391 |
+
# --- Hannah Gadsby ---
|
| 392 |
+
{"text": "I identify as tired.", "author": "Hannah Gadsby", "style": "deadpan"},
|
| 393 |
+
{"text": "I built a career out of self-deprecating humor, and I don't want to do that anymore. Do you understand what self-deprecation means when it comes from somebody who already exists in the margins? It's not humility. It's humiliation.", "author": "Hannah Gadsby", "style": "observational"},
|
| 394 |
+
{"text": "I hate TED talks. Everybody's got a TED talk. Even me, and I have nothing to say.", "author": "Hannah Gadsby", "style": "self-deprecating"},
|
| 395 |
+
{"text": "There is nothing stronger than a broken woman who has rebuilt herself.", "author": "Hannah Gadsby", "style": "observational"},
|
| 396 |
+
{"text": "People think I stopped telling jokes. I didn't stop. I just stopped telling the ones that let you off the hook.", "author": "Hannah Gadsby", "style": "observational"},
|
| 397 |
+
|
| 398 |
+
# --- Carl Barron ---
|
| 399 |
+
{"text": "I was asked by a waitress, with a straight face she said, 'Would you care for an orange juice?' I said, 'If it needed me.'", "author": "Carl Barron", "style": "deadpan"},
|
| 400 |
+
{"text": "I know I'm getting old because I'm starting to get excited about tea.", "author": "Carl Barron", "style": "observational"},
|
| 401 |
+
{"text": "My uncle gave me a walkie-talkie for my birthday. He said, 'Don't call me, I'll call you.'", "author": "Carl Barron", "style": "one-liner"},
|
| 402 |
+
{"text": "I went to the doctor the other day and he said, 'Don't eat anything fatty.' I said, 'What, like chips and burgers?' He said, 'No fatty, don't eat anything.'", "author": "Carl Barron", "style": "wordplay"},
|
| 403 |
+
{"text": "My dad used to say, 'Always fight fire with fire.' Which is probably why he got thrown out of the fire brigade.", "author": "Carl Barron", "style": "one-liner"},
|
| 404 |
+
]
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
# Verify and report stats
|
| 408 |
+
if __name__ == "__main__":
    # Self-check: running this module directly prints summary statistics
    # about the JOKES corpus (counts per comedian / style, length checks).
    from collections import Counter

    total = len(JOKES)
    print(f"Total jokes: {total}")
    print()

    # Count by author. Award entries append a parenthesised suffix to the
    # name (e.g. "Name (Edinburgh Fringe Winner 2016)"); splitting on " ("
    # folds those entries into the comedian's plain name.
    author_counts = Counter(j["author"].split(" (")[0] for j in JOKES)
    print("Jokes per comedian:")
    # most_common() sorts by descending count and keeps first-seen order
    # for ties, matching a stable sort on the negated count.
    for author, count in author_counts.most_common():
        print(f" {author}: {count}")

    print()

    # Count by style
    style_counts = Counter(j["style"] for j in JOKES)
    print("Jokes per style:")
    for style, count in style_counts.most_common():
        print(f" {style}: {count}")

    print()

    # Check for length violations: report every joke longer than 500
    # characters together with a 60-character preview.
    long_jokes = [
        (j["author"], len(j["text"]), j["text"][:60])
        for j in JOKES
        if len(j["text"]) > 500
    ]
    if long_jokes:
        print(f"WARNING: {len(long_jokes)} jokes exceed 500 characters:")
        for author, length, preview in long_jokes:
            print(f" {author} ({length} chars): {preview}...")
    else:
        print("All jokes are under 500 characters.")

    under_280 = sum(1 for j in JOKES if len(j["text"]) <= 280)
    # Guard the percentage: an empty corpus reports 0% instead of raising
    # ZeroDivisionError.
    percent = under_280 * 100 // total if total else 0
    print(f"Jokes under 280 characters (tweet-length): {under_280}/{total} ({percent}%)")
|
requirements.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Web dashboard
|
| 2 |
+
flask>=3.0
|
| 3 |
+
gunicorn>=21.2
|
| 4 |
+
|
| 5 |
+
# CLI (imported by scraper package)
|
| 6 |
+
click>=8.0
|
| 7 |
+
|
| 8 |
+
# HTTP requests
|
| 9 |
+
requests>=2.31
|
| 10 |
+
|
| 11 |
+
# Utilities
|
| 12 |
+
python-dotenv>=1.0
|
| 13 |
+
|
| 14 |
+
# Scraper platform deps (needed for imports)
|
| 15 |
+
atproto>=0.0.46
|
| 16 |
+
tweepy>=4.14
|
| 17 |
+
praw>=7.7
|
| 18 |
+
google-api-python-client>=2.100
|
| 19 |
+
youtube-transcript-api>=0.6
|
| 20 |
+
feedparser>=6.0
|
scraper/__init__.py
ADDED
|
File without changes
|
scraper/__main__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from scraper.cli import cli
|
| 2 |
+
|
| 3 |
+
# Package entry point: this file is scraper/__main__.py, so it is what
# ``python -m scraper`` executes; it simply hands control to the click group.
if __name__ == "__main__":
    cli()
|
scraper/cli.py
ADDED
|
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import click
|
| 2 |
+
import logging
|
| 3 |
+
from scraper import db
|
| 4 |
+
from scraper.config import TOPICS, STYLES, SOURCE_CATEGORIES
|
| 5 |
+
from scraper.scoring import rescore_source
|
| 6 |
+
from scraper.utils import truncate
|
| 7 |
+
|
| 8 |
+
# Process-wide logging setup, applied when this module is imported:
# INFO and above, with a time-of-day stamp and the level name in brackets.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Named logger shared by the CLI commands in this module.
logger = logging.getLogger("joke-corpus")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@click.group()
def cli() -> None:
    """Joke corpus — scrape, score, and curate comedy."""
    # Group callback: click runs this before dispatching to a subcommand,
    # so db.init_db() is called on every CLI invocation that reaches one.
    # NOTE(review): presumably init_db() is safe to call repeatedly
    # (e.g. creates the schema only if missing) — confirm in scraper/db.py.
    db.init_db()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# --- Scrape commands ---
|
| 23 |
+
|
| 24 |
+
@cli.command()
@click.option("--source", "platform", default=None, help="Scrape a single platform")
@click.option("--backfill", is_flag=True, help="Collect historical posts")
def scrape(platform, backfill):
    """Scrape all configured sources for new content."""
    # `platform` is passed straight to db.list_sources(); None is the
    # default when --source is not given.
    configured = db.list_sources(platform)
    if not configured:
        click.echo("No sources configured. Use 'add' to add some.")
        return

    for entry in configured:
        platform_name = entry["platform"]
        target = entry["handle_or_url"]
        click.echo(f"Scraping {platform_name}: {target}...")

        # Everything for one source runs inside a single try so that a
        # failure is recorded and logged, and the loop moves on to the
        # next source instead of aborting the whole run.
        try:
            impl = _get_scraper(platform_name)
            if impl is not None:
                # scrape_source() returns (entries found, entries new).
                found_count, new_count = impl.scrape_source(dict(entry), backfill=backfill)
                db.update_source_last_scraped(entry["id"])
                db.log_scrape(entry["id"], found_count, new_count, "success")
                rescore_source(entry["id"])
                click.echo(f" Found {found_count}, {new_count} new entries.")
            else:
                # No scraper for this platform: report it and move on
                # without writing a scrape-log row.
                click.echo(f" No scraper implemented for '{platform_name}' yet. Skipping.")
        except Exception as exc:
            db.log_scrape(entry["id"], 0, 0, f"error: {exc}")
            logger.error(f" Error scraping {target}: {exc}")
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@cli.command("import-datasets")
def import_datasets():
    """One-time import of pre-built comedy datasets."""
    # Imported inside the command so the importer module is only loaded
    # when this command actually runs.
    from scraper.static.import_datasets import run as run_import

    run_import()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# --- Browse commands ---
|
| 64 |
+
|
| 65 |
+
@cli.command()
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None)
@click.option("--style", default=None, help="Comma-separated style names")
@click.option("--tier", type=click.Choice(["S", "A", "B", "C"]), default=None)
@click.option("--platform", default=None)
@click.option("--days", default=7, help="Look back N days (0 = all time)")
@click.option("--limit", default=20)
def top(topic, style, tier, platform, days, limit):
    """Show top-scored entries."""
    # Only one style is passed on to db.top_entries. Names are stripped so
    # "dark, warm" or " dark" still match, and the user is told (on stderr)
    # when additional names are being ignored instead of dropping them
    # silently.
    style_val = None
    if style:
        names = [name.strip() for name in style.split(",") if name.strip()]
        if names:
            style_val = names[0]
        if len(names) > 1:
            click.echo(
                f"Note: only the first style ('{style_val}') is used; "
                f"ignoring: {', '.join(names[1:])}",
                err=True,
            )

    # 0 (or a negative value) means "all time", expressed to the DB layer as None.
    days_val = days if days > 0 else None

    entries = db.top_entries(
        topic=topic, style=style_val, tier=tier,
        platform=platform, days=days_val, limit=limit,
    )
    _display_entries(entries)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@cli.command()
@click.argument("query")
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None)
@click.option("--style", default=None)
@click.option("--tier", type=click.Choice(["S", "A", "B", "C"]), default=None)
@click.option("--platform", default=None)
@click.option("--limit", default=20)
def search(query, topic, style, tier, platform, limit):
    """Full-text search the corpus."""
    # Optional filters are forwarded unchanged; None means "no filter".
    filters = {
        "topic": topic,
        "style": style,
        "tier": tier,
        "platform": platform,
    }
    matches = db.search_entries(query=query, limit=limit, **filters)
    _display_entries(matches)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@cli.command()
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None)
@click.option("--style", default=None)
@click.option("--tier", type=click.Choice(["S", "A", "B", "C"]), default=None)
def random(topic, style, tier):
    """Show a random entry (inspiration mode)."""
    picked = db.random_entry(topic=topic, style=style, tier=tier)
    if not picked:
        click.echo("No entries match those filters.")
        return
    _display_entry_detail(picked)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# --- Classify command ---
|
| 113 |
+
|
| 114 |
+
@cli.command()
@click.argument("entry_id", type=int)
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None)
@click.option("--style", default=None, help="Comma-separated style names")
def classify(entry_id, topic, style):
    """Set topic and/or styles for an entry."""
    style_names = None
    if style:
        # Drop empty fragments so input such as "dark," or "dark,,warm" is
        # validated on its real names instead of failing on "".
        parsed = [name.strip() for name in style.split(",") if name.strip()]
        invalid = [name for name in parsed if name not in STYLES]
        if invalid:
            click.echo(f"Unknown styles: {', '.join(invalid)}")
            click.echo(f"Valid styles: {', '.join(STYLES)}")
            return
        style_names = parsed or None

    # Without a topic or at least one style there is nothing to store;
    # say so rather than reporting a classification that did not happen.
    if not topic and not style_names:
        click.echo("Nothing to classify: pass --topic and/or --style.")
        return

    db.classify_entry(entry_id, topic=topic, style_names=style_names)
    click.echo(f"Entry {entry_id} classified.")
    if topic:
        click.echo(f" Topic: {topic}")
    if style_names:
        click.echo(f" Styles: {', '.join(style_names)}")
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# --- Curate commands ---
|
| 138 |
+
|
| 139 |
+
@cli.command()
@click.argument("entry_id", type=int)
@click.argument("rating", type=click.IntRange(1, 5))
def rate(entry_id, rating):
    """Rate an entry 1-5."""
    # click.IntRange has already rejected anything outside 1..5.
    db.rate_entry(entry_id, rating)
    confirmation = f"Entry {entry_id} rated {rating}/5."
    click.echo(confirmation)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@cli.command()
@click.argument("entry_id", type=int)
def fav(entry_id):
    """Toggle favourite on an entry."""
    now_favourite = db.favourite_entry(entry_id)

    # None means there was no such entry; otherwise the value is the new
    # favourite state after the toggle.
    if now_favourite is None:
        click.echo(f"Entry {entry_id} not found.")
        return

    if now_favourite:
        state = "favourited"
    else:
        state = "unfavourited"
    click.echo(f"Entry {entry_id} {state}.")
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
@cli.command()
@click.argument("entry_id", type=int)
@click.argument("text")
def notes(entry_id, text):
    """Add notes to an entry."""
    db.add_notes(entry_id, text)
    confirmation = f"Notes added to entry {entry_id}."
    click.echo(confirmation)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
@cli.command()
@click.argument("entry_id", type=int)
def show(entry_id):
    """Show full detail for an entry."""
    # db.get_entry returns an (entry, style names) pair.
    record, style_names = db.get_entry(entry_id)
    if not record:
        click.echo(f"Entry {entry_id} not found.")
        return
    _display_entry_full(record, style_names)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
# --- Source management ---
|
| 181 |
+
|
| 182 |
+
@cli.command("add")
@click.argument("category", type=click.Choice(SOURCE_CATEGORIES))
@click.argument("platform")
@click.argument("handle_or_url")
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None,
              help="Default topic for entries from this source")
@click.option("--name", default=None, help="Display name")
@click.argument("notes_text", required=False, default=None)
def add_source(category, platform, handle_or_url, topic, name, notes_text):
    """Add a source to track."""
    fields = {
        "category": category,
        "platform": platform,
        "handle_or_url": handle_or_url,
        "display_name": name,
        "default_topic": topic,
        "notes": notes_text,
    }
    # Any failure while storing the source is reported as a one-line
    # message instead of a traceback.
    try:
        new_id = db.add_source(**fields)
        click.echo(f"Source added (id={new_id}): {platform} / {handle_or_url}")
        if topic:
            click.echo(f" Default topic: {topic}")
    except Exception as exc:
        click.echo(f"Error: {exc}")
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@cli.command("list")
@click.option("--platform", default=None)
def list_sources(platform):
    """List all tracked sources."""
    rows = db.list_sources(platform)
    if not rows:
        click.echo("No sources configured.")
        return

    for src in rows:
        # Optional pieces collapse to "" / "never" when the column is empty.
        if src["default_topic"]:
            topic_str = f" [{src['default_topic']}]"
        else:
            topic_str = ""
        last = src["last_scraped"] or "never"

        summary = (
            f" {src['id']:>3} {src['platform']:<12} {src['handle_or_url']:<30}"
            f"{topic_str:<15} last: {last}"
        )
        click.echo(summary)

        if src["notes"]:
            click.echo(f" {src['notes']}")
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
@cli.command("remove")
@click.argument("source_id", type=int)
@click.confirmation_option(prompt="Delete this source and all its entries?")
def remove_source(source_id):
    """Remove a source and all its entries."""
    # Look the source up first so the confirmation message can name it.
    existing = db.get_source(source_id)
    if not existing:
        click.echo(f"Source {source_id} not found.")
        return

    db.remove_source(source_id)
    click.echo(f"Removed: {existing['platform']} / {existing['handle_or_url']}")
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# --- Stats ---
|
| 242 |
+
|
| 243 |
+
@cli.command()
def stats():
    """Show corpus statistics."""
    s = db.get_stats()
    click.echo(f"\nCorpus: {s['total_entries']} entries from {s['total_sources']} sources")
    click.echo(f"Favourites: {s['favourites']}")

    if s["by_tier"]:
        click.echo("\nBy tier:")
        for row in s["by_tier"]:
            click.echo(f" {row['quality_tier']}: {row['n']}")

    if s["by_platform"]:
        click.echo("\nBy platform:")
        for row in s["by_platform"]:
            click.echo(f" {row['platform']:<15} {row['n']}")

    if s["by_topic"]:
        click.echo("\nBy topic:")
        for row in s["by_topic"]:
            # entries.topic is nullable, and None cannot take a "<15" format
            # spec (TypeError), so unclassified rows get a placeholder label.
            label = row["topic"] or "(none)"
            click.echo(f" {label:<15} {row['n']}")
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# --- Export ---
|
| 267 |
+
|
| 268 |
+
def _render_export_markdown(entries, topic, style):
    """Build the markdown document for the exported entries."""
    lines = ["# Joke Corpus Export\n", f"Entries: {len(entries)}\n"]
    if topic:
        lines.append(f"Topic: {topic}\n")
    if style:
        lines.append(f"Style: {style}\n")
    lines.append("---\n")

    for e in entries:
        lines.append(
            f"\n## [{e['quality_tier']}] {e['source_name'] or e['platform']}"
            f" — {e['author'] or 'unknown'}\n"
        )
        if e["topic"]:
            lines.append(f"**Topic:** {e['topic']}")
        if e["url"]:
            lines.append(f"**URL:** {e['url']}")
        if e["ally_rating"]:
            lines.append(f"**Rating:** {'★' * e['ally_rating']}")
        lines.append(f"\n{e['text']}\n")
        if e["ally_notes"]:
            lines.append(f"\n> {e['ally_notes']}\n")
        lines.append("---\n")

    return "\n".join(lines)


@cli.command()
@click.option("--topic", type=click.Choice(TOPICS, case_sensitive=False), default=None)
@click.option("--style", default=None)
@click.option("--favourites-only", is_flag=True)
@click.option("--output", "-o", default=None, help="Output file (default: stdout)")
def export(topic, style, favourites_only, output):
    """Export entries to markdown."""
    # By default only S and A tier entries are exported; --favourites-only
    # replaces that tier filter with a favourites filter.
    if favourites_only:
        conditions = ["e.favourite = 1"]
    else:
        conditions = ["e.quality_tier IN ('S', 'A')"]
    params = []

    if topic:
        conditions.append("e.topic = ?")
        params.append(topic)

    if style:
        conditions.append(
            """e.id IN (SELECT es.entry_id FROM entry_styles es
                        JOIN styles s ON s.id = es.style_id
                        WHERE s.name = ?)"""
        )
        params.append(style)

    # The WHERE clause is assembled only from the fixed fragments above;
    # every user-supplied value travels as a bound parameter.
    where = " AND ".join(conditions)

    with db.get_db() as conn:
        # A plain ORDER BY quality_tier sorts alphabetically (A, B, C, S),
        # which puts the S tier last; rank the tiers explicitly instead.
        entries = conn.execute(
            f"""SELECT e.*, s.display_name as source_name
                FROM entries e
                LEFT JOIN sources s ON e.source_id = s.id
                WHERE {where}
                ORDER BY CASE e.quality_tier
                             WHEN 'S' THEN 0
                             WHEN 'A' THEN 1
                             WHEN 'B' THEN 2
                             WHEN 'C' THEN 3
                             ELSE 4
                         END,
                         e.normalised_score DESC""",
            params,
        ).fetchall()

    text = _render_export_markdown(entries, topic, style)

    if output:
        # The document contains non-ASCII characters ("★", "—"); write UTF-8
        # explicitly rather than relying on the platform default encoding.
        with open(output, "w", encoding="utf-8") as f:
            f.write(text)
        click.echo(f"Exported {len(entries)} entries to {output}")
    else:
        click.echo(text)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# --- Display helpers ---
|
| 335 |
+
|
| 336 |
+
def _display_entries(entries):
    """Print a compact listing of entries: one header line plus a preview."""
    if not entries:
        click.echo("No entries found.")
        return

    for row in entries:
        # Optional markers are empty strings when the value is missing.
        star = " ★" if row["favourite"] else ""
        rating = f" [{row['ally_rating']}/5]" if row["ally_rating"] else ""
        topic = f" [{row['topic']}]" if row["topic"] else ""
        # Fall back to the platform when the source has no display name.
        origin = row["source_name"] or row["platform"]

        header = (
            f"\n {row['id']:>5} [{row['quality_tier']}]{star}{rating}"
            f" {origin}{topic}"
        )
        click.echo(header)
        click.echo(f" {truncate(row['text'], 200)}")

    click.echo(f"\n ({len(entries)} entries)")
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def _display_entry_detail(entry):
    """Print one entry: header, optional topic, full text, optional URL."""
    star = " ★" if entry["favourite"] else ""
    header = f"\n [{entry['quality_tier']}]{star} {entry['platform']} id={entry['id']}"
    click.echo(header)

    topic = entry["topic"]
    if topic:
        click.echo(f" Topic: {topic}")

    click.echo(f"\n {entry['text']}\n")

    link = entry["url"]
    if link:
        click.echo(f" {link}")
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def _display_entry_full(entry, styles):
    """Print every stored detail of one entry.

    Args:
        entry: Mapping of entry columns (``show`` passes the dict returned
            by ``db.get_entry``).
        styles: Style names attached to the entry; may be empty.
    """
    fav_str = " ★" if entry["favourite"] else ""
    click.echo(f"\n [{entry['quality_tier']}]{fav_str} {entry['platform']} id={entry['id']}")
    if entry["topic"]:
        click.echo(f" Topic: {entry['topic']}")
    if styles:
        click.echo(f" Styles: {', '.join(styles)}")
    if entry["ally_rating"]:
        click.echo(f" Rating: {entry['ally_rating']}/5")
    click.echo(f"\n {entry['text']}\n")
    if entry["url"]:
        click.echo(f" URL: {entry['url']}")
    if entry["ally_notes"]:
        click.echo(f" Notes: {entry['ally_notes']}")

    # Engagement. A NULL column arrives as a key whose value is None, so a
    # .get() default never applies; coerce with "or 0" to avoid printing
    # "None likes".
    counts = {
        key: entry.get(key) or 0
        for key in ("likes", "shares", "saves", "quotes", "replies")
    }
    if any(counts.values()):
        click.echo(
            f" Engagement: {counts['likes']} likes, "
            f"{counts['shares']} shares, "
            f"{counts['saves']} saves, "
            f"{counts['quotes']} quotes, "
            f"{counts['replies']} replies"
        )
        # Same NULL guard for the scores: None cannot be formatted as a float.
        click.echo(
            f" Score: raw={entry['raw_score'] or 0:.0f} "
            f"norm={entry['normalised_score'] or 0:.1f}"
        )
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
# --- Scraper registry ---
|
| 400 |
+
|
| 401 |
+
def _get_scraper(platform):
    """Return the scraper module for a platform, or None.

    None is returned for unknown platforms, for "rss" (deliberately
    disabled), and when the platform module cannot be imported. An import
    failure is logged so a missing dependency is not mistaken for an
    unimplemented scraper.
    """
    from importlib import import_module

    if platform == "rss":
        logger.info("RSS scraping disabled (no engagement data)")
        return None

    # Explicit allow-list: only these submodules of scraper.platforms may be
    # loaded, whatever platform string is stored for a source.
    supported = {
        "bluesky",
        "x_twitter",
        "reddit",
        "youtube",
        "guardian",
        "instagram",
    }
    if platform not in supported:
        return None

    try:
        return import_module(f"scraper.platforms.{platform}")
    except ImportError as exc:
        logger.warning("Could not import scraper for '%s': %s", platform, exc)
        return None
|
scraper/config.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Load .env from project root
# (the project root is the directory that contains the `scraper` package).
PROJECT_ROOT = Path(__file__).parent.parent
load_dotenv(PROJECT_ROOT / ".env")

# SQLite database file, stored next to the package in the project root.
DB_PATH = PROJECT_ROOT / "corpus.db"

# Valid taxonomy values
# Topics an entry or source may be assigned.
TOPICS = [
    "left_wing",
    "right_wing",
    "apolitical",
    "health",
    "science",
    "culture",
]

# Comedy styles that can be attached to an entry.
STYLES = [
    "deadpan",
    "absurdist",
    "satirical",
    "observational",
    "dark",
    "warm",
    "self_deprecating",
    "roast",
    "storytelling",
    "one_liner",
    "wordplay",
]

# Categories a tracked source may belong to.
SOURCE_CATEGORIES = [
    "social",
    "journalism",
    "transcript",
    "podcast",
    "dataset",
]

# Platform credentials
# Each value is read from the environment (after .env has been loaded) and
# falls back to an empty string when unset.
TWITTER_BEARER_TOKEN = os.getenv("TWITTER_BEARER_TOKEN", "")
TWITTER_API_KEY = os.getenv("TWITTER_API_KEY", "")
TWITTER_API_SECRET = os.getenv("TWITTER_API_SECRET", "")

REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID", "")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET", "")
# Unlike the secrets above, the user agent has a usable default.
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT", "joke-corpus/0.1")

YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")

GUARDIAN_API_KEY = os.getenv("GUARDIAN_API_KEY", "")

BLUESKY_HANDLE = os.getenv("BLUESKY_HANDLE", "")
BLUESKY_APP_PASSWORD = os.getenv("BLUESKY_APP_PASSWORD", "")

APIFY_API_TOKEN = os.getenv("APIFY_API_TOKEN", "")

# --- Engagement thresholds (quality gates) ---
# NOTE(review): presumably the minimum engagement a post needs before a
# platform scraper keeps it — confirm against the scrapers that read these.
REDDIT_MIN_SCORE = 50
TWITTER_MIN_FAVES = 100
INSTAGRAM_MIN_LIKES_RATIO = 0.2  # 20% of account's median likes
INSTAGRAM_MIN_LIKES_ABSOLUTE = 50  # absolute floor
|
scraper/db.py
ADDED
|
@@ -0,0 +1,522 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
from contextlib import contextmanager
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
from scraper.config import DB_PATH, STYLES
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_connection():
    """Open a new SQLite connection to the corpus database.

    Rows come back as ``sqlite3.Row`` objects; the connection is switched to
    WAL journalling and has foreign-key enforcement turned on.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    return connection
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@contextmanager
def get_db():
    """Yield a database connection, committing when the block succeeds.

    The commit is skipped when the ``with`` body raises; the connection is
    closed in either case.
    """
    connection = get_connection()
    try:
        yield connection
        # Reached only when the caller's block finished without raising.
        connection.commit()
    finally:
        connection.close()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def init_db():
    """Create the schema if it is missing and apply in-place migrations.

    Safe to call repeatedly: tables and indexes use IF NOT EXISTS, extra
    columns are added only when absent, the FTS index is created (and
    filled from existing rows) only once, and style names are inserted
    with INSERT OR IGNORE.
    """
    with get_db() as conn:
        # Core corpus tables
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS sources (
                id INTEGER PRIMARY KEY,
                category TEXT NOT NULL,
                platform TEXT NOT NULL,
                handle_or_url TEXT NOT NULL,
                display_name TEXT,
                follower_count INTEGER,
                median_engagement REAL DEFAULT 0,
                default_quality TEXT DEFAULT 'B',
                default_topic TEXT,
                last_scraped TIMESTAMP,
                notes TEXT,
                UNIQUE(platform, handle_or_url)
            );

            CREATE TABLE IF NOT EXISTS entries (
                id INTEGER PRIMARY KEY,
                source_id INTEGER REFERENCES sources(id),
                platform TEXT NOT NULL,
                platform_entry_id TEXT,
                url TEXT,
                text TEXT NOT NULL,
                author TEXT,
                content_type TEXT DEFAULT 'post',
                posted_at TIMESTAMP,
                scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                topic TEXT,
                likes INTEGER,
                shares INTEGER,
                saves INTEGER,
                quotes INTEGER,
                replies INTEGER,
                raw_score REAL DEFAULT 0,
                normalised_score REAL DEFAULT 0,
                quality_tier TEXT DEFAULT 'C',
                ally_rating INTEGER,
                ally_notes TEXT,
                favourite BOOLEAN DEFAULT 0,
                UNIQUE(platform, platform_entry_id)
            );

            CREATE TABLE IF NOT EXISTS styles (
                id INTEGER PRIMARY KEY,
                name TEXT UNIQUE NOT NULL
            );

            CREATE TABLE IF NOT EXISTS entry_styles (
                entry_id INTEGER REFERENCES entries(id) ON DELETE CASCADE,
                style_id INTEGER REFERENCES styles(id),
                PRIMARY KEY (entry_id, style_id)
            );

            CREATE TABLE IF NOT EXISTS scrape_log (
                id INTEGER PRIMARY KEY,
                source_id INTEGER REFERENCES sources(id),
                scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                entries_found INTEGER,
                new_entries INTEGER,
                status TEXT
            );
        """)

        # World Cup tables
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS tournaments (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                category TEXT,
                pool_filter TEXT DEFAULT 'mixed',
                status TEXT DEFAULT 'open',
                total_rounds INTEGER DEFAULT 6,
                current_round INTEGER DEFAULT 1,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                started_at TIMESTAMP,
                completed_at TIMESTAMP,
                winner_entry_id INTEGER REFERENCES entries(id)
            );

            CREATE TABLE IF NOT EXISTS matchups (
                id INTEGER PRIMARY KEY,
                tournament_id INTEGER NOT NULL REFERENCES tournaments(id),
                round_number INTEGER NOT NULL,
                bracket_position INTEGER NOT NULL,
                entry_a_id INTEGER NOT NULL REFERENCES entries(id),
                entry_b_id INTEGER NOT NULL REFERENCES entries(id),
                votes_a INTEGER DEFAULT 0,
                votes_b INTEGER DEFAULT 0,
                winner_id INTEGER REFERENCES entries(id),
                status TEXT DEFAULT 'active',
                opened_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                closed_at TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS votes (
                id INTEGER PRIMARY KEY,
                matchup_id INTEGER NOT NULL REFERENCES matchups(id),
                entry_id INTEGER NOT NULL REFERENCES entries(id),
                voter_token TEXT NOT NULL,
                voter_ip TEXT,
                voted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(matchup_id, voter_token)
            );

            CREATE INDEX IF NOT EXISTS idx_matchups_tournament
                ON matchups(tournament_id, round_number);
            CREATE INDEX IF NOT EXISTS idx_matchups_status
                ON matchups(status);
            CREATE INDEX IF NOT EXISTS idx_votes_matchup
                ON votes(matchup_id);
            CREATE INDEX IF NOT EXISTS idx_votes_voter
                ON votes(voter_token);
        """)

        # Add elo_rating and image_url columns if missing. The existing
        # columns are inspected up front so that a genuine ALTER TABLE
        # failure surfaces instead of being swallowed as "already exists".
        existing_columns = {
            row["name"] for row in conn.execute("PRAGMA table_info(entries)")
        }
        if "elo_rating" not in existing_columns:
            conn.execute(
                "ALTER TABLE entries ADD COLUMN elo_rating REAL DEFAULT 1200.0"
            )
        if "image_url" not in existing_columns:
            conn.execute("ALTER TABLE entries ADD COLUMN image_url TEXT")

        # Create FTS table if it doesn't exist
        has_fts = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='entries_fts'"
        ).fetchone()
        if not has_fts:
            conn.execute("""
                CREATE VIRTUAL TABLE entries_fts USING fts5(
                    text, author, topic,
                    content=entries, content_rowid=id
                )
            """)
            # An external-content index starts out empty. Rebuild it once
            # so rows that were already in `entries` become searchable
            # (a no-op on a fresh database).
            conn.execute("INSERT INTO entries_fts(entries_fts) VALUES('rebuild')")

        # Pre-populate styles
        conn.executemany(
            "INSERT OR IGNORE INTO styles (name) VALUES (?)",
            [(style,) for style in STYLES],
        )
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
# --- Source CRUD ---
|
| 170 |
+
|
| 171 |
+
def add_source(category, platform, handle_or_url, display_name=None,
               default_topic=None, notes=None):
    """Insert a new source row and return its id.

    The table's UNIQUE(platform, handle_or_url) constraint makes a duplicate
    raise ``sqlite3.IntegrityError``.
    """
    values = (category, platform, handle_or_url, display_name, default_topic,
              notes)
    with get_db() as conn:
        cursor = conn.execute(
            """INSERT INTO sources (category, platform, handle_or_url,
                                    display_name, default_topic, notes)
               VALUES (?, ?, ?, ?, ?, ?)""",
            values,
        )
        # Row id of the INSERT that just ran on this cursor.
        return cursor.lastrowid
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def list_sources(platform=None):
    """Return source rows, optionally restricted to one platform.

    With a platform the rows are ordered by id; without one they are
    ordered by platform and then id.
    """
    if platform:
        sql = "SELECT * FROM sources WHERE platform = ? ORDER BY id"
        args = (platform,)
    else:
        sql = "SELECT * FROM sources ORDER BY platform, id"
        args = ()

    with get_db() as conn:
        return conn.execute(sql, args).fetchall()
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def get_source(source_id):
    """Return the source row with the given id, or None if there is none."""
    with get_db() as conn:
        cursor = conn.execute(
            "SELECT * FROM sources WHERE id = ?", (source_id,)
        )
        return cursor.fetchone()
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def remove_source(source_id):
    """Delete a source together with its entries and scrape-log rows.

    Foreign keys are enforced on every connection and scrape_log.source_id
    references sources(id) without ON DELETE CASCADE, so the log rows must
    be removed first; otherwise deleting a source that has log rows fails
    with a foreign-key IntegrityError.

    NOTE(review): entries referenced by tournaments, matchups or votes are
    still protected by their own foreign keys and will make this raise.
    The entries_fts index is not updated here either — confirm whether
    stale index rows matter to callers.
    """
    with get_db() as conn:
        # Children first, parent last; entry_styles rows follow their
        # entries through ON DELETE CASCADE.
        conn.execute("DELETE FROM scrape_log WHERE source_id = ?", (source_id,))
        conn.execute("DELETE FROM entries WHERE source_id = ?", (source_id,))
        conn.execute("DELETE FROM sources WHERE id = ?", (source_id,))
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def update_source_last_scraped(source_id):
    """Stamp a source's last_scraped column with the current UTC time.

    The value is stored as an ISO-8601 string.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    with get_db() as conn:
        conn.execute(
            "UPDATE sources SET last_scraped = ? WHERE id = ?",
            (stamp, source_id),
        )
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def update_source_median(source_id):
    """Recompute a source's median_engagement from its entries' raw scores.

    Only entries with raw_score > 0 are counted. With an even number of
    scores the upper of the two middle values is stored. Nothing is
    written when the source has no such entries.
    """
    with get_db() as conn:
        scores = conn.execute(
            """SELECT raw_score FROM entries
               WHERE source_id = ? AND raw_score > 0
               ORDER BY raw_score""",
            (source_id,),
        ).fetchall()

        if not scores:
            return

        # The rows are already sorted, so the middle index is the median.
        middle = scores[len(scores) // 2]
        conn.execute(
            "UPDATE sources SET median_engagement = ? WHERE id = ?",
            (middle["raw_score"], source_id),
        )
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# --- Entry CRUD ---
|
| 235 |
+
|
| 236 |
+
def insert_entry(source_id, platform, text, platform_entry_id=None,
                 url=None, author=None, content_type="post",
                 posted_at=None, topic=None, likes=None, shares=None,
                 saves=None, quotes=None, replies=None, image_url=None):
    """Insert one entry, index it for full-text search, and return its id.

    Returns None when the insert violates a constraint, which the code
    treats as "this entry is already stored".
    """
    values = (
        source_id, platform, platform_entry_id, url, text,
        author, content_type, posted_at, topic,
        likes, shares, saves, quotes, replies, image_url,
    )

    with get_db() as conn:
        try:
            conn.execute(
                """INSERT INTO entries
                   (source_id, platform, platform_entry_id, url, text,
                    author, content_type, posted_at, topic,
                    likes, shares, saves, quotes, replies, image_url)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                values,
            )
            new_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]

            # Keep the FTS index in step with the row just written.
            conn.execute(
                """INSERT INTO entries_fts(rowid, text, author, topic)
                   VALUES (?, ?, ?, ?)""",
                (new_id, text, author, topic),
            )
        except sqlite3.IntegrityError:
            # Duplicate — already have this entry
            return None
        return new_id
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def get_entry(entry_id):
    """Fetch one entry and the names of its styles.

    Returns:
        ``(entry_dict, style_names)`` when the entry exists, otherwise
        ``(None, [])``.
    """
    with get_db() as conn:
        row = conn.execute(
            "SELECT * FROM entries WHERE id = ?", (entry_id,)
        ).fetchone()
        if not row:
            return None, []

        style_rows = conn.execute(
            """SELECT s.name FROM styles s
               JOIN entry_styles es ON s.id = es.style_id
               WHERE es.entry_id = ?""",
            (entry_id,),
        ).fetchall()
        names = [style_row["name"] for style_row in style_rows]
        return dict(row), names
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def _entry_filters(topic=None, style=None, tier=None, platform=None,
                   default_top_tiers=False):
    """Build the WHERE fragments shared by the entry query functions.

    Returns ``(conditions, params)``: SQL fragments written against the
    ``e`` alias for ``entries`` and the values for their ``?`` placeholders,
    in matching order. When ``default_top_tiers`` is true and no ``tier``
    is given, results are restricted to quality tiers 'S' and 'A'.
    """
    conditions = []
    params = []

    if topic:
        conditions.append("e.topic = ?")
        params.append(topic)

    if style:
        conditions.append(
            """e.id IN (SELECT es.entry_id FROM entry_styles es
                        JOIN styles s ON s.id = es.style_id
                        WHERE s.name = ?)"""
        )
        params.append(style)

    if tier:
        conditions.append("e.quality_tier = ?")
        params.append(tier)
    elif default_top_tiers:
        conditions.append("e.quality_tier IN ('S', 'A')")

    if platform:
        conditions.append("e.platform = ?")
        params.append(platform)

    return conditions, params


def _entries_select(conditions, tail):
    """Return the entries+source SELECT with the given conditions and tail clause."""
    # "1=1" keeps the statement valid when no filter was requested.
    where = " AND ".join(conditions) if conditions else "1=1"
    return f"""SELECT e.*, s.display_name as source_name
               FROM entries e
               LEFT JOIN sources s ON e.source_id = s.id
               WHERE {where}
               {tail}"""


def search_entries(query, topic=None, style=None, tier=None, platform=None,
                   limit=20):
    """Full-text search over entries with optional filters.

    ``query`` is passed to an FTS ``MATCH`` against ``entries_fts``; a falsy
    query skips the text match. Results are ordered by normalised score,
    then raw score, and capped at ``limit`` rows.
    """
    conditions = []
    params = []
    if query:
        conditions.append(
            "e.id IN (SELECT rowid FROM entries_fts WHERE entries_fts MATCH ?)"
        )
        params.append(query)

    extra_conditions, extra_params = _entry_filters(topic, style, tier, platform)
    conditions += extra_conditions
    params += extra_params
    params.append(limit)

    sql = _entries_select(
        conditions,
        "ORDER BY e.normalised_score DESC, e.raw_score DESC LIMIT ?",
    )
    with get_db() as conn:
        return conn.execute(sql, params).fetchall()


def top_entries(topic=None, style=None, tier=None, platform=None,
                days=7, limit=20):
    """Return the best-scoring entries scraped within the last ``days`` days.

    A falsy ``days`` disables the time window. Without an explicit ``tier``
    only tiers 'S' and 'A' are returned. Ordering and ``limit`` behave as in
    ``search_entries``.
    """
    conditions = []
    params = []
    if days:
        conditions.append("e.scraped_at >= datetime('now', ?)")
        params.append(f"-{days} days")

    extra_conditions, extra_params = _entry_filters(
        topic, style, tier, platform, default_top_tiers=True
    )
    conditions += extra_conditions
    params += extra_params
    params.append(limit)

    sql = _entries_select(
        conditions,
        "ORDER BY e.normalised_score DESC, e.raw_score DESC LIMIT ?",
    )
    with get_db() as conn:
        return conn.execute(sql, params).fetchall()


def random_entry(topic=None, style=None, tier=None):
    """Return one random entry matching the filters, or None if none match.

    Without an explicit ``tier`` only tiers 'S' and 'A' are considered.
    """
    conditions, params = _entry_filters(
        topic, style, tier, default_top_tiers=True
    )
    sql = _entries_select(conditions, "ORDER BY RANDOM() LIMIT 1")
    with get_db() as conn:
        return conn.execute(sql, params).fetchone()
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
# --- Curation ---
|
| 413 |
+
|
| 414 |
+
def rate_entry(entry_id, rating):
    """Store ``rating`` in the ``ally_rating`` column of the given entry."""
    update_sql = "UPDATE entries SET ally_rating = ? WHERE id = ?"
    with get_db() as conn:
        conn.execute(update_sql, (rating, entry_id))
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
def favourite_entry(entry_id):
    """Toggle the ``favourite`` flag of an entry.

    Returns the new flag value (0 or 1), or None when the entry id does
    not exist.
    """
    with get_db() as conn:
        row = conn.execute(
            "SELECT favourite FROM entries WHERE id = ?", (entry_id,)
        ).fetchone()
        if not row:
            return None

        if row["favourite"]:
            toggled = 0
        else:
            toggled = 1
        conn.execute(
            "UPDATE entries SET favourite = ? WHERE id = ?",
            (toggled, entry_id),
        )
        return toggled
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def add_notes(entry_id, notes):
    """Overwrite the ``ally_notes`` column of the given entry with ``notes``."""
    update_sql = "UPDATE entries SET ally_notes = ? WHERE id = ?"
    with get_db() as conn:
        conn.execute(update_sql, (notes, entry_id))
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def classify_entry(entry_id, topic=None, style_names=None):
    """Set an entry's topic and/or replace its style links.

    A falsy ``topic`` leaves the topic and the FTS row untouched. A falsy
    ``style_names`` leaves the existing style links untouched; otherwise all
    links are removed and one is re-created for each name found in
    ``styles`` (unknown names are skipped).
    """
    with get_db() as conn:
        if topic:
            conn.execute(
                "UPDATE entries SET topic = ? WHERE id = ?",
                (topic, entry_id),
            )
            # The FTS row carries the topic too, so rebuild it.
            current = conn.execute(
                "SELECT text, author FROM entries WHERE id = ?",
                (entry_id,),
            ).fetchone()
            if current:
                conn.execute(
                    "DELETE FROM entries_fts WHERE rowid = ?", (entry_id,)
                )
                conn.execute(
                    """INSERT INTO entries_fts(rowid, text, author, topic)
                       VALUES (?, ?, ?, ?)""",
                    (entry_id, current["text"], current["author"], topic),
                )

        if not style_names:
            return

        # Replace, rather than extend, the entry's style set.
        conn.execute(
            "DELETE FROM entry_styles WHERE entry_id = ?", (entry_id,)
        )
        for style_name in style_names:
            style_row = conn.execute(
                "SELECT id FROM styles WHERE name = ?", (style_name,)
            ).fetchone()
            if not style_row:
                continue
            conn.execute(
                "INSERT INTO entry_styles (entry_id, style_id) VALUES (?, ?)",
                (entry_id, style_row["id"]),
            )
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
# --- Scrape Log ---
|
| 484 |
+
|
| 485 |
+
def log_scrape(source_id, entries_found, new_entries, status="success"):
    """Append one row describing a scrape run to ``scrape_log``."""
    record = (source_id, entries_found, new_entries, status)
    with get_db() as conn:
        conn.execute(
            """INSERT INTO scrape_log
               (source_id, entries_found, new_entries, status)
               VALUES (?, ?, ?, ?)""",
            record,
        )
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
# --- Stats ---
|
| 496 |
+
|
| 497 |
+
def get_stats():
    """Return summary counts for the corpus.

    The result dict holds the total entry count, per-platform / per-topic /
    per-tier row lists (each row has the group value and ``n``), the number
    of favourites and the number of sources.
    """
    with get_db() as conn:

        def count(sql):
            # Every counting query aliases its COUNT(*) as "n".
            return conn.execute(sql).fetchone()["n"]

        def grouped(sql):
            return conn.execute(sql).fetchall()

        return {
            "total_entries": count("SELECT COUNT(*) as n FROM entries"),
            "by_platform": grouped(
                "SELECT platform, COUNT(*) as n FROM entries GROUP BY platform ORDER BY n DESC"
            ),
            "by_topic": grouped(
                "SELECT topic, COUNT(*) as n FROM entries WHERE topic IS NOT NULL GROUP BY topic ORDER BY n DESC"
            ),
            "by_tier": grouped(
                "SELECT quality_tier, COUNT(*) as n FROM entries GROUP BY quality_tier ORDER BY quality_tier"
            ),
            "favourites": count(
                "SELECT COUNT(*) as n FROM entries WHERE favourite = 1"
            ),
            "total_sources": count("SELECT COUNT(*) as n FROM sources"),
        }
|
scraper/extract_jokes.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Extract standalone one-liner jokes from long-form articles.
|
| 2 |
+
|
| 3 |
+
Two-stage pipeline:
|
| 4 |
+
Stage 1: Regex/heuristic filtering (6,500 sentences → ~300 candidates)
|
| 5 |
+
Stage 2: Scoring candidates for standalone-ness (requires manual or AI review)
|
| 6 |
+
|
| 7 |
+
For now, Stage 1 runs automatically. Stage 2 candidates are stored with
|
| 8 |
+
content_type='extracted_candidate' for review in the web dashboard.
|
| 9 |
+
"""
|
| 10 |
+
import re
|
| 11 |
+
import logging
|
| 12 |
+
from scraper.db import get_db, insert_entry
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger("joke-corpus")
|
| 15 |
+
|
| 16 |
+
# Minimum article length (in characters) to bother extracting from
MIN_ARTICLE_LENGTH = 500

# Sentence length bounds (in characters) for one-liners; sentences outside
# this range are never scored.
MIN_JOKE_LENGTH = 40
MAX_JOKE_LENGTH = 300

# Patterns that strongly indicate a standalone joke (Frankie Boyle style).
# They are searched anywhere in the sentence, case-insensitively.
STRONG_PATTERNS = [
    re.compile(r"looks?\s+like\s+", re.IGNORECASE),  # "looks like a..."
    re.compile(r"look\s+as\s+if\s+", re.IGNORECASE),  # "look as if..."
    re.compile(r"the\s+face\s+of\s+", re.IGNORECASE),  # "the face of someone who..."
    re.compile(r"the\s+kind\s+of\s+\w+\s+who", re.IGNORECASE),  # "the kind of man who..."
    re.compile(r"the\s+sort\s+of\s+\w+\s+who", re.IGNORECASE),  # "the sort of man who..."
    re.compile(r"so\s+\w+\s+that\s+", re.IGNORECASE),  # "so X that..."
    re.compile(r"like\s+a\s+.+\s+in\s+a\s+", re.IGNORECASE),  # "like a X in a Y"
    re.compile(r"like\s+watching\s+", re.IGNORECASE),  # "like watching..."
    re.compile(r"imagine\s+", re.IGNORECASE),  # "Imagine..."
]

# Weak signals — help score but don't guarantee standalone-ness
WEAK_PATTERNS = [
    re.compile(r"\(.*\)", re.IGNORECASE),  # parenthetical aside
    re.compile(r"—.+—", re.IGNORECASE),  # em-dash aside
    re.compile(r"perhaps\s+", re.IGNORECASE),  # dry understatement
    re.compile(r"presumably\s+", re.IGNORECASE),
    re.compile(r"naturally\s+", re.IGNORECASE),
    re.compile(r"of\s+course\s+", re.IGNORECASE),
]

# Sentences starting with these are likely context-dependent
# (conjunctions, pronouns, demonstratives, or references to "the article").
# Anchored at the start of the sentence; matching is case-insensitive.
SKIP_STARTS = re.compile(
    r"^(But |And |Yet |So |Or |However,|Meanwhile,|"
    r"He |She |It |They |This |That |These |Those |"
    r"His |Her |Its |Their |"
    r"The (article|piece|column|book|report|story|film|show) )",
    re.IGNORECASE,
)

# Named entities / proper nouns that suggest the subject is identified.
# Anchored at the start of the sentence and case-sensitive (no flags).
HAS_NAMED_SUBJECT = re.compile(
    r"^[A-Z][a-z]+ [A-Z]|"  # First Last at start
    r"^(Boris|Trump|Corbyn|Farage|May|Johnson|Cameron|Starmer|Sunak|"
    r"Blair|Thatcher|Obama|Biden|Putin|Musk|Patel|Gove|Rees-Mogg|"
    r"Cummings|Hancock|Truss|Sturgeon|Salmond|"
    r"Britain|England|Scotland|America|Labour|Tory|Conservative|"
    r"The (Queen|King|PM|BBC|NHS|Guardian|Sun|Mail|Times))\b",
)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _split_sentences(text):
|
| 67 |
+
"""Split article text into sentences, handling common abbreviations."""
|
| 68 |
+
# Remove markdown headers
|
| 69 |
+
text = re.sub(r"^#+\s+.*$", "", text, flags=re.MULTILINE)
|
| 70 |
+
# Remove URLs
|
| 71 |
+
text = re.sub(r"https?://\S+", "", text)
|
| 72 |
+
# Protect common abbreviations
|
| 73 |
+
text = text.replace("Mr.", "Mr").replace("Mrs.", "Mrs").replace("Ms.", "Ms")
|
| 74 |
+
text = text.replace("Dr.", "Dr").replace("St.", "St").replace("Prof.", "Prof")
|
| 75 |
+
text = text.replace("etc.", "etc").replace("eg.", "eg").replace("ie.", "ie")
|
| 76 |
+
text = text.replace("...", "…")
|
| 77 |
+
|
| 78 |
+
# Split on sentence endings
|
| 79 |
+
sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z"\'])', text)
|
| 80 |
+
return [s.strip() for s in sentences if s.strip()]
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _score_candidate(sentence):
    """Return an integer score for how likely ``sentence`` is a standalone joke.

    Points: +2 for 80-200 characters (else +1 for 60-250), +3 if any strong
    pattern matches, +1 per matching weak pattern, +2 for a named subject at
    the start, -2 for a context-dependent opening, +1 for containing a double
    quote, +1 for ending in ".", "…" or "?".
    """
    n_chars = len(sentence)
    if 80 <= n_chars <= 200:
        length_points = 2
    elif 60 <= n_chars <= 250:
        length_points = 1
    else:
        length_points = 0

    # Strong patterns count once no matter how many of them match.
    strong_points = 3 if any(p.search(sentence) for p in STRONG_PATTERNS) else 0
    # Weak patterns are cumulative.
    weak_points = sum(1 for p in WEAK_PATTERNS if p.search(sentence))

    named_points = 2 if HAS_NAMED_SUBJECT.match(sentence) else 0
    skip_penalty = 2 if SKIP_STARTS.match(sentence) else 0

    has_quote = '"' in sentence or '\u201c' in sentence
    clean_ending = sentence.rstrip().endswith((".", "…", "?"))

    return (
        length_points
        + strong_points
        + weak_points
        + named_points
        - skip_penalty
        + (1 if has_quote else 0)
        + (1 if clean_ending else 0)
    )
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def extract_from_article(entry_id, text, source_id, author=None,
                         topic=None, min_score=3):
    """Extract joke candidates from a long-form article.

    Each sentence between MIN_JOKE_LENGTH and MAX_JOKE_LENGTH characters
    that scores at least ``min_score`` is stored through ``insert_entry``
    with platform "extracted" and content_type "extracted_candidate".

    Returns list of (sentence, score) tuples that were inserted; sentences
    for which ``insert_entry`` returned a falsy id are not included.
    Articles shorter than MIN_ARTICLE_LENGTH yield an empty list.
    """
    # Local import: only this function needs it.
    import zlib

    if len(text) < MIN_ARTICLE_LENGTH:
        return []

    extracted = []
    for sentence in _split_sentences(text):
        # Basic length filter
        if len(sentence) < MIN_JOKE_LENGTH or len(sentence) > MAX_JOKE_LENGTH:
            continue

        score = _score_candidate(sentence)
        if score < min_score:
            continue

        # The id must be reproducible so re-running extraction hits the
        # duplicate check instead of inserting the sentence again. The
        # built-in hash() of a str is salted per process, so use CRC-32.
        digest = zlib.crc32(sentence.encode("utf-8"))
        platform_id = f"extract-{entry_id}-{digest:08x}"

        eid = insert_entry(
            source_id=source_id,
            platform="extracted",
            text=sentence,
            platform_entry_id=platform_id,
            author=author,
            content_type="extracted_candidate",
            topic=topic,
        )
        if eid:
            extracted.append((sentence, score))

    return extracted
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def run_extraction(min_score=3):
    """Run candidate extraction over every article not yet processed.

    An article counts as processed when some "extracted" entry carries its
    id inside ``platform_entry_id`` ("extract-<article id>-<hash>").
    Returns the total number of candidates inserted.
    """
    pending_sql = """
        SELECT e.id, e.text, e.source_id, e.author, e.topic
        FROM entries e
        WHERE e.content_type = 'article'
        AND LENGTH(e.text) > ?
        AND e.id NOT IN (
            SELECT DISTINCT CAST(
                SUBSTR(platform_entry_id, 9,
                       INSTR(SUBSTR(platform_entry_id, 9), '-') - 1)
                AS INTEGER)
            FROM entries
            WHERE platform = 'extracted'
        )
    """
    grand_total = 0
    with get_db() as conn:
        # Find articles that haven't been extracted yet
        articles = conn.execute(pending_sql, (MIN_ARTICLE_LENGTH,)).fetchall()

        for article in articles:
            found = extract_from_article(
                entry_id=article["id"],
                text=article["text"],
                source_id=article["source_id"],
                author=article["author"],
                topic=article["topic"],
                min_score=min_score,
            )
            if not found:
                continue
            logger.info(
                f" Article {article['id']} ({article['author']}): "
                f"extracted {len(found)} candidates"
            )
            grand_total += len(found)

    return grand_total
|
scraper/platforms/__init__.py
ADDED
|
File without changes
|
scraper/platforms/bluesky.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Bluesky scraper via AT Protocol."""
|
| 2 |
+
import logging
|
| 3 |
+
from atproto import Client
|
| 4 |
+
from scraper.config import BLUESKY_HANDLE, BLUESKY_APP_PASSWORD
|
| 5 |
+
from scraper.db import insert_entry, get_source
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger("joke-corpus")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _get_client():
    """Create an atproto ``Client``, logging in only if both credentials are set."""
    bsky = Client()
    have_credentials = BLUESKY_HANDLE and BLUESKY_APP_PASSWORD
    if not have_credentials:
        return bsky
    bsky.login(BLUESKY_HANDLE, BLUESKY_APP_PASSWORD)
    return bsky
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def scrape_source(source, backfill=False):
    """Scrape a Bluesky account's feed into the corpus.

    Walks up to 50 feed pages when ``backfill`` is true, otherwise 3, with
    50 items per page. Reposts, replies authored by a different handle and
    posts with fewer than 10 characters of text are skipped.

    Returns ``(found, new)``: posts that passed the filters and the subset
    for which ``insert_entry`` returned an id.
    """
    handle = source["handle_or_url"]
    source_id = source["id"]
    default_topic = source.get("default_topic")

    client = _get_client()

    page_budget = 50 if backfill else 3
    found = new = 0
    cursor = None

    for _ in range(page_budget):
        page = client.get_author_feed(actor=handle, limit=50, cursor=cursor)
        if not page.feed:
            break

        for item in page.feed:
            post = item.post
            record = post.record
            reason = item.reason

            # Skip reposts (we want original content)
            is_repost = (
                reason
                and hasattr(reason, "py_type")
                and "repost" in str(reason.py_type).lower()
            )
            if is_repost:
                continue

            # Skip replies unless they're from the account itself
            if record.reply and post.author.handle != handle.lstrip("@"):
                continue

            text = record.text
            if not text or len(text.strip()) < 10:
                continue

            found += 1

            post_key = post.uri.split('/')[-1]
            inserted_id = insert_entry(
                source_id=source_id,
                platform="bluesky",
                text=text,
                platform_entry_id=post.uri,
                url=f"https://bsky.app/profile/{post.author.handle}/post/{post_key}",
                author=post.author.display_name or post.author.handle,
                content_type="post",
                posted_at=record.created_at,
                topic=default_topic,
                likes=post.like_count or 0,
                shares=post.repost_count or 0,
                saves=0,
                quotes=getattr(post, "quote_count", 0) or 0,
                replies=post.reply_count or 0,
            )
            if inserted_id:
                new += 1

        cursor = page.cursor
        if not cursor:
            break

    return found, new
|
scraper/platforms/guardian.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Guardian Open Platform API scraper."""
|
| 2 |
+
import logging
|
| 3 |
+
import requests
|
| 4 |
+
from scraper.config import GUARDIAN_API_KEY
|
| 5 |
+
from scraper.db import insert_entry
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger("joke-corpus")
|
| 8 |
+
|
| 9 |
+
# Search endpoint of the Guardian content API; every request in this module
# goes to this URL.
BASE_URL = "https://content.guardianapis.com/search"

# Key comedy contributors, written as Guardian contributor tags.
# NOTE(review): not referenced by the code visible in this module — presumably
# used when seeding sources; confirm against callers.
DEFAULT_CONTRIBUTORS = [
    "profile/frankie-boyle",
    "profile/marinahyde",
    "profile/johncrace",
    "profile/stewart-lee",
    "profile/charliebrooker",
]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def scrape_source(source, backfill=False):
    """Scrape Guardian articles carrying a contributor tag.

    Requests up to 20 result pages when ``backfill`` is true, otherwise 2,
    with 50 articles per page, newest first. Articles whose body text is
    missing or shorter than 50 characters are skipped. Each stored text is
    the headline as a markdown "# " header followed by the body.

    Returns ``(found, new)``. Raises ValueError when GUARDIAN_API_KEY is
    not set, and whatever ``raise_for_status`` raises on an HTTP error.
    """
    contributor_tag = source["handle_or_url"]
    source_id = source["id"]
    default_topic = source.get("default_topic")

    if not GUARDIAN_API_KEY:
        raise ValueError("GUARDIAN_API_KEY not set in .env")

    found = new = 0
    max_pages = 20 if backfill else 2

    for page in range(1, max_pages + 1):
        reply = requests.get(
            BASE_URL,
            params={
                "api-key": GUARDIAN_API_KEY,
                "tag": contributor_tag,
                "show-fields": "bodyText,headline,byline,shortUrl",
                "page-size": 50,
                "page": page,
                "order-by": "newest",
            },
            timeout=30,
        )
        reply.raise_for_status()
        payload = reply.json().get("response", {})

        articles = payload.get("results", [])
        if not articles:
            break

        for article in articles:
            fields = article.get("fields", {})
            body = fields.get("bodyText", "")
            headline = fields.get("headline", "")

            if not body or len(body.strip()) < 50:
                continue

            found += 1
            stored_id = insert_entry(
                source_id=source_id,
                platform="guardian",
                text=f"# {headline}\n\n{body}",
                platform_entry_id=article["id"],
                url=article.get("webUrl"),
                author=fields.get("byline", contributor_tag),
                content_type="article",
                posted_at=article.get("webPublicationDate"),
                topic=default_topic,
                likes=None,
                shares=None,
                saves=None,
                quotes=None,
                replies=None,
            )
            if stored_id:
                new += 1

        # Stop once the API reports there are no further pages.
        if page >= payload.get("pages", 1):
            break

    return found, new
|
scraper/platforms/instagram.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Instagram scraper via Apify, with OCR for image-based text posts."""
|
| 2 |
+
import io
|
| 3 |
+
import logging
|
| 4 |
+
import re
|
| 5 |
+
import time
|
| 6 |
+
import requests
|
| 7 |
+
from scraper.config import APIFY_API_TOKEN, INSTAGRAM_MIN_LIKES_RATIO, INSTAGRAM_MIN_LIKES_ABSOLUTE
|
| 8 |
+
from scraper.db import insert_entry
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger("joke-corpus")
|
| 11 |
+
|
| 12 |
+
# Apify Instagram Post Scraper actor
ACTOR_ID = "apify~instagram-post-scraper"
APIFY_BASE = "https://api.apify.com/v2"

# Captions shorter than this (in characters) trigger OCR on the image
OCR_CAPTION_THRESHOLD = 50

# Patterns that indicate a caption is just a watermark or tag spam
JUNK_CAPTION_RE = re.compile(
    r"^(@\w+\s*)+$|"  # Just @handles
    r"^#\w+(\s+#\w+)*\s*$|"  # Just hashtags
    r"^(link in bio|follow|tag)\b",  # Promo fluff
    re.IGNORECASE,
)

# Patterns that indicate a caption is location metadata (Overheard accounts)
LOCATION_CAPTION_RE = re.compile(
    r"^[\w\s,.'&-]+\.\s*[\w\s,.'&-]+\.\s*[^\w]*"  # "Place. Area." pattern
    r"|overheard\s+by\b"
    r"|📥|👂|📍",
    re.IGNORECASE,
)

# --- Tweet text cleaner for OCR output ---
# Lines matching these are NOT part of the joke

# Twitter/X UI elements
_HANDLE_RE = re.compile(r"^@\w{1,20}$")  # Standalone @handle
_NAME_HANDLE_RE = re.compile(r"^[\w\s.'-]+\s*@\w+", re.I)  # "Name @handle" or "Name\n@handle"
# Engagement counters, action labels, timestamps and dates
_METRIC_RE = re.compile(
    r"^\d[\d,.KkMm]*\s*$"  # Just a number (likes/RTs)
    r"|^\d[\d,.]*\s*(likes?|retweets?|replies|comments?|views?|reposts?)\b"
    r"|^(reply|repost|like|share|bookmark)\s*$"
    r"|^\d+:\d+\s*(AM|PM)"  # Timestamps "3:42 PM"
    r"|^\d{1,2}\s*(h|d|m|s|hr|min)\s*$"  # Relative time "2h", "3d"
    r"|^\d{1,2}/\d{1,2}/\d{2,4}$"  # Dates
    r"|^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d",
    re.IGNORECASE,
)
# Site names and meme-account names; unanchored, so a match anywhere counts
_PLATFORM_RE = re.compile(
    r"\bx\.com\b|\btwitter\.com\b|"
    r"\bmemezar\b|\bmeme.zar\b|"
    r"\bfuck.?jerry\b|\bdaquan\b|"
    r"\bworldstar\b|\b9gag\b|"
    r"\bladbible\b|\bunilad\b|"
    r"\bbored\s*panda\b",
    re.IGNORECASE,
)
# Interface chrome ("Replying to", "Show more", a bare "...")
_UI_RE = re.compile(
    r"^(replying to|quote tweet|translate tweet|show this thread)\b"
    r"|^(follow|following)\s*$"
    r"|^\.\.\.\s*$"
    r"|^(more|less)\s*$"
    r"|^show\s+more\b"
    r"|^translate\b",
    re.IGNORECASE,
)
# Repost credits and calls to action added by aggregator accounts
_WATERMARK_RE = re.compile(
    r"^(posted|reposted|shared)\s+(by|via|from)\b"
    r"|^(source|credit|via|from|originally)\s*[:@]"
    r"|^(dm|send|tag)\s+(me|this|for)\b"
    r"|^follow\s+(@|for)\b"
    r"|^turn\s+on\s+(post\s+)?notifications\b",
    re.IGNORECASE,
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _prefix_stripper(match):
|
| 80 |
+
"""Helper for regex: strip prefix only if it looks like OCR garbage, not real text."""
|
| 81 |
+
prefix = match.group(0)
|
| 82 |
+
# Keep it if it looks like a real sentence start (e.g. "I don't", "My mom")
|
| 83 |
+
words = prefix.strip().split()
|
| 84 |
+
if not words:
|
| 85 |
+
return prefix
|
| 86 |
+
# Common real-sentence starters — don't strip these
|
| 87 |
+
starters = {"i", "my", "me", "we", "he", "she", "they", "it", "the", "a", "an",
|
| 88 |
+
"when", "if", "so", "not", "but", "and", "or", "no", "why", "how",
|
| 89 |
+
"what", "who", "this", "that", "just", "do", "don't", "its", "im",
|
| 90 |
+
"you", "your", "can", "can't", "did", "is", "are", "was", "were",
|
| 91 |
+
"has", "have", "had", "be", "been", "being", "will", "would", "could",
|
| 92 |
+
"should", "shall", "may", "might", "must", "need", "dare",
|
| 93 |
+
"anyone", "everyone", "someone", "nobody", "people", "normalize"}
|
| 94 |
+
if words[0].lower().rstrip("'\"") in starters:
|
| 95 |
+
return prefix
|
| 96 |
+
# If first word is very short and not a real word, strip it
|
| 97 |
+
if len(words[0]) <= 2 and not words[0].lower() in {"i", "a", "an", "am", "as", "at",
|
| 98 |
+
"be", "by", "do", "go", "if",
|
| 99 |
+
"in", "is", "it", "me", "my",
|
| 100 |
+
"no", "of", "on", "or", "so",
|
| 101 |
+
"to", "up", "us", "we"}:
|
| 102 |
+
return ""
|
| 103 |
+
return prefix
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _clean_tweet_ocr(raw_text):
|
| 107 |
+
"""Extract just the tweet/joke text from noisy OCR output.
|
| 108 |
+
|
| 109 |
+
Tweet screenshots have a predictable layout:
|
| 110 |
+
[Profile name] <- short, before @handle
|
| 111 |
+
[@handle] <- @username
|
| 112 |
+
[Tweet text — the joke]
|
| 113 |
+
[Timestamp / metrics / platform UI]
|
| 114 |
+
[Watermarks]
|
| 115 |
+
|
| 116 |
+
We strip everything that isn't the joke.
|
| 117 |
+
"""
|
| 118 |
+
if not raw_text:
|
| 119 |
+
return ""
|
| 120 |
+
|
| 121 |
+
lines = raw_text.split("\n")
|
| 122 |
+
stripped_lines = [(i, l.strip()) for i, l in enumerate(lines)]
|
| 123 |
+
stripped_lines = [(i, l) for i, l in stripped_lines if l]
|
| 124 |
+
|
| 125 |
+
# First pass: identify @handle lines so we can also kill the display name above
|
| 126 |
+
handle_indices = set()
|
| 127 |
+
for idx, (i, l) in enumerate(stripped_lines):
|
| 128 |
+
if _HANDLE_RE.match(l):
|
| 129 |
+
handle_indices.add(idx)
|
| 130 |
+
|
| 131 |
+
kept = []
|
| 132 |
+
for idx, (i, l) in enumerate(stripped_lines):
|
| 133 |
+
if not l or len(l) <= 2:
|
| 134 |
+
continue
|
| 135 |
+
|
| 136 |
+
# Skip standalone @handles
|
| 137 |
+
if idx in handle_indices:
|
| 138 |
+
continue
|
| 139 |
+
|
| 140 |
+
# Skip display name lines: short line right before an @handle
|
| 141 |
+
if (idx + 1) in handle_indices and len(l) < 50:
|
| 142 |
+
continue
|
| 143 |
+
|
| 144 |
+
# Lines containing @handle: strip the handle AND everything before it
|
| 145 |
+
# "il Bear v \ @imartois Only 40 B.C." → "Only 40 B.C."
|
| 146 |
+
# "Coop ©) A> @_JustCoop think something" → "think something"
|
| 147 |
+
if re.search(r"@[\w_]+", l):
|
| 148 |
+
# Remove @handle and everything before it
|
| 149 |
+
after = re.sub(r"^.*?@[\w_]+\s*", "", l).strip()
|
| 150 |
+
if after and len(after) >= 10:
|
| 151 |
+
l = after
|
| 152 |
+
elif len(l) < 60:
|
| 153 |
+
# Short line dominated by the handle — skip it entirely
|
| 154 |
+
continue
|
| 155 |
+
# else: long line where handle is embedded, keep it but strip the handle
|
| 156 |
+
else:
|
| 157 |
+
l = re.sub(r"@[\w_]+", "", l).strip()
|
| 158 |
+
|
| 159 |
+
# Skip metrics (likes, RTs, timestamps, dates)
|
| 160 |
+
if _METRIC_RE.match(l):
|
| 161 |
+
continue
|
| 162 |
+
|
| 163 |
+
# Skip lines with multiple engagement metrics like "1.2K Reposts 4.5K Likes"
|
| 164 |
+
if re.match(r"^[\d.,KkMm\s]*(reposts?|likes?|retweets?|views?|bookmarks?|replies|comments?)[\d.,KkMm\s]*(reposts?|likes?|retweets?|views?|bookmarks?|replies|comments?)?", l, re.I):
|
| 165 |
+
continue
|
| 166 |
+
|
| 167 |
+
# Skip platform watermarks
|
| 168 |
+
if _PLATFORM_RE.search(l):
|
| 169 |
+
continue
|
| 170 |
+
|
| 171 |
+
# Skip UI elements (replying to, follow, etc.)
|
| 172 |
+
if _UI_RE.match(l):
|
| 173 |
+
continue
|
| 174 |
+
|
| 175 |
+
# Skip repost/credit watermarks
|
| 176 |
+
if _WATERMARK_RE.match(l):
|
| 177 |
+
continue
|
| 178 |
+
|
| 179 |
+
# Skip lines that are just special chars or very short garbage
|
| 180 |
+
if len(re.sub(r"[^a-zA-Z]", "", l)) < 3:
|
| 181 |
+
continue
|
| 182 |
+
|
| 183 |
+
# Skip verified badge / dot separators OCR artifacts
|
| 184 |
+
if re.match(r"^[·•.\-_=~\s]+$", l):
|
| 185 |
+
continue
|
| 186 |
+
|
| 187 |
+
kept.append(l)
|
| 188 |
+
|
| 189 |
+
if not kept:
|
| 190 |
+
return ""
|
| 191 |
+
|
| 192 |
+
# Join and clean up
|
| 193 |
+
text = " ".join(kept)
|
| 194 |
+
|
| 195 |
+
# Remove any remaining @handles that look like attributions at the end
|
| 196 |
+
text = re.sub(r"\s*[-—]\s*@\w+\s*$", "", text)
|
| 197 |
+
|
| 198 |
+
# Remove stray pipe characters (OCR line-break artifacts)
|
| 199 |
+
text = re.sub(r"\s*\|\s*", " ", text)
|
| 200 |
+
|
| 201 |
+
# --- Aggressive post-processing for remaining noise ---
|
| 202 |
+
|
| 203 |
+
# Remove any @handles that survived line-level cleaning
|
| 204 |
+
text = re.sub(r"@[\w_]+", "", text)
|
| 205 |
+
|
| 206 |
+
# Remove "follow @account for X" promo suffixes
|
| 207 |
+
text = re.sub(r"\s*//?\s*follow\s+\S+.*$", "", text, flags=re.I)
|
| 208 |
+
|
| 209 |
+
# Remove garbled engagement metrics: "QO 692k Q629 & 2,198 (7 22.2k"
|
| 210 |
+
text = re.sub(
|
| 211 |
+
r"[QO©®™&()\[\]{}]+\s*[\d,.KkMm]+(?:\s*[QO©®™&()\[\]{}]+\s*[\d,.KkMm]+)*",
|
| 212 |
+
"", text
|
| 213 |
+
)
|
| 214 |
+
# Remove standalone metrics like "22.2k", "45.3K", "937"
|
| 215 |
+
text = re.sub(r"\b\d+[.,]?\d*[KkMm]\b", "", text)
|
| 216 |
+
|
| 217 |
+
# Remove common OCR artifacts for Twitter UI elements
|
| 218 |
+
text = re.sub(r"[©®™¥§†‡]+", " ", text)
|
| 219 |
+
text = re.sub(r"\s[<>=]+\s", " ", text)
|
| 220 |
+
|
| 221 |
+
# Remove "RT" prefix
|
| 222 |
+
text = re.sub(r"^RT\s+", "", text)
|
| 223 |
+
|
| 224 |
+
# Remove "OP" suffix (reddit-style)
|
| 225 |
+
text = re.sub(r"\s+OP\s*$", "", text)
|
| 226 |
+
|
| 227 |
+
# Remove "Reply" / "Repost" / "4d" type trailing UI text
|
| 228 |
+
text = re.sub(r"\s+\d+[dhms]\s+(Reply|Repost|Like)\b.*$", "", text, flags=re.I)
|
| 229 |
+
text = re.sub(r"\s+(Reply|Repost)\s*$", "", text, flags=re.I)
|
| 230 |
+
|
| 231 |
+
# Collapse whitespace
|
| 232 |
+
text = re.sub(r"\s{2,}", " ", text).strip()
|
| 233 |
+
|
| 234 |
+
# Strip leading garbage: short non-sentence fragments at the start
|
| 235 |
+
# e.g., "il Bear v", "Coop ) A", "bubs é", "a HIMBooOoo SLICE My ="
|
| 236 |
+
# Strategy: if the first ~30 chars have mostly non-alpha or are gibberish, strip them
|
| 237 |
+
text = re.sub(r"^[^a-zA-Z\"'(]*", "", text) # Strip leading non-letter chars
|
| 238 |
+
# Remove short (< 3 word) gibberish prefixes before the main sentence
|
| 239 |
+
text = re.sub(r"^(\S+\s+){0,3}(?=[A-Z][a-z])", _prefix_stripper, text)
|
| 240 |
+
|
| 241 |
+
# Collapse whitespace again
|
| 242 |
+
text = re.sub(r"\s{2,}", " ", text).strip()
|
| 243 |
+
|
| 244 |
+
# Quality gate: if the result is mostly non-alpha, it's garbage
|
| 245 |
+
alpha_chars = re.findall(r"[a-zA-Z]", text)
|
| 246 |
+
if len(text) > 0 and len(alpha_chars) / len(text) < 0.55:
|
| 247 |
+
return ""
|
| 248 |
+
|
| 249 |
+
# Quality gate: reject if too many OCR garble indicators
|
| 250 |
+
garble_chars = len(re.findall(r"[©®™¥§†‡<>={}\\|^~]", text))
|
| 251 |
+
if garble_chars > 2:
|
| 252 |
+
return ""
|
| 253 |
+
|
| 254 |
+
# Quality gate: reject very short results
|
| 255 |
+
if len(text) < 15:
|
| 256 |
+
return ""
|
| 257 |
+
|
| 258 |
+
# Quality gate: reject text with too many words jammed together (OCR garble)
|
| 259 |
+
# Words like "parentsjtake" and "henonimyatnidge" indicate failed OCR
|
| 260 |
+
words = text.split()
|
| 261 |
+
if words:
|
| 262 |
+
long_words = sum(1 for w in words if len(w) > 15)
|
| 263 |
+
medium_words = sum(1 for w in words if len(w) > 12)
|
| 264 |
+
if long_words >= 1 or medium_words >= 3:
|
| 265 |
+
return ""
|
| 266 |
+
|
| 267 |
+
return text
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def _ocr_image(image_url):
    """Fetch an image over HTTP and return the text Tesseract reads from it.

    The raw OCR output is first passed through ``_clean_tweet_ocr``; if that
    yields at least 20 characters it is returned. Otherwise the raw output is
    returned with lines of two or fewer visible characters removed.

    Returns ``""`` when Pillow/pytesseract are not importable or when the
    download or OCR raises any exception (the failure is logged, not raised).
    """
    try:
        from PIL import Image
        import pytesseract
    except ImportError:
        logger.warning("pytesseract/Pillow not installed — skipping OCR")
        return ""

    try:
        download = requests.get(image_url, timeout=15)
        download.raise_for_status()

        # Grayscale input tends to OCR better than colour.
        picture = Image.open(io.BytesIO(download.content)).convert("L")

        raw = pytesseract.image_to_string(picture, lang="eng").strip()
        # Squash runs of three or more newlines down to one blank line.
        raw = re.sub(r"\n{3,}", "\n\n", raw)

        # Preferred result: the tweet cleaner's extraction of just the joke.
        joke = _clean_tweet_ocr(raw)
        if joke and len(joke) >= 20:
            return joke

        # Fallback: keep the raw OCR text, minus near-empty lines.
        surviving = []
        for row in raw.split("\n"):
            if len(row.strip()) > 2:
                surviving.append(row)
        return "\n".join(surviving).strip()
    except Exception as e:
        logger.debug(f"OCR failed for {image_url}: {e}")
        return ""
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _is_junk_caption(caption):
|
| 310 |
+
"""Check if a caption is just watermarks, tags, promo, or location metadata."""
|
| 311 |
+
if not caption:
|
| 312 |
+
return True
|
| 313 |
+
cleaned = caption.strip()
|
| 314 |
+
if len(cleaned) < OCR_CAPTION_THRESHOLD:
|
| 315 |
+
return True
|
| 316 |
+
if JUNK_CAPTION_RE.match(cleaned):
|
| 317 |
+
return True
|
| 318 |
+
# Overheard-style accounts: caption is just location + "Overheard by..."
|
| 319 |
+
if LOCATION_CAPTION_RE.search(cleaned) and len(cleaned) < 200:
|
| 320 |
+
return True
|
| 321 |
+
return False
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def _best_text(caption, image_url):
    """Choose the text to store for a post: its caption, or OCR of its image.

    Order of preference:
      1. A non-junk caption of at least ``OCR_CAPTION_THRESHOLD`` characters,
         with trailing hashtags removed (the untrimmed caption is returned if
         trimming leaves fewer than 20 characters).
      2. OCR text from ``image_url`` when it is at least 20 characters.
      3. The caption with trailing hashtags removed, if the caption itself is
         at least 20 characters.
      4. The empty string.
    """
    hashtag_tail = r"\s*#\w+(\s+#\w+)*\s*$"
    caption = (caption or "").strip()

    usable = not _is_junk_caption(caption) and len(caption) >= OCR_CAPTION_THRESHOLD
    if usable:
        trimmed = re.sub(hashtag_tail, "", caption).strip()
        if len(trimmed) >= 20:
            return trimmed
        return caption

    # Caption was rejected, so see whether the image itself holds the joke.
    if image_url:
        recognised = _ocr_image(image_url)
        if len(recognised) >= 20:
            return recognised

    # Last resort: a rejected caption that still has some length to it.
    if len(caption) < 20:
        return ""
    return re.sub(hashtag_tail, "", caption).strip()
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def _compute_likes_threshold(items):
    """Derive the minimum-likes cutoff for a batch of posts.

    Takes the middle element of the sorted truthy ``likesCount`` values (the
    upper middle for an even count), scales it by
    ``INSTAGRAM_MIN_LIKES_RATIO`` and truncates to an int. The result is never
    lower than ``INSTAGRAM_MIN_LIKES_ABSOLUTE``, which is also returned when
    no item has a truthy like count.
    """
    counts = [
        post.get("likesCount", 0) for post in items if post.get("likesCount")
    ]
    if not counts:
        return INSTAGRAM_MIN_LIKES_ABSOLUTE

    counts.sort()
    middle = counts[len(counts) // 2]
    scaled = int(middle * INSTAGRAM_MIN_LIKES_RATIO)
    return max(INSTAGRAM_MIN_LIKES_ABSOLUTE, scaled)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def scrape_source(source, backfill=False):
    """Scrape an Instagram account via Apify. Returns (found, new).

    Starts an Apify actor run for the account, polls it until it reaches a
    terminal status (up to 60 polls, 5 seconds apart), downloads the run's
    dataset and stores each sufficiently-liked post via ``insert_entry``.

    Args:
        source: Mapping with ``"handle_or_url"`` and ``"id"``; an optional
            ``"default_topic"`` is passed through to ``insert_entry``.
        backfill: When True request 50 posts instead of 12.

    Returns:
        ``(found, new)``: ``found`` counts posts that passed the engagement
        gate and produced usable text; ``new`` counts those for which
        ``insert_entry`` returned a truthy id. ``(0, 0)`` if the run did not
        end with status ``SUCCEEDED``.

    Raises:
        ValueError: If ``APIFY_API_TOKEN`` is not configured.
        requests.HTTPError: If any Apify request returns an error status.
    """
    handle = source["handle_or_url"].lstrip("@")
    source_id = source["id"]
    default_topic = source.get("default_topic")

    if not APIFY_API_TOKEN:
        raise ValueError("APIFY_API_TOKEN not set in .env")

    results_limit = 50 if backfill else 12

    # Start the actor run
    run_input = {
        "username": [handle],
        "resultsLimit": results_limit,
    }

    run_url = f"{APIFY_BASE}/acts/{ACTOR_ID}/runs"
    headers = {"Authorization": f"Bearer {APIFY_API_TOKEN}"}

    response = requests.post(
        run_url,
        json=run_input,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    run_data = response.json()["data"]
    run_id = run_data["id"]

    # Poll for completion
    status_url = f"{APIFY_BASE}/actor-runs/{run_id}"
    for _ in range(60):  # Max 5 minutes of sleeping between polls
        time.sleep(5)
        status_resp = requests.get(status_url, headers=headers, timeout=30)
        status_resp.raise_for_status()
        status = status_resp.json()["data"]["status"]
        if status in ("SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"):
            break

    # Also reached when polling ran out while the run was still in progress.
    if status != "SUCCEEDED":
        logger.error(f"Apify run {run_id} ended with status: {status}")
        return 0, 0

    # Fetch results
    dataset_id = run_data["defaultDatasetId"]
    items_url = f"{APIFY_BASE}/datasets/{dataset_id}/items"
    items_resp = requests.get(items_url, headers=headers, timeout=30)
    items_resp.raise_for_status()
    items = items_resp.json()

    found = 0
    new = 0
    ocr_used = 0
    skipped_low = 0

    # Compute per-account engagement threshold
    likes_threshold = _compute_likes_threshold(items)
    logger.info(f"  @{handle}: likes threshold = {likes_threshold}")

    for item in items:
        caption = item.get("caption") or ""
        image_url = item.get("displayUrl") or ""

        # A present-but-null likesCount would make the comparison below raise
        # TypeError, so normalise it to 0 (same idiom as the X/Twitter scraper).
        likes = item.get("likesCount") or 0

        # Engagement gate: skip posts below the account's threshold
        if likes < likes_threshold:
            skipped_low += 1
            continue

        # Get best text (caption or OCR)
        text = _best_text(caption, image_url)
        if not text or len(text) < 15:
            continue

        # Track if OCR was (probably) used: the caption was junk, there was an
        # image, and we still ended up with a reasonably long text.
        if _is_junk_caption(caption) and image_url and len(text) >= 20:
            ocr_used += 1

        found += 1
        shortcode = item.get("shortCode", "")

        entry_id = insert_entry(
            source_id=source_id,
            platform="instagram",
            text=text,
            platform_entry_id=shortcode or item.get("id"),
            url=f"https://www.instagram.com/p/{shortcode}/" if shortcode else None,
            author=handle,
            content_type="post",
            posted_at=item.get("timestamp"),
            topic=default_topic,
            likes=likes,
            shares=0,
            saves=0,
            quotes=0,
            replies=item.get("commentsCount", 0),
            image_url=image_url or None,
        )
        # NOTE(review): a falsy id presumably means the entry already
        # existed; confirm against scraper.db.insert_entry.
        if entry_id:
            new += 1

    if ocr_used:
        logger.info(f"  OCR extracted text from {ocr_used} images for @{handle}")
    if skipped_low:
        logger.info(f"  @{handle}: skipped {skipped_low} low-engagement posts (threshold={likes_threshold})")

    return found, new
|
scraper/platforms/reddit.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reddit scraper via old.reddit.com public JSON (no API key required)."""
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
import requests
|
| 5 |
+
from scraper.config import REDDIT_MIN_SCORE
|
| 6 |
+
from scraper.db import insert_entry
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger("joke-corpus")
|
| 9 |
+
|
| 10 |
+
# Host from which the subreddit listing URLs in scrape_source are built.
REDDIT_JSON_BASE = "https://old.reddit.com"
# Sent as the User-Agent header on every request made by _fetch_json.
USER_AGENT = "joke-corpus/0.1 (comedy research project)"
REQUEST_DELAY = 2.0  # seconds slept after each request in _fetch_json
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _fetch_json(url, params=None):
    """GET ``url`` and return the decoded JSON body.

    Sends the module's ``USER_AGENT``, raises for HTTP error statuses, and
    sleeps ``REQUEST_DELAY`` seconds after a successful response so that
    consecutive calls are spaced out.
    """
    response = requests.get(
        url,
        params=params,
        headers={"User-Agent": USER_AGENT},
        timeout=15,
    )
    response.raise_for_status()
    # Throttle: the pause happens before the payload is handed back.
    time.sleep(REQUEST_DELAY)
    return response.json()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _process_post(post, source_id, default_topic):
|
| 25 |
+
"""Process a single Reddit post. Returns 1 if new, 0 otherwise."""
|
| 26 |
+
if post.get("stickied") or post.get("over_18"):
|
| 27 |
+
return 0
|
| 28 |
+
|
| 29 |
+
# Engagement gate: skip low-scoring posts
|
| 30 |
+
if post.get("score", 0) < REDDIT_MIN_SCORE:
|
| 31 |
+
return 0
|
| 32 |
+
|
| 33 |
+
title = (post.get("title") or "").strip()
|
| 34 |
+
selftext = (post.get("selftext") or "").strip()
|
| 35 |
+
|
| 36 |
+
# Skip removed/deleted content
|
| 37 |
+
if selftext in ("[removed]", "[deleted]"):
|
| 38 |
+
selftext = ""
|
| 39 |
+
|
| 40 |
+
# For joke subreddits: title = setup, selftext = punchline
|
| 41 |
+
if selftext and title:
|
| 42 |
+
text = f"{title}\n\n{selftext}"
|
| 43 |
+
elif title:
|
| 44 |
+
text = title
|
| 45 |
+
else:
|
| 46 |
+
return 0
|
| 47 |
+
|
| 48 |
+
# Length gates
|
| 49 |
+
if len(text) < 20 or len(text) > 2000:
|
| 50 |
+
return 0
|
| 51 |
+
|
| 52 |
+
post_id = post.get("id", "")
|
| 53 |
+
|
| 54 |
+
entry_id = insert_entry(
|
| 55 |
+
source_id=source_id,
|
| 56 |
+
platform="reddit",
|
| 57 |
+
text=text,
|
| 58 |
+
platform_entry_id=post_id,
|
| 59 |
+
url=f"https://reddit.com{post.get('permalink', '')}",
|
| 60 |
+
author=post.get("author", "[deleted]"),
|
| 61 |
+
content_type="post",
|
| 62 |
+
posted_at=str(post.get("created_utc", "")),
|
| 63 |
+
topic=default_topic,
|
| 64 |
+
likes=post.get("score", 0),
|
| 65 |
+
shares=0,
|
| 66 |
+
saves=0,
|
| 67 |
+
quotes=0,
|
| 68 |
+
replies=post.get("num_comments", 0),
|
| 69 |
+
)
|
| 70 |
+
return 1 if entry_id else 0
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def scrape_source(source, backfill=False):
|
| 74 |
+
"""Scrape a Reddit subreddit via public JSON. Returns (found, new)."""
|
| 75 |
+
handle = source["handle_or_url"]
|
| 76 |
+
source_id = source["id"]
|
| 77 |
+
default_topic = source.get("default_topic")
|
| 78 |
+
|
| 79 |
+
# Normalize: "r/Jokes" -> "Jokes"
|
| 80 |
+
subreddit_name = handle.lstrip("r/").strip("/")
|
| 81 |
+
|
| 82 |
+
sort = "top"
|
| 83 |
+
url = f"{REDDIT_JSON_BASE}/r/{subreddit_name}/{sort}.json"
|
| 84 |
+
params = {"limit": 100, "raw_json": 1}
|
| 85 |
+
params["t"] = "month" if backfill else "week"
|
| 86 |
+
|
| 87 |
+
found = 0
|
| 88 |
+
new = 0
|
| 89 |
+
pages = 3 if backfill else 2
|
| 90 |
+
|
| 91 |
+
for page in range(pages):
|
| 92 |
+
try:
|
| 93 |
+
data = _fetch_json(url, params)
|
| 94 |
+
except Exception as e:
|
| 95 |
+
logger.error(f"Failed to fetch r/{subreddit_name} page {page+1}: {e}")
|
| 96 |
+
break
|
| 97 |
+
|
| 98 |
+
posts = data.get("data", {}).get("children", [])
|
| 99 |
+
if not posts:
|
| 100 |
+
break
|
| 101 |
+
|
| 102 |
+
for post_wrapper in posts:
|
| 103 |
+
post = post_wrapper.get("data", {})
|
| 104 |
+
found += 1
|
| 105 |
+
new += _process_post(post, source_id, default_topic)
|
| 106 |
+
|
| 107 |
+
# Pagination
|
| 108 |
+
after = data.get("data", {}).get("after")
|
| 109 |
+
if not after:
|
| 110 |
+
break
|
| 111 |
+
params["after"] = after
|
| 112 |
+
|
| 113 |
+
logger.info(f"r/{subreddit_name}: found={found}, new={new}")
|
| 114 |
+
return found, new
|
scraper/platforms/rss.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""RSS/Atom feed scraper for Substacks, satirical outlets, etc."""
|
| 2 |
+
import logging
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import feedparser
|
| 5 |
+
from scraper.db import insert_entry
|
| 6 |
+
from scraper.utils import clean_text
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger("joke-corpus")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def scrape_source(source, backfill=False):
    """Scrape an RSS feed. Returns (found, new).

    Parses the feed with ``feedparser`` and stores one entry per feed item.
    The item's full content is preferred, then its summary; when neither
    yields at least 20 characters of text, a title of 10+ characters is used
    on its own (for satirical outlets the headline may be the joke).

    Args:
        source: Mapping with ``"handle_or_url"`` (the feed URL) and ``"id"``;
            optional ``"default_topic"`` and ``"display_name"`` are used for
            the stored topic and as the fallback author.
        backfill: Accepted for interface parity with the other scrapers; not
            used here.

    Returns:
        ``(found, new)``: items with usable text, and items for which
        ``insert_entry`` returned a truthy id. ``(0, 0)`` when the feed is
        malformed and yielded no entries.
    """
    # Imported once per call instead of once per feed entry.
    import re

    feed_url = source["handle_or_url"]
    source_id = source["id"]
    default_topic = source.get("default_topic")

    feed = feedparser.parse(feed_url)

    if feed.bozo and not feed.entries:
        logger.error(f"Feed error for {feed_url}: {feed.bozo_exception}")
        return 0, 0

    html_tag = re.compile(r"<[^>]+>")

    found = 0
    new = 0

    for entry in feed.entries:
        title = entry.get("title", "")

        # Try to get full content, fall back to summary. An empty content
        # list or an empty content value also falls through to the summary.
        content = ""
        content_blocks = entry.get("content") or []
        if content_blocks:
            content = content_blocks[0].get("value", "") or ""
        if not content:
            content = entry.get("summary", "") or ""

        # Strip HTML tags (basic)
        content = html_tag.sub("", content)
        content = clean_text(content)

        if not content or len(content.strip()) < 20:
            # For satirical outlets, the headline alone might be the joke
            if title and len(title.strip()) >= 10:
                content = title
            else:
                continue

        text = f"{title}\n\n{content}" if title and content != title else content
        found += 1

        # Parse date
        posted_at = None
        if "published_parsed" in entry and entry.published_parsed:
            try:
                posted_at = datetime(*entry.published_parsed[:6]).isoformat()
            except (TypeError, ValueError):
                # Unparseable date: store the entry without a timestamp.
                pass

        # Generate a stable ID from the feed id, else the link, else the title
        entry_id_str = entry.get("id") or entry.get("link") or title

        entry_id = insert_entry(
            source_id=source_id,
            platform="rss",
            text=text,
            platform_entry_id=entry_id_str,
            url=entry.get("link"),
            author=entry.get("author", source.get("display_name")),
            content_type="article",
            posted_at=posted_at,
            topic=default_topic,
        )
        if entry_id:
            new += 1

    return found, new
|
scraper/platforms/x_twitter.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""X/Twitter scraper via Apify (no Twitter API key required)."""
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
import requests
|
| 5 |
+
from scraper.config import APIFY_API_TOKEN, TWITTER_MIN_FAVES
|
| 6 |
+
from scraper.db import insert_entry
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger("joke-corpus")
|
| 9 |
+
|
| 10 |
+
# Apify actor whose runs are started by scrape_source.
ACTOR_ID = "apidojo~tweet-scraper"
# Root of the Apify REST API; the run, status and dataset URLs are built from it.
APIFY_BASE = "https://api.apify.com/v2"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def scrape_source(source, backfill=False):
    """Collect recent popular tweets from one X/Twitter account through Apify.

    Launches an Apify actor run with a ``from:<handle> min_faves:<n>`` search,
    polls it (up to 60 times, 5 seconds apart) until it reaches a terminal
    status, then stores each qualifying tweet via ``insert_entry``.

    Returns ``(found, new)``: tweets that passed all filters, and those for
    which ``insert_entry`` returned a truthy id. Returns ``(0, 0)`` when the
    run does not finish with status ``SUCCEEDED``. Raises ``ValueError`` when
    ``APIFY_API_TOKEN`` is unset.
    """
    handle = source["handle_or_url"].lstrip("@")
    source_id = source["id"]
    default_topic = source.get("default_topic")

    if not APIFY_API_TOKEN:
        raise ValueError("APIFY_API_TOKEN not set in .env")

    # Pre-filter by likes inside the search itself so only quality tweets
    # are returned (and paid for).
    payload = {
        "searchTerms": [f"from:{handle} min_faves:{TWITTER_MIN_FAVES}"],
        "maxItems": 50 if backfill else 20,
        "sort": "Latest",
        "tweetLanguage": "en",
    }
    auth = {"Authorization": f"Bearer {APIFY_API_TOKEN}"}

    launch = requests.post(
        f"{APIFY_BASE}/acts/{ACTOR_ID}/runs",
        json=payload,
        headers=auth,
        timeout=30,
    )
    launch.raise_for_status()
    run_data = launch.json()["data"]
    run_id = run_data["id"]

    logger.info(f"Started Apify run {run_id} for @{handle}")

    # Wait for the run to finish: 60 polls x 5 s sleep.
    terminal_states = ("SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT")
    status_url = f"{APIFY_BASE}/actor-runs/{run_id}"
    for _attempt in range(60):
        time.sleep(5)
        poll = requests.get(status_url, headers=auth, timeout=30)
        poll.raise_for_status()
        status = poll.json()["data"]["status"]
        if status in terminal_states:
            break

    if status != "SUCCEEDED":
        logger.error(f"Apify run {run_id} for @{handle} ended with status: {status}")
        return 0, 0

    # Download the run's dataset.
    dataset_id = run_data["defaultDatasetId"]
    dataset = requests.get(
        f"{APIFY_BASE}/datasets/{dataset_id}/items", headers=auth, timeout=30
    )
    dataset.raise_for_status()
    tweets = dataset.json()

    found = 0
    new = 0

    for tweet in tweets:
        if tweet.get("isRetweet"):
            continue

        if tweet.get("isReply"):
            # Self-replies (threads) are fine; replies to other users are not.
            replier = tweet.get("author", {})
            target_id = tweet.get("inReplyToUserId") or tweet.get("conversationUserId")
            replier_id = replier.get("id")
            if target_id and str(target_id) != str(replier_id):
                continue

        text = (tweet.get("fullText") or tweet.get("text") or "").strip()

        # Too short to be a joke (this also covers the empty string).
        if len(text) < 15:
            continue

        # A lone URL with no accompanying words.
        if text.startswith("http") and " " not in text:
            continue

        # Engagement sanity check; the search pre-filter should already cover it.
        if (tweet.get("likeCount") or 0) < TWITTER_MIN_FAVES:
            continue

        found += 1
        author = tweet.get("author", {})
        tweet_id = tweet.get("id") or tweet.get("tweetId") or ""

        stored = insert_entry(
            source_id=source_id,
            platform="x_twitter",
            text=text,
            platform_entry_id=str(tweet_id),
            url=tweet.get("url") or f"https://x.com/{handle}/status/{tweet_id}",
            author=author.get("userName") or handle,
            content_type="post",
            posted_at=tweet.get("createdAt"),
            topic=default_topic,
            likes=tweet.get("likeCount", 0),
            shares=tweet.get("retweetCount", 0),
            saves=tweet.get("bookmarkCount", 0),
            quotes=tweet.get("quoteCount", 0),
            replies=tweet.get("replyCount", 0),
        )
        if stored:
            new += 1

    logger.info(f"@{handle}: found={found}, new={new}")
    return found, new
|
scraper/platforms/youtube.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""YouTube scraper — transcripts + comments."""
|
| 2 |
+
import logging
|
| 3 |
+
from googleapiclient.discovery import build
|
| 4 |
+
from scraper.config import YOUTUBE_API_KEY
|
| 5 |
+
from scraper.db import insert_entry
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger("joke-corpus")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _get_service():
    """Build a YouTube Data API v3 client authenticated with ``YOUTUBE_API_KEY``.

    Raises ``ValueError`` when the key is not configured.
    """
    if YOUTUBE_API_KEY:
        return build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
    raise ValueError("YOUTUBE_API_KEY not set in .env")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _get_transcript(video_id):
    """Return a video's transcript as one space-joined string, or None.

    Uses ``youtube-transcript-api``; any failure (package missing, no
    transcript available, request error) is logged at debug level and
    reported as ``None``.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        segments = YouTubeTranscriptApi.get_transcript(video_id)
        pieces = [segment["text"] for segment in segments]
        return " ".join(pieces)
    except Exception as e:
        logger.debug(f"No transcript for {video_id}: {e}")
        return None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _scrape_top_comments(service, video_id, source_id, default_topic,
                         max_comments=5):
    """Store the most relevant top-level comments of one video.

    Best-effort: if the comments request or an insert fails (for example
    because comments are disabled on the video), the error is logged at debug
    level and whatever was counted so far is returned.

    Returns:
        tuple: ``(found, new)`` where ``found`` counts comments long enough to
        keep and ``new`` counts those for which ``insert_entry`` returned a
        truthy id.
    """
    found = 0
    new = 0
    try:
        comments_response = service.commentThreads().list(
            videoId=video_id,
            part="snippet",
            maxResults=max_comments,
            order="relevance",
            # Ask for plain text: the API's default "html" format returns
            # markup and HTML entities in textDisplay.
            textFormat="plainText",
        ).execute()

        for thread in comments_response.get("items", []):
            comment = thread["snippet"]["topLevelComment"]["snippet"]
            comment_text = comment.get("textDisplay", "")
            if not comment_text or len(comment_text.strip()) < 10:
                continue

            found += 1
            entry_id = insert_entry(
                source_id=source_id,
                platform="youtube",
                text=comment_text,
                platform_entry_id=thread["id"],
                url=f"https://www.youtube.com/watch?v={video_id}",
                author=comment.get("authorDisplayName"),
                content_type="reply",
                posted_at=comment.get("publishedAt"),
                topic=default_topic,
                likes=int(comment.get("likeCount", 0)),
            )
            if entry_id:
                new += 1
    except Exception as e:
        logger.debug(f"Comments disabled or error for {video_id}: {e}")
    return found, new


def scrape_source(source, backfill=False):
    """Scrape a YouTube channel's recent videos and their top comments.

    For each video the transcript is stored when one can be fetched; otherwise
    the description (if longer than 30 characters) or just the title is used.
    Texts shorter than 20 characters are skipped.

    Args:
        source: mapping with ``handle_or_url`` (a channel id, or a handle
            starting with ``@``), ``id`` and optionally ``default_topic``.
        backfill: when true, fetch up to 5 pages of 50 videos instead of a
            single page of 10.

    Returns:
        tuple: ``(found, new)`` -- entries seen and entries for which
        ``insert_entry`` returned a truthy id. ``(0, 0)`` when a handle cannot
        be resolved to a channel.

    Raises:
        ValueError: if the YouTube API key is not configured.
    """
    channel_id_or_url = source["handle_or_url"]
    source_id = source["id"]
    default_topic = source.get("default_topic")

    service = _get_service()

    found = 0
    new = 0
    max_results = 50 if backfill else 10

    # A value starting with "@" is a handle: resolve it to a channel id via
    # search. Anything else is used as the channel id directly.
    channel_id = channel_id_or_url
    if channel_id.startswith("@"):
        search = service.search().list(
            q=channel_id, type="channel", part="id", maxResults=1
        ).execute()
        matches = search.get("items") or []
        if not matches:
            logger.error(f"Channel not found: {channel_id_or_url}")
            return 0, 0
        channel_id = matches[0]["id"]["channelId"]

    # Get recent videos, newest first.
    page_token = None
    pages = 5 if backfill else 1

    for _ in range(pages):
        search_response = service.search().list(
            channelId=channel_id,
            type="video",
            part="id,snippet",
            maxResults=max_results,
            order="date",
            pageToken=page_token,
        ).execute()

        # Keep only results that actually carry a video id.
        videos = [
            item
            for item in search_response.get("items", [])
            if item["id"].get("videoId")
        ]
        if not videos:
            break

        # One batched statistics request for the whole page.
        stats_response = service.videos().list(
            id=",".join(item["id"]["videoId"] for item in videos),
            part="statistics",
        ).execute()
        stats_map = {
            item["id"]: item.get("statistics", {})
            for item in stats_response.get("items", [])
        }

        for item in videos:
            video_id = item["id"]["videoId"]
            snippet = item["snippet"]

            transcript = _get_transcript(video_id)

            title = snippet.get("title", "")
            description = snippet.get("description", "")

            if transcript:
                text = f"# {title}\n\n{transcript}"
                content_type = "transcript_chunk"
            elif description and len(description) > 30:
                text = f"# {title}\n\n{description}"
                content_type = "post"
            else:
                text = title
                content_type = "post"

            if len(text.strip()) < 20:
                continue

            found += 1
            stats = stats_map.get(video_id, {})

            entry_id = insert_entry(
                source_id=source_id,
                platform="youtube",
                text=text,
                platform_entry_id=video_id,
                url=f"https://www.youtube.com/watch?v={video_id}",
                author=snippet.get("channelTitle"),
                content_type=content_type,
                posted_at=snippet.get("publishedAt"),
                topic=default_topic,
                likes=int(stats.get("likeCount", 0)),
                shares=0,
                saves=0,
                quotes=0,
                replies=int(stats.get("commentCount", 0)),
            )
            if entry_id:
                new += 1

            # Grab top comments
            comments_found, comments_new = _scrape_top_comments(
                service, video_id, source_id, default_topic
            )
            found += comments_found
            new += comments_new

        page_token = search_response.get("nextPageToken")
        if not page_token:
            break

    return found, new
|
scraper/scoring.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from scraper.db import get_db
|
| 2 |
+
|
| 3 |
+
# Multiplier applied to each engagement count by calculate_raw_score.
SIGNAL_WEIGHTS = {
    "saves": 5,
    "shares": 4,
    "quotes": 3,
    "likes": 1,
}

# Minimum normalised score (raw score divided by the source median) for each
# tier. assign_tier compares with >=, so every bound is inclusive; anything
# below the "B" bound is tier "C".
TIER_THRESHOLDS = {
    "S": 10.0,  # >= 10x median
    "A": 5.0,   # >= 5x and < 10x median
    "B": 2.0,   # >= 2x and < 5x median
}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def calculate_raw_score(likes=0, shares=0, saves=0, quotes=0, **_kwargs):
    """Return the weighted sum of the engagement counts.

    Each count is multiplied by its entry in ``SIGNAL_WEIGHTS``; a falsy count
    (``None`` or 0) contributes nothing. Extra keyword arguments are accepted
    and ignored.
    """
    signal_counts = (
        ("saves", saves),
        ("shares", shares),
        ("quotes", quotes),
        ("likes", likes),
    )
    total = 0
    for signal, count in signal_counts:
        total += (count or 0) * SIGNAL_WEIGHTS[signal]
    return total
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def assign_tier(normalised_score):
    """Map a normalised score to a quality tier: "S", "A", "B" or "C".

    Tiers are checked from highest to lowest; the first one whose threshold in
    ``TIER_THRESHOLDS`` is met (inclusive) wins, and "C" is the fallback.
    """
    for tier in ("S", "A", "B"):
        if normalised_score >= TIER_THRESHOLDS[tier]:
            return tier
    return "C"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def score_entry(entry_id):
    """Compute and store the score and quality tier of one entry.

    Entries on the "curated" platform are assigned tier "A" directly. All
    other entries get a raw score from their engagement counts, which is then
    divided by the median engagement of their source (1.0 when the entry has
    no source or the source has no positive median) and mapped to a tier.

    Args:
        entry_id: primary key of the row in ``entries``.

    Returns:
        ``None`` when no entry with that id exists; otherwise a tuple
        ``(raw_score, normalised_score, tier)`` -- ``(0, 0, "A")`` for
        curated entries.
    """
    with get_db() as conn:
        entry = conn.execute(
            "SELECT * FROM entries WHERE id = ?", (entry_id,)
        ).fetchone()
        if not entry:
            return None

        # Curated jokes are hand-picked quality — auto-assign tier A
        if entry["platform"] == "curated":
            conn.execute(
                "UPDATE entries SET quality_tier = 'A' WHERE id = ?",
                (entry_id,),
            )
            return 0, 0, "A"

        raw = calculate_raw_score(
            likes=entry["likes"],
            shares=entry["shares"],
            saves=entry["saves"],
            quotes=entry["quotes"],
        )

        # Get source median
        median = 1.0
        if entry["source_id"]:
            source = conn.execute(
                "SELECT median_engagement FROM sources WHERE id = ?",
                (entry["source_id"],),
            ).fetchone()
            # A NULL median comes back as None, and None > 0 raises TypeError,
            # so treat it like a missing median and keep the 1.0 default.
            source_median = source["median_engagement"] if source else None
            if source_median and source_median > 0:
                median = source_median

        normalised = raw / median
        tier = assign_tier(normalised)

        conn.execute(
            """UPDATE entries
            SET raw_score = ?, normalised_score = ?, quality_tier = ?
            WHERE id = ?""",
            (raw, normalised, tier, entry_id),
        )
        return raw, normalised, tier
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def score_all_for_source(source_id):
    """Score every entry of *source_id* and return how many were scored.

    An entry counts as scored when ``score_entry`` returns a truthy result.
    """
    with get_db() as conn:
        rows = conn.execute(
            "SELECT id FROM entries WHERE source_id = ?",
            (source_id,),
        ).fetchall()

    return sum(1 for row in rows if score_entry(row["id"]))
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def rescore_source(source_id):
    """Refresh the median of a source, then rescore all of its entries.

    Returns the count reported by ``score_all_for_source``.
    """
    # Imported here rather than at module level, as in the original.
    from scraper.db import update_source_median as refresh_median

    refresh_median(source_id)
    scored_count = score_all_for_source(source_id)
    return scored_count
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def calculate_elo(winner_elo, loser_elo, k=32):
    """Return updated ELO ratings after the winner beat the loser.

    Uses the standard logistic expectation on a 400-point scale. ``k`` is the
    maximum rating change per matchup (32 by default, suited to volatile early
    ratings).

    Returns:
        tuple: ``(new_winner_elo, new_loser_elo)``, each rounded to one
        decimal place.
    """
    def expected_score(own, opponent):
        # Probability that ``own`` wins against ``opponent``.
        return 1 / (1 + 10 ** ((opponent - own) / 400))

    winner_after = winner_elo + k * (1 - expected_score(winner_elo, loser_elo))
    loser_after = loser_elo + k * (0 - expected_score(loser_elo, winner_elo))
    return round(winner_after, 1), round(loser_after, 1)
|
scraper/static/__init__.py
ADDED
|
File without changes
|
scraper/static/import_datasets.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""One-time import of pre-built comedy datasets."""
|
| 2 |
+
import csv
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import click
|
| 9 |
+
import requests
|
| 10 |
+
|
| 11 |
+
from scraper.db import insert_entry, add_source, get_db
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger("joke-corpus")
|
| 14 |
+
|
| 15 |
+
DATASETS_DIR = Path(__file__).parent.parent.parent / "datasets"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def run():
    """Run every dataset importer and report the total of new entries."""
    DATASETS_DIR.mkdir(exist_ok=True)
    click.echo("Importing pre-built datasets...\n")

    # Importers run in this order; each returns its count of new entries.
    importers = (_import_short_jokes, _import_rjokes)
    total_new = 0
    for importer in importers:
        total_new += importer()

    click.echo(f"\nDone. Total new entries: {total_new}")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _import_short_jokes():
    """Import the Fraser/short-jokes dataset from HuggingFace.

    Downloads the dataset into ``DATASETS_DIR`` unless it is already there,
    then inserts every line of at least 10 characters as a joke entry.

    Returns:
        int: number of lines for which ``insert_entry`` returned a truthy id.

    Raises:
        requests.RequestException: if the download fails.
    """
    click.echo("--- Fraser/short-jokes (HuggingFace) ---")

    url = "https://raw.githubusercontent.com/Fraser-Greenlee/my-huggingface-datasets/master/data/short-jokes/train.txt"
    local_path = DATASETS_DIR / "shortjokes.txt"

    if not local_path.exists():
        click.echo(" Downloading...")
        # Stream into a temporary sibling file and rename it only once the
        # download is complete, so an interrupted download is never mistaken
        # for the full dataset on the next run.
        partial_path = local_path.with_name(local_path.name + ".part")
        try:
            with requests.get(url, stream=True, timeout=120) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            partial_path.replace(local_path)
        finally:
            # No-op after a successful rename; removes the leftover otherwise.
            partial_path.unlink(missing_ok=True)
        click.echo(" Downloaded.")

    # Create source
    source_id = _ensure_source(
        category="dataset",
        platform="huggingface",
        handle="Fraser/short-jokes",
        display_name="Short Jokes (231K)",
        default_topic="apolitical",
        notes="231K short jokes from HuggingFace",
    )

    new = 0
    with open(local_path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            joke = line.strip()
            if not joke or len(joke) < 10:
                continue

            # The line index keeps platform_entry_id stable across re-runs.
            entry_id = insert_entry(
                source_id=source_id,
                platform="huggingface",
                text=joke,
                platform_entry_id=f"shortjokes-{i}",
                content_type="joke",
                topic="apolitical",
            )
            if entry_id:
                new += 1

            if i % 10000 == 0 and i > 0:
                click.echo(f" Processed {i}...")

    click.echo(f" Imported {new} new jokes.")
    return new
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _import_rjokes():
    """Import the r/Jokes rated dataset.

    Downloads the test split into ``DATASETS_DIR`` unless it is already there.
    A failed download is reported and the import is skipped.

    Returns:
        int: number of rows for which ``insert_entry`` returned a truthy id
        (0 when the download failed).
    """
    click.echo("--- r/Jokes Rated Dataset ---")

    # This dataset is on GitHub — try downloading
    url = "https://raw.githubusercontent.com/orionw/rJokesData/master/data/test.tsv"
    local_path = DATASETS_DIR / "rjokes_test.tsv"

    if not local_path.exists():
        click.echo(" Downloading test split...")
        # Write to a temporary sibling file first so a failed write cannot
        # leave a truncated dataset at the final path.
        partial_path = local_path.with_name(local_path.name + ".part")
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            partial_path.write_bytes(response.content)
            partial_path.replace(local_path)
            click.echo(" Downloaded.")
        except Exception as e:
            partial_path.unlink(missing_ok=True)
            click.echo(f" Download failed: {e}. Skipping.")
            return 0

    source_id = _ensure_source(
        category="dataset",
        platform="github",
        handle="orionw/rJokesData",
        display_name="r/Jokes Rated",
        default_topic="apolitical",
        notes="Reddit jokes with quality ratings",
    )

    new = 0
    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires for file objects.
    with open(local_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter="\t")
        for i, row in enumerate(reader):
            if len(row) < 2:
                continue

            # NOTE(review): assumes column 0 is the joke text and column 1 its
            # integer score — confirm against the dataset's column order.
            joke = row[0].strip()
            if len(joke) < 10:
                continue

            # Score from dataset if available
            try:
                score = int(row[1])
            except ValueError:
                score = None

            entry_id = insert_entry(
                source_id=source_id,
                platform="github",
                text=joke,
                platform_entry_id=f"rjokes-{i}",
                content_type="joke",
                topic="apolitical",
                likes=score,
            )
            if entry_id:
                new += 1

    click.echo(f" Imported {new} new jokes.")
    return new
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def _ensure_source(category, platform, handle, display_name=None,
                   default_topic=None, notes=None):
    """Return the id of the source matching *platform* and *handle*.

    Looks the source up by ``platform`` and ``handle_or_url``; when no row
    exists, creates one with ``add_source`` and returns its result.
    """
    lookup_sql = "SELECT id FROM sources WHERE platform = ? AND handle_or_url = ?"
    with get_db() as conn:
        row = conn.execute(lookup_sql, (platform, handle)).fetchone()
        if row:
            return row["id"]

    source_fields = dict(
        category=category,
        platform=platform,
        handle_or_url=handle,
        display_name=display_name,
        default_topic=default_topic,
        notes=notes,
    )
    return add_source(**source_fields)
|
scraper/utils.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import logging
|
| 3 |
+
from functools import wraps
|
| 4 |
+
|
| 5 |
+
logger = logging.getLogger("joke-corpus")
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def rate_limit(calls_per_second=1.0):
    """Decorator factory that throttles calls to the wrapped function.

    Each decorated function keeps its own timestamp of the previous call and
    sleeps just long enough to keep at least ``1 / calls_per_second`` seconds
    between the starts of consecutive calls. The first call never waits.

    Args:
        calls_per_second: maximum call rate; must be non-zero.

    Returns:
        A decorator preserving the wrapped function's metadata.
    """
    min_interval = 1.0 / calls_per_second

    def decorator(func):
        # None until the first call. Timestamps come from the monotonic clock
        # so system clock adjustments cannot skip or stretch the wait.
        last_called = None

        @wraps(func)
        def wrapper(*args, **kwargs):
            nonlocal last_called
            if last_called is not None:
                remaining = min_interval - (time.monotonic() - last_called)
                if remaining > 0:
                    time.sleep(remaining)
            last_called = time.monotonic()
            return func(*args, **kwargs)

        return wrapper

    return decorator
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def retry(max_attempts=3, backoff_base=2.0, exceptions=(Exception,)):
    """Decorator factory that retries a function with exponential backoff.

    The wrapped function is called up to ``max_attempts`` times. After a
    failed attempt number ``n`` (counting from 0) that is not the last one, a
    warning is logged and the wrapper sleeps ``backoff_base ** n`` seconds.
    The exception of the final attempt is re-raised; exceptions not listed in
    ``exceptions`` propagate immediately.

    Args:
        max_attempts: total number of attempts; must be at least 1.
        backoff_base: base of the exponential wait, in seconds.
        exceptions: tuple of exception types that trigger a retry.

    Raises:
        ValueError: if ``max_attempts`` is smaller than 1 (the function would
            otherwise never be called and the wrapper would return None).
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_attempts):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    # Out of attempts: let the caller see the real error.
                    if attempt == max_attempts - 1:
                        raise
                    wait = backoff_base ** attempt
                    logger.warning(
                        "%s attempt %d failed: %s. Retrying in %.1fs...",
                        func.__name__, attempt + 1, e, wait,
                    )
                    time.sleep(wait)

        return wrapper

    return decorator
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def clean_text(text):
    """Normalise whitespace in *text* before it is stored.

    Every run of whitespace becomes a single space and leading/trailing
    whitespace is removed. Falsy input (``None`` or ``""``) yields ``""``.
    """
    if text:
        words = text.split()
        return " ".join(words)
    return ""
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def truncate(text, max_len=300):
    """Truncate *text* for display so the result never exceeds *max_len*.

    Text that already fits is returned unchanged and falsy input yields
    ``""``. Longer text is cut and ends with ``"..."``; the ellipsis counts
    towards ``max_len``. When ``max_len`` leaves no room for any text before
    the ellipsis, the text is cut without one.
    """
    if not text or len(text) <= max_len:
        return text or ""

    ellipsis = "..."
    if max_len <= len(ellipsis):
        return text[:max(max_len, 0)]
    # Reserve room for the whole ellipsis so the result is max_len long.
    return text[:max_len - len(ellipsis)] + ellipsis
|
static/snorter-hero.png
ADDED
|
Git LFS Details
|
static/snorter-logo.png
ADDED
|
Git LFS Details
|
templates/index.html
ADDED
|
@@ -0,0 +1,1345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Joke Corpus</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--bg: #0d1117;
|
| 10 |
+
--bg-card: #161b22;
|
| 11 |
+
--bg-hover: #1c2333;
|
| 12 |
+
--border: #30363d;
|
| 13 |
+
--text: #e6edf3;
|
| 14 |
+
--text-dim: #8b949e;
|
| 15 |
+
--text-bright: #f0f6fc;
|
| 16 |
+
--accent: #58a6ff;
|
| 17 |
+
--gold: #f0c040;
|
| 18 |
+
--silver: #b0b8c0;
|
| 19 |
+
--bronze: #cd7f32;
|
| 20 |
+
--grey: #484f58;
|
| 21 |
+
--red: #f85149;
|
| 22 |
+
--green: #3fb950;
|
| 23 |
+
--purple: #bc8cff;
|
| 24 |
+
--pink: #f778ba;
|
| 25 |
+
--orange: #f0883e;
|
| 26 |
+
--cyan: #39d2c0;
|
| 27 |
+
|
| 28 |
+
--topic-left: #58a6ff;
|
| 29 |
+
--topic-right: #f85149;
|
| 30 |
+
--topic-apolitical: #8b949e;
|
| 31 |
+
--topic-health: #3fb950;
|
| 32 |
+
--topic-science: #bc8cff;
|
| 33 |
+
--topic-culture: #f0883e;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
| 37 |
+
|
| 38 |
+
body {
|
| 39 |
+
font-family: 'SF Mono', 'Fira Code', 'Cascadia Code', monospace;
|
| 40 |
+
background: var(--bg);
|
| 41 |
+
color: var(--text);
|
| 42 |
+
line-height: 1.5;
|
| 43 |
+
font-size: 14px;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
.container {
|
| 47 |
+
max-width: 900px;
|
| 48 |
+
margin: 0 auto;
|
| 49 |
+
padding: 20px;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/* --- Header --- */
|
| 53 |
+
header {
|
| 54 |
+
display: flex;
|
| 55 |
+
justify-content: space-between;
|
| 56 |
+
align-items: center;
|
| 57 |
+
padding: 16px 0;
|
| 58 |
+
border-bottom: 1px solid var(--border);
|
| 59 |
+
margin-bottom: 16px;
|
| 60 |
+
flex-wrap: wrap;
|
| 61 |
+
gap: 12px;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.logo {
|
| 65 |
+
font-size: 18px;
|
| 66 |
+
font-weight: 700;
|
| 67 |
+
color: var(--text-bright);
|
| 68 |
+
text-decoration: none;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.logo span { color: var(--accent); }
|
| 72 |
+
|
| 73 |
+
nav {
|
| 74 |
+
display: flex;
|
| 75 |
+
gap: 16px;
|
| 76 |
+
align-items: center;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
nav a {
|
| 80 |
+
color: var(--text-dim);
|
| 81 |
+
text-decoration: none;
|
| 82 |
+
font-size: 13px;
|
| 83 |
+
transition: color 0.15s;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
nav a:hover, nav a.active { color: var(--accent); }
|
| 87 |
+
|
| 88 |
+
.search-form {
|
| 89 |
+
display: flex;
|
| 90 |
+
gap: 8px;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.search-form input {
|
| 94 |
+
background: var(--bg-card);
|
| 95 |
+
border: 1px solid var(--border);
|
| 96 |
+
border-radius: 6px;
|
| 97 |
+
color: var(--text);
|
| 98 |
+
padding: 6px 12px;
|
| 99 |
+
font-family: inherit;
|
| 100 |
+
font-size: 13px;
|
| 101 |
+
width: 220px;
|
| 102 |
+
outline: none;
|
| 103 |
+
transition: border-color 0.15s;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.search-form input:focus { border-color: var(--accent); }
|
| 107 |
+
|
| 108 |
+
.search-form button {
|
| 109 |
+
background: var(--accent);
|
| 110 |
+
color: var(--bg);
|
| 111 |
+
border: none;
|
| 112 |
+
border-radius: 6px;
|
| 113 |
+
padding: 6px 12px;
|
| 114 |
+
font-family: inherit;
|
| 115 |
+
font-size: 13px;
|
| 116 |
+
cursor: pointer;
|
| 117 |
+
font-weight: 600;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
/* --- Stats bar --- */
|
| 121 |
+
.stats-bar {
|
| 122 |
+
display: flex;
|
| 123 |
+
gap: 20px;
|
| 124 |
+
padding: 10px 0;
|
| 125 |
+
border-bottom: 1px solid var(--border);
|
| 126 |
+
margin-bottom: 16px;
|
| 127 |
+
flex-wrap: wrap;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.stat {
|
| 131 |
+
font-size: 13px;
|
| 132 |
+
color: var(--text-dim);
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
.stat strong {
|
| 136 |
+
color: var(--text-bright);
|
| 137 |
+
font-size: 16px;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/* --- Topic tabs --- */
|
| 141 |
+
.topic-tabs {
|
| 142 |
+
display: flex;
|
| 143 |
+
gap: 4px;
|
| 144 |
+
padding: 8px 0;
|
| 145 |
+
border-bottom: 1px solid var(--border);
|
| 146 |
+
margin-bottom: 12px;
|
| 147 |
+
flex-wrap: wrap;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.topic-tab {
|
| 151 |
+
padding: 6px 14px;
|
| 152 |
+
border-radius: 20px;
|
| 153 |
+
font-size: 12px;
|
| 154 |
+
font-weight: 600;
|
| 155 |
+
text-decoration: none;
|
| 156 |
+
color: var(--text-dim);
|
| 157 |
+
background: var(--bg-card);
|
| 158 |
+
border: 1px solid var(--border);
|
| 159 |
+
transition: all 0.15s;
|
| 160 |
+
text-transform: capitalize;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.topic-tab:hover {
|
| 164 |
+
color: var(--text);
|
| 165 |
+
border-color: var(--text-dim);
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.topic-tab.active {
|
| 169 |
+
color: var(--bg);
|
| 170 |
+
background: var(--accent);
|
| 171 |
+
border-color: var(--accent);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.topic-tab[data-topic="left_wing"].active { background: var(--topic-left); border-color: var(--topic-left); }
|
| 175 |
+
.topic-tab[data-topic="right_wing"].active { background: var(--topic-right); border-color: var(--topic-right); }
|
| 176 |
+
.topic-tab[data-topic="apolitical"].active { background: var(--topic-apolitical); border-color: var(--topic-apolitical); }
|
| 177 |
+
.topic-tab[data-topic="health"].active { background: var(--topic-health); border-color: var(--topic-health); }
|
| 178 |
+
.topic-tab[data-topic="science"].active { background: var(--topic-science); border-color: var(--topic-science); }
|
| 179 |
+
.topic-tab[data-topic="culture"].active { background: var(--topic-culture); border-color: var(--topic-culture); }
|
| 180 |
+
|
| 181 |
+
/* --- Filters row --- */
|
| 182 |
+
.filters-row {
|
| 183 |
+
display: flex;
|
| 184 |
+
gap: 8px;
|
| 185 |
+
padding: 8px 0;
|
| 186 |
+
margin-bottom: 16px;
|
| 187 |
+
flex-wrap: wrap;
|
| 188 |
+
align-items: center;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
.filter-select {
|
| 192 |
+
background: var(--bg-card);
|
| 193 |
+
border: 1px solid var(--border);
|
| 194 |
+
border-radius: 6px;
|
| 195 |
+
color: var(--text);
|
| 196 |
+
padding: 6px 10px;
|
| 197 |
+
font-family: inherit;
|
| 198 |
+
font-size: 12px;
|
| 199 |
+
outline: none;
|
| 200 |
+
cursor: pointer;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
.filter-select:focus { border-color: var(--accent); }
|
| 204 |
+
|
| 205 |
+
.filter-label {
|
| 206 |
+
font-size: 12px;
|
| 207 |
+
color: var(--text-dim);
|
| 208 |
+
margin-right: -4px;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.clear-filters {
|
| 212 |
+
font-size: 12px;
|
| 213 |
+
color: var(--text-dim);
|
| 214 |
+
text-decoration: none;
|
| 215 |
+
margin-left: auto;
|
| 216 |
+
transition: color 0.15s;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.clear-filters:hover { color: var(--red); }
|
| 220 |
+
|
| 221 |
+
/* --- Entry cards --- */
|
| 222 |
+
.entries { display: flex; flex-direction: column; gap: 12px; }
|
| 223 |
+
|
| 224 |
+
.entry-card {
|
| 225 |
+
background: var(--bg-card);
|
| 226 |
+
border: 1px solid var(--border);
|
| 227 |
+
border-radius: 8px;
|
| 228 |
+
padding: 16px;
|
| 229 |
+
transition: border-color 0.15s;
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
.entry-card:hover { border-color: var(--text-dim); }
|
| 233 |
+
|
| 234 |
+
.entry-header {
|
| 235 |
+
display: flex;
|
| 236 |
+
align-items: center;
|
| 237 |
+
gap: 8px;
|
| 238 |
+
margin-bottom: 8px;
|
| 239 |
+
flex-wrap: wrap;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
.tier-badge {
|
| 243 |
+
display: inline-flex;
|
| 244 |
+
align-items: center;
|
| 245 |
+
justify-content: center;
|
| 246 |
+
width: 24px;
|
| 247 |
+
height: 24px;
|
| 248 |
+
border-radius: 4px;
|
| 249 |
+
font-size: 12px;
|
| 250 |
+
font-weight: 700;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
.tier-S { background: var(--gold); color: #000; }
|
| 254 |
+
.tier-A { background: var(--silver); color: #000; }
|
| 255 |
+
.tier-B { background: var(--bronze); color: #000; }
|
| 256 |
+
.tier-C { background: var(--grey); color: var(--text); }
|
| 257 |
+
|
| 258 |
+
.fav-star {
|
| 259 |
+
color: var(--gold);
|
| 260 |
+
cursor: pointer;
|
| 261 |
+
font-size: 16px;
|
| 262 |
+
user-select: none;
|
| 263 |
+
transition: transform 0.15s;
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
.fav-star:hover { transform: scale(1.2); }
|
| 267 |
+
.fav-star.inactive { color: var(--grey); }
|
| 268 |
+
|
| 269 |
+
.entry-source {
|
| 270 |
+
font-size: 12px;
|
| 271 |
+
color: var(--text-dim);
|
| 272 |
+
font-weight: 600;
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
.entry-author {
|
| 276 |
+
font-size: 12px;
|
| 277 |
+
color: var(--text-dim);
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.topic-badge {
|
| 281 |
+
display: inline-block;
|
| 282 |
+
padding: 2px 8px;
|
| 283 |
+
border-radius: 10px;
|
| 284 |
+
font-size: 10px;
|
| 285 |
+
font-weight: 600;
|
| 286 |
+
text-transform: uppercase;
|
| 287 |
+
letter-spacing: 0.5px;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
.topic-left_wing { background: rgba(88,166,255,0.15); color: var(--topic-left); }
|
| 291 |
+
.topic-right_wing { background: rgba(248,81,73,0.15); color: var(--topic-right); }
|
| 292 |
+
.topic-apolitical { background: rgba(139,148,158,0.15); color: var(--topic-apolitical); }
|
| 293 |
+
.topic-health { background: rgba(63,185,80,0.15); color: var(--topic-health); }
|
| 294 |
+
.topic-science { background: rgba(188,140,255,0.15); color: var(--topic-science); }
|
| 295 |
+
.topic-culture { background: rgba(240,136,62,0.15); color: var(--topic-culture); }
|
| 296 |
+
|
| 297 |
+
.style-tag {
|
| 298 |
+
display: inline-block;
|
| 299 |
+
padding: 1px 6px;
|
| 300 |
+
border-radius: 8px;
|
| 301 |
+
font-size: 10px;
|
| 302 |
+
color: var(--text-dim);
|
| 303 |
+
background: rgba(139,148,158,0.1);
|
| 304 |
+
border: 1px solid var(--border);
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
.entry-text {
|
| 308 |
+
margin: 10px 0;
|
| 309 |
+
white-space: pre-wrap;
|
| 310 |
+
word-break: break-word;
|
| 311 |
+
color: var(--text);
|
| 312 |
+
font-size: 13px;
|
| 313 |
+
line-height: 1.6;
|
| 314 |
+
max-height: 200px;
|
| 315 |
+
overflow: hidden;
|
| 316 |
+
position: relative;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.entry-text.expanded { max-height: none; }
|
| 320 |
+
|
| 321 |
+
.entry-text.truncated::after {
|
| 322 |
+
content: "";
|
| 323 |
+
position: absolute;
|
| 324 |
+
bottom: 0;
|
| 325 |
+
left: 0;
|
| 326 |
+
right: 0;
|
| 327 |
+
height: 40px;
|
| 328 |
+
background: linear-gradient(transparent, var(--bg-card));
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
.expand-btn {
|
| 332 |
+
font-size: 12px;
|
| 333 |
+
color: var(--accent);
|
| 334 |
+
cursor: pointer;
|
| 335 |
+
background: none;
|
| 336 |
+
border: none;
|
| 337 |
+
font-family: inherit;
|
| 338 |
+
padding: 4px 0;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
.entry-meta {
|
| 342 |
+
display: flex;
|
| 343 |
+
gap: 12px;
|
| 344 |
+
font-size: 11px;
|
| 345 |
+
color: var(--text-dim);
|
| 346 |
+
margin-top: 4px;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.entry-url a {
|
| 350 |
+
color: var(--accent);
|
| 351 |
+
text-decoration: none;
|
| 352 |
+
font-size: 11px;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.entry-url a:hover { text-decoration: underline; }
|
| 356 |
+
|
| 357 |
+
.entry-notes {
|
| 358 |
+
margin-top: 8px;
|
| 359 |
+
padding: 8px 12px;
|
| 360 |
+
background: rgba(88,166,255,0.05);
|
| 361 |
+
border-left: 2px solid var(--accent);
|
| 362 |
+
border-radius: 0 4px 4px 0;
|
| 363 |
+
font-size: 12px;
|
| 364 |
+
color: var(--text-dim);
|
| 365 |
+
font-style: italic;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* --- Actions bar --- */
|
| 369 |
+
.entry-actions {
|
| 370 |
+
display: flex;
|
| 371 |
+
gap: 8px;
|
| 372 |
+
margin-top: 12px;
|
| 373 |
+
padding-top: 10px;
|
| 374 |
+
border-top: 1px solid var(--border);
|
| 375 |
+
flex-wrap: wrap;
|
| 376 |
+
align-items: center;
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
.action-btn {
|
| 380 |
+
background: var(--bg);
|
| 381 |
+
border: 1px solid var(--border);
|
| 382 |
+
border-radius: 6px;
|
| 383 |
+
color: var(--text-dim);
|
| 384 |
+
padding: 4px 10px;
|
| 385 |
+
font-family: inherit;
|
| 386 |
+
font-size: 11px;
|
| 387 |
+
cursor: pointer;
|
| 388 |
+
transition: all 0.15s;
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
.action-btn:hover {
|
| 392 |
+
color: var(--text);
|
| 393 |
+
border-color: var(--text-dim);
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
.action-btn.active {
|
| 397 |
+
color: var(--accent);
|
| 398 |
+
border-color: var(--accent);
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.rating-stars {
|
| 402 |
+
display: inline-flex;
|
| 403 |
+
gap: 2px;
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
.rating-star {
|
| 407 |
+
cursor: pointer;
|
| 408 |
+
color: var(--grey);
|
| 409 |
+
font-size: 16px;
|
| 410 |
+
user-select: none;
|
| 411 |
+
transition: color 0.1s;
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
.rating-star.filled { color: var(--gold); }
|
| 415 |
+
.rating-star:hover { color: var(--gold); }
|
| 416 |
+
|
| 417 |
+
/* --- Inline forms --- */
|
| 418 |
+
.inline-form {
|
| 419 |
+
display: none;
|
| 420 |
+
margin-top: 8px;
|
| 421 |
+
padding: 10px;
|
| 422 |
+
background: var(--bg);
|
| 423 |
+
border: 1px solid var(--border);
|
| 424 |
+
border-radius: 6px;
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
.inline-form.visible { display: block; }
|
| 428 |
+
|
| 429 |
+
.inline-form label {
|
| 430 |
+
font-size: 11px;
|
| 431 |
+
color: var(--text-dim);
|
| 432 |
+
display: block;
|
| 433 |
+
margin-bottom: 4px;
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
.inline-form select, .inline-form textarea, .inline-form input {
|
| 437 |
+
background: var(--bg-card);
|
| 438 |
+
border: 1px solid var(--border);
|
| 439 |
+
border-radius: 4px;
|
| 440 |
+
color: var(--text);
|
| 441 |
+
padding: 6px 8px;
|
| 442 |
+
font-family: inherit;
|
| 443 |
+
font-size: 12px;
|
| 444 |
+
width: 100%;
|
| 445 |
+
outline: none;
|
| 446 |
+
margin-bottom: 8px;
|
| 447 |
+
}
|
| 448 |
+
|
| 449 |
+
.inline-form textarea { resize: vertical; min-height: 60px; }
|
| 450 |
+
|
| 451 |
+
.inline-form .form-row {
|
| 452 |
+
display: flex;
|
| 453 |
+
gap: 8px;
|
| 454 |
+
flex-wrap: wrap;
|
| 455 |
+
margin-bottom: 8px;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.inline-form .form-row label {
|
| 459 |
+
display: flex;
|
| 460 |
+
align-items: center;
|
| 461 |
+
gap: 4px;
|
| 462 |
+
font-size: 11px;
|
| 463 |
+
cursor: pointer;
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
.inline-form .form-row input[type="checkbox"] {
|
| 467 |
+
width: auto;
|
| 468 |
+
margin: 0;
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
.save-btn {
|
| 472 |
+
background: var(--accent);
|
| 473 |
+
color: var(--bg);
|
| 474 |
+
border: none;
|
| 475 |
+
border-radius: 4px;
|
| 476 |
+
padding: 5px 14px;
|
| 477 |
+
font-family: inherit;
|
| 478 |
+
font-size: 12px;
|
| 479 |
+
font-weight: 600;
|
| 480 |
+
cursor: pointer;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
/* --- Sources page --- */
|
| 484 |
+
.sources-list { margin-top: 16px; }
|
| 485 |
+
|
| 486 |
+
.source-row {
|
| 487 |
+
display: flex;
|
| 488 |
+
align-items: center;
|
| 489 |
+
gap: 12px;
|
| 490 |
+
padding: 12px 16px;
|
| 491 |
+
background: var(--bg-card);
|
| 492 |
+
border: 1px solid var(--border);
|
| 493 |
+
border-radius: 8px;
|
| 494 |
+
margin-bottom: 8px;
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
.source-platform {
|
| 498 |
+
font-size: 12px;
|
| 499 |
+
font-weight: 700;
|
| 500 |
+
color: var(--accent);
|
| 501 |
+
min-width: 80px;
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
.source-handle {
|
| 505 |
+
font-size: 13px;
|
| 506 |
+
color: var(--text);
|
| 507 |
+
flex: 1;
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
.source-notes {
|
| 511 |
+
font-size: 11px;
|
| 512 |
+
color: var(--text-dim);
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.source-last {
|
| 516 |
+
font-size: 11px;
|
| 517 |
+
color: var(--text-dim);
|
| 518 |
+
min-width: 120px;
|
| 519 |
+
text-align: right;
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
/* --- Random page --- */
|
| 523 |
+
.random-card {
|
| 524 |
+
background: var(--bg-card);
|
| 525 |
+
border: 2px solid var(--gold);
|
| 526 |
+
border-radius: 12px;
|
| 527 |
+
padding: 24px;
|
| 528 |
+
margin: 20px 0;
|
| 529 |
+
text-align: center;
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
.random-card .entry-text {
|
| 533 |
+
font-size: 15px;
|
| 534 |
+
max-height: none;
|
| 535 |
+
text-align: left;
|
| 536 |
+
}
|
| 537 |
+
|
| 538 |
+
.random-again {
|
| 539 |
+
display: inline-block;
|
| 540 |
+
margin-top: 16px;
|
| 541 |
+
padding: 8px 20px;
|
| 542 |
+
background: var(--accent);
|
| 543 |
+
color: var(--bg);
|
| 544 |
+
text-decoration: none;
|
| 545 |
+
border-radius: 6px;
|
| 546 |
+
font-weight: 600;
|
| 547 |
+
font-size: 13px;
|
| 548 |
+
}
|
| 549 |
+
|
| 550 |
+
/* --- Empty state --- */
|
| 551 |
+
.empty {
|
| 552 |
+
text-align: center;
|
| 553 |
+
padding: 60px 20px;
|
| 554 |
+
color: var(--text-dim);
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
.empty h3 { font-size: 16px; margin-bottom: 8px; color: var(--text); }
|
| 558 |
+
|
| 559 |
+
/* --- Pagination --- */
|
| 560 |
+
.pagination {
|
| 561 |
+
display: flex;
|
| 562 |
+
justify-content: center;
|
| 563 |
+
gap: 8px;
|
| 564 |
+
margin-top: 20px;
|
| 565 |
+
padding: 16px 0;
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
.pagination a {
|
| 569 |
+
padding: 6px 14px;
|
| 570 |
+
border-radius: 6px;
|
| 571 |
+
background: var(--bg-card);
|
| 572 |
+
border: 1px solid var(--border);
|
| 573 |
+
color: var(--text-dim);
|
| 574 |
+
text-decoration: none;
|
| 575 |
+
font-size: 13px;
|
| 576 |
+
transition: all 0.15s;
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
.pagination a:hover {
|
| 580 |
+
color: var(--text);
|
| 581 |
+
border-color: var(--accent);
|
| 582 |
+
}
|
| 583 |
+
|
| 584 |
+
/* --- Toast --- */
|
| 585 |
+
.toast {
|
| 586 |
+
position: fixed;
|
| 587 |
+
bottom: 20px;
|
| 588 |
+
right: 20px;
|
| 589 |
+
background: var(--green);
|
| 590 |
+
color: #000;
|
| 591 |
+
padding: 10px 20px;
|
| 592 |
+
border-radius: 8px;
|
| 593 |
+
font-size: 13px;
|
| 594 |
+
font-weight: 600;
|
| 595 |
+
opacity: 0;
|
| 596 |
+
transform: translateY(10px);
|
| 597 |
+
transition: all 0.3s;
|
| 598 |
+
pointer-events: none;
|
| 599 |
+
z-index: 100;
|
| 600 |
+
}
|
| 601 |
+
|
| 602 |
+
.toast.show {
|
| 603 |
+
opacity: 1;
|
| 604 |
+
transform: translateY(0);
|
| 605 |
+
}
|
| 606 |
+
|
| 607 |
+
/* --- Setup page --- */
|
| 608 |
+
.setup-header {
|
| 609 |
+
margin-bottom: 20px;
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
.setup-header h2 {
|
| 613 |
+
font-size: 16px;
|
| 614 |
+
margin-bottom: 4px;
|
| 615 |
+
}
|
| 616 |
+
|
| 617 |
+
.setup-header p {
|
| 618 |
+
font-size: 13px;
|
| 619 |
+
color: var(--text-dim);
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
.setup-progress {
|
| 623 |
+
display: flex;
|
| 624 |
+
gap: 6px;
|
| 625 |
+
margin: 16px 0;
|
| 626 |
+
align-items: center;
|
| 627 |
+
}
|
| 628 |
+
|
| 629 |
+
.progress-dot {
|
| 630 |
+
width: 12px;
|
| 631 |
+
height: 12px;
|
| 632 |
+
border-radius: 50%;
|
| 633 |
+
background: var(--grey);
|
| 634 |
+
transition: background 0.2s;
|
| 635 |
+
}
|
| 636 |
+
|
| 637 |
+
.progress-dot.done { background: var(--green); }
|
| 638 |
+
|
| 639 |
+
.progress-label {
|
| 640 |
+
font-size: 12px;
|
| 641 |
+
color: var(--text-dim);
|
| 642 |
+
margin-left: 8px;
|
| 643 |
+
}
|
| 644 |
+
|
| 645 |
+
.platform-cards {
|
| 646 |
+
display: flex;
|
| 647 |
+
flex-direction: column;
|
| 648 |
+
gap: 12px;
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
.platform-card {
|
| 652 |
+
background: var(--bg-card);
|
| 653 |
+
border: 1px solid var(--border);
|
| 654 |
+
border-radius: 8px;
|
| 655 |
+
overflow: hidden;
|
| 656 |
+
transition: border-color 0.15s;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
.platform-card:hover { border-color: var(--text-dim); }
|
| 660 |
+
.platform-card.is-configured { border-left: 3px solid var(--green); }
|
| 661 |
+
.platform-card.no-fields { border-left: 3px solid var(--green); }
|
| 662 |
+
|
| 663 |
+
.platform-card-header {
|
| 664 |
+
display: flex;
|
| 665 |
+
align-items: center;
|
| 666 |
+
gap: 12px;
|
| 667 |
+
padding: 14px 16px;
|
| 668 |
+
cursor: pointer;
|
| 669 |
+
user-select: none;
|
| 670 |
+
}
|
| 671 |
+
|
| 672 |
+
.platform-card-header:hover { background: var(--bg-hover); }
|
| 673 |
+
|
| 674 |
+
.platform-status {
|
| 675 |
+
width: 10px;
|
| 676 |
+
height: 10px;
|
| 677 |
+
border-radius: 50%;
|
| 678 |
+
flex-shrink: 0;
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
.platform-status.configured { background: var(--green); }
|
| 682 |
+
.platform-status.not-configured { background: var(--red); }
|
| 683 |
+
.platform-status.no-key-needed { background: var(--green); }
|
| 684 |
+
|
| 685 |
+
.platform-name {
|
| 686 |
+
font-size: 14px;
|
| 687 |
+
font-weight: 600;
|
| 688 |
+
color: var(--text-bright);
|
| 689 |
+
flex: 1;
|
| 690 |
+
}
|
| 691 |
+
|
| 692 |
+
.platform-cost {
|
| 693 |
+
font-size: 11px;
|
| 694 |
+
padding: 2px 8px;
|
| 695 |
+
border-radius: 10px;
|
| 696 |
+
font-weight: 600;
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
.cost-free { background: rgba(63,185,80,0.15); color: var(--green); }
|
| 700 |
+
.cost-paid { background: rgba(248,81,73,0.15); color: var(--red); }
|
| 701 |
+
.cost-freemium { background: rgba(240,136,62,0.15); color: var(--orange); }
|
| 702 |
+
|
| 703 |
+
.platform-toggle {
|
| 704 |
+
color: var(--text-dim);
|
| 705 |
+
font-size: 12px;
|
| 706 |
+
transition: transform 0.2s;
|
| 707 |
+
}
|
| 708 |
+
|
| 709 |
+
.platform-toggle.open { transform: rotate(90deg); }
|
| 710 |
+
|
| 711 |
+
.platform-body {
|
| 712 |
+
display: none;
|
| 713 |
+
padding: 0 16px 16px 16px;
|
| 714 |
+
border-top: 1px solid var(--border);
|
| 715 |
+
}
|
| 716 |
+
|
| 717 |
+
.platform-body.visible { display: block; }
|
| 718 |
+
|
| 719 |
+
.platform-description {
|
| 720 |
+
font-size: 12px;
|
| 721 |
+
color: var(--text-dim);
|
| 722 |
+
margin: 12px 0;
|
| 723 |
+
line-height: 1.5;
|
| 724 |
+
}
|
| 725 |
+
|
| 726 |
+
.setup-steps {
|
| 727 |
+
margin: 12px 0;
|
| 728 |
+
padding: 0;
|
| 729 |
+
counter-reset: step;
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
.setup-steps li {
|
| 733 |
+
list-style: none;
|
| 734 |
+
padding: 6px 0 6px 32px;
|
| 735 |
+
position: relative;
|
| 736 |
+
font-size: 12px;
|
| 737 |
+
color: var(--text);
|
| 738 |
+
line-height: 1.5;
|
| 739 |
+
}
|
| 740 |
+
|
| 741 |
+
.setup-steps li::before {
|
| 742 |
+
counter-increment: step;
|
| 743 |
+
content: counter(step);
|
| 744 |
+
position: absolute;
|
| 745 |
+
left: 0;
|
| 746 |
+
width: 22px;
|
| 747 |
+
height: 22px;
|
| 748 |
+
border-radius: 50%;
|
| 749 |
+
background: var(--bg);
|
| 750 |
+
border: 1px solid var(--border);
|
| 751 |
+
display: flex;
|
| 752 |
+
align-items: center;
|
| 753 |
+
justify-content: center;
|
| 754 |
+
font-size: 11px;
|
| 755 |
+
font-weight: 600;
|
| 756 |
+
color: var(--text-dim);
|
| 757 |
+
}
|
| 758 |
+
|
| 759 |
+
.setup-link {
|
| 760 |
+
display: inline-block;
|
| 761 |
+
margin: 8px 0 12px;
|
| 762 |
+
padding: 6px 14px;
|
| 763 |
+
background: var(--accent);
|
| 764 |
+
color: var(--bg);
|
| 765 |
+
text-decoration: none;
|
| 766 |
+
border-radius: 6px;
|
| 767 |
+
font-size: 12px;
|
| 768 |
+
font-weight: 600;
|
| 769 |
+
transition: opacity 0.15s;
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
.setup-link:hover { opacity: 0.85; }
|
| 773 |
+
|
| 774 |
+
.setup-fields {
|
| 775 |
+
display: flex;
|
| 776 |
+
flex-direction: column;
|
| 777 |
+
gap: 10px;
|
| 778 |
+
margin-top: 12px;
|
| 779 |
+
padding-top: 12px;
|
| 780 |
+
border-top: 1px solid var(--border);
|
| 781 |
+
}
|
| 782 |
+
|
| 783 |
+
.setup-field label {
|
| 784 |
+
font-size: 11px;
|
| 785 |
+
color: var(--text-dim);
|
| 786 |
+
display: block;
|
| 787 |
+
margin-bottom: 4px;
|
| 788 |
+
}
|
| 789 |
+
|
| 790 |
+
.setup-field input {
|
| 791 |
+
background: var(--bg);
|
| 792 |
+
border: 1px solid var(--border);
|
| 793 |
+
border-radius: 4px;
|
| 794 |
+
color: var(--text);
|
| 795 |
+
padding: 8px 10px;
|
| 796 |
+
font-family: inherit;
|
| 797 |
+
font-size: 13px;
|
| 798 |
+
width: 100%;
|
| 799 |
+
outline: none;
|
| 800 |
+
transition: border-color 0.15s;
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
.setup-field input:focus { border-color: var(--accent); }
|
| 804 |
+
|
| 805 |
+
.setup-field input.has-value { border-color: var(--green); }
|
| 806 |
+
|
| 807 |
+
.setup-actions {
|
| 808 |
+
display: flex;
|
| 809 |
+
gap: 8px;
|
| 810 |
+
margin-top: 12px;
|
| 811 |
+
align-items: center;
|
| 812 |
+
}
|
| 813 |
+
|
| 814 |
+
.setup-save {
|
| 815 |
+
background: var(--accent);
|
| 816 |
+
color: var(--bg);
|
| 817 |
+
border: none;
|
| 818 |
+
border-radius: 6px;
|
| 819 |
+
padding: 8px 18px;
|
| 820 |
+
font-family: inherit;
|
| 821 |
+
font-size: 12px;
|
| 822 |
+
font-weight: 600;
|
| 823 |
+
cursor: pointer;
|
| 824 |
+
transition: opacity 0.15s;
|
| 825 |
+
}
|
| 826 |
+
|
| 827 |
+
.setup-save:hover { opacity: 0.85; }
|
| 828 |
+
|
| 829 |
+
.setup-test {
|
| 830 |
+
background: transparent;
|
| 831 |
+
border: 1px solid var(--border);
|
| 832 |
+
border-radius: 6px;
|
| 833 |
+
padding: 7px 14px;
|
| 834 |
+
font-family: inherit;
|
| 835 |
+
font-size: 12px;
|
| 836 |
+
color: var(--text-dim);
|
| 837 |
+
cursor: pointer;
|
| 838 |
+
transition: all 0.15s;
|
| 839 |
+
}
|
| 840 |
+
|
| 841 |
+
.setup-test:hover {
|
| 842 |
+
color: var(--text);
|
| 843 |
+
border-color: var(--text-dim);
|
| 844 |
+
}
|
| 845 |
+
|
| 846 |
+
.test-result {
|
| 847 |
+
font-size: 12px;
|
| 848 |
+
margin-left: 8px;
|
| 849 |
+
transition: opacity 0.3s;
|
| 850 |
+
}
|
| 851 |
+
|
| 852 |
+
.test-result.success { color: var(--green); }
|
| 853 |
+
.test-result.fail { color: var(--red); }
|
| 854 |
+
.test-result.loading { color: var(--text-dim); }
|
| 855 |
+
|
| 856 |
+
/* --- Responsive --- */
|
| 857 |
+
@media (max-width: 640px) {
|
| 858 |
+
.container { padding: 12px; }
|
| 859 |
+
header { flex-direction: column; align-items: flex-start; }
|
| 860 |
+
.search-form input { width: 160px; }
|
| 861 |
+
.stats-bar { gap: 12px; }
|
| 862 |
+
.entry-actions { gap: 4px; }
|
| 863 |
+
}
|
| 864 |
+
</style>
|
| 865 |
+
</head>
|
| 866 |
+
<body>
|
| 867 |
+
|
| 868 |
+
<div class="container">
|
| 869 |
+
|
| 870 |
+
<!-- Header -->
|
| 871 |
+
<header>
|
| 872 |
+
<a href="/" class="logo">JOKE <span>CORPUS</span></a>
|
| 873 |
+
<nav>
|
| 874 |
+
<a href="/" class="{{ 'active' if not view|default('') }}">Dashboard</a>
|
| 875 |
+
<a href="/sources" class="{{ 'active' if view|default('') == 'sources' }}">Sources</a>
|
| 876 |
+
<a href="/random" class="{{ 'active' if view|default('') == 'random' }}">Random</a>
|
| 877 |
+
<a href="/setup" class="{{ 'active' if view|default('') == 'setup' }}">Setup</a>
|
| 878 |
+
</nav>
|
| 879 |
+
<form class="search-form" action="/" method="GET">
|
| 880 |
+
<input type="text" name="q" placeholder="Search..." value="{{ current_q }}">
|
| 881 |
+
<button type="submit">Go</button>
|
| 882 |
+
</form>
|
| 883 |
+
</header>
|
| 884 |
+
|
| 885 |
+
<!-- Stats bar -->
|
| 886 |
+
<div class="stats-bar">
|
| 887 |
+
<div class="stat"><strong>{{ stats.total_entries }}</strong> entries</div>
|
| 888 |
+
<div class="stat"><strong>{{ stats.total_sources }}</strong> sources</div>
|
| 889 |
+
<div class="stat"><strong>{{ stats.favourites }}</strong> favs</div>
|
| 890 |
+
{% for t in stats.by_tier %}
|
| 891 |
+
<div class="stat"><strong>{{ t.n }}</strong> {{ t.quality_tier }}-tier</div>
|
| 892 |
+
{% endfor %}
|
| 893 |
+
</div>
|
| 894 |
+
|
| 895 |
+
{% if view|default('') == 'sources' %}
|
| 896 |
+
<!-- ==================== SOURCES PAGE ==================== -->
|
| 897 |
+
<h2 style="margin-bottom: 16px; font-size: 16px;">Sources ({{ sources|length }})</h2>
|
| 898 |
+
<div class="sources-list">
|
| 899 |
+
{% for s in sources %}
|
| 900 |
+
<div class="source-row">
|
| 901 |
+
<div class="source-platform">{{ s.platform }}</div>
|
| 902 |
+
<div class="source-handle">{{ s.handle_or_url }}
|
| 903 |
+
{% if s.default_topic %}
|
| 904 |
+
<span class="topic-badge topic-{{ s.default_topic }}">{{ s.default_topic|replace('_', ' ') }}</span>
|
| 905 |
+
{% endif %}
|
| 906 |
+
</div>
|
| 907 |
+
{% if s.notes %}
|
| 908 |
+
<div class="source-notes">{{ s.notes }}</div>
|
| 909 |
+
{% endif %}
|
| 910 |
+
<div class="source-last">{{ s.last_scraped or 'never scraped' }}</div>
|
| 911 |
+
</div>
|
| 912 |
+
{% endfor %}
|
| 913 |
+
</div>
|
| 914 |
+
|
| 915 |
+
{% elif view|default('') == 'setup' %}
|
| 916 |
+
<!-- ==================== SETUP PAGE ==================== -->
|
| 917 |
+
<div class="setup-header">
|
| 918 |
+
<h2>Platform Setup</h2>
|
| 919 |
+
<p>Connect your API keys to start scraping. Work through each platform — most are free.</p>
|
| 920 |
+
</div>
|
| 921 |
+
|
| 922 |
+
<div class="setup-progress">
|
| 923 |
+
{% for p in platforms_setup %}
|
| 924 |
+
<div class="progress-dot {{ 'done' if p.configured }}" title="{{ p.name }}"></div>
|
| 925 |
+
{% endfor %}
|
| 926 |
+
<span class="progress-label">{{ configured_count }}/{{ total_platforms }} configured</span>
|
| 927 |
+
</div>
|
| 928 |
+
|
| 929 |
+
<div class="platform-cards">
|
| 930 |
+
{% for p in platforms_setup %}
|
| 931 |
+
<div class="platform-card {{ 'is-configured' if p.configured and p.fields else '' }} {{ 'no-fields' if not p.fields else '' }}" id="platform-{{ p.id }}">
|
| 932 |
+
<div class="platform-card-header" onclick="togglePlatform('{{ p.id }}')">
|
| 933 |
+
<div class="platform-status {{ 'configured' if p.configured and p.fields else 'no-key-needed' if not p.fields else 'not-configured' }}"></div>
|
| 934 |
+
<div class="platform-name">{{ p.name }}</div>
|
| 935 |
+
<span class="platform-cost {{ 'cost-free' if p.cost == 'Free' else 'cost-paid' if '$' in p.cost and '/mo' in p.cost and '200' in p.cost else 'cost-freemium' }}">{{ p.cost }}</span>
|
| 936 |
+
<span class="platform-toggle" id="toggle-{{ p.id }}">▶</span>
|
| 937 |
+
</div>
|
| 938 |
+
|
| 939 |
+
<div class="platform-body" id="body-{{ p.id }}">
|
| 940 |
+
<p class="platform-description">{{ p.description }}</p>
|
| 941 |
+
|
| 942 |
+
{% if p.signup_steps %}
|
| 943 |
+
<ol class="setup-steps">
|
| 944 |
+
{% for step in p.signup_steps %}
|
| 945 |
+
<li>{{ step }}</li>
|
| 946 |
+
{% endfor %}
|
| 947 |
+
</ol>
|
| 948 |
+
{% endif %}
|
| 949 |
+
|
| 950 |
+
{% if p.signup_url %}
|
| 951 |
+
{# Opens in a new tab; rel keeps the opened page from reaching window.opener and suppresses the Referer header. #}
<a href="{{ p.signup_url }}" target="_blank" rel="noopener noreferrer" class="setup-link">Open {{ p.name }} setup page ↗</a>
|
| 952 |
+
{% endif %}
|
| 953 |
+
|
| 954 |
+
{% if p.fields %}
|
| 955 |
+
<div class="setup-fields">
|
| 956 |
+
{% for f in p.fields %}
|
| 957 |
+
<div class="setup-field">
|
| 958 |
+
<label for="field-{{ f.key }}">{{ f.label }}</label>
|
| 959 |
+
<input type="{{ f.type }}"
|
| 960 |
+
id="field-{{ f.key }}"
|
| 961 |
+
data-key="{{ f.key }}"
|
| 962 |
+
data-platform="{{ p.id }}"
|
| 963 |
+
value="{{ p.field_values[f.key] }}"
|
| 964 |
+
class="{{ 'has-value' if p.field_values[f.key] }}"
|
| 965 |
+
placeholder="{{ f.label }}"
|
| 966 |
+
autocomplete="off">
|
| 967 |
+
</div>
|
| 968 |
+
{% endfor %}
|
| 969 |
+
</div>
|
| 970 |
+
|
| 971 |
+
<div class="setup-actions">
|
| 972 |
+
<button class="setup-save" onclick="savePlatform('{{ p.id }}')">Save Keys</button>
|
| 973 |
+
<button class="setup-test" onclick="testPlatform('{{ p.id }}')">Test Connection</button>
|
| 974 |
+
<span class="test-result" id="test-result-{{ p.id }}"></span>
|
| 975 |
+
</div>
|
| 976 |
+
{% endif %}
|
| 977 |
+
</div>
|
| 978 |
+
</div>
|
| 979 |
+
{% endfor %}
|
| 980 |
+
</div>
|
| 981 |
+
|
| 982 |
+
{% elif view|default('') == 'random' %}
|
| 983 |
+
<!-- ==================== RANDOM PAGE ==================== -->
|
| 984 |
+
{% if random_entry %}
|
| 985 |
+
<div class="random-card">
|
| 986 |
+
<div class="entry-header" style="justify-content: center;">
|
| 987 |
+
<span class="tier-badge tier-{{ random_entry.quality_tier }}">{{ random_entry.quality_tier }}</span>
|
| 988 |
+
{% if random_entry.favourite %}<span class="fav-star">★</span>{% endif %}
|
| 989 |
+
<span class="entry-source">{{ random_entry.source_name or random_entry.platform }}</span>
|
| 990 |
+
{% if random_entry.topic %}
|
| 991 |
+
<span class="topic-badge topic-{{ random_entry.topic }}">{{ random_entry.topic|replace('_', ' ') }}</span>
|
| 992 |
+
{% endif %}
|
| 993 |
+
</div>
|
| 994 |
+
<div class="entry-text">{{ random_entry.text }}</div>
|
| 995 |
+
{% if random_entry.url %}
|
| 996 |
+
{# The link target comes from the stored entry URL; rel keeps the opened page from reaching window.opener and suppresses the Referer header. #}
<div class="entry-url"><a href="{{ random_entry.url }}" target="_blank" rel="noopener noreferrer">{{ random_entry.url }}</a></div>
|
| 997 |
+
{% endif %}
|
| 998 |
+
{# Rebuild the /random link with the active filters. The "?" is emitted when either filter is set and "&" only when both are, so a tier-only filter yields /random?tier=S rather than /random&tier=S. Values are URL-encoded. #}
<a href="/random{% if current_topic or current_tier %}?{% endif %}{% if current_topic %}topic={{ current_topic|urlencode }}{% endif %}{% if current_topic and current_tier %}&amp;{% endif %}{% if current_tier %}tier={{ current_tier|urlencode }}{% endif %}" class="random-again">Another one</a>
|
| 999 |
+
</div>
|
| 1000 |
+
{% else %}
|
| 1001 |
+
<div class="empty">
|
| 1002 |
+
<h3>Nothing found</h3>
|
| 1003 |
+
<p>No entries match those filters. Try broader criteria.</p>
|
| 1004 |
+
</div>
|
| 1005 |
+
{% endif %}
|
| 1006 |
+
|
| 1007 |
+
{% else %}
|
| 1008 |
+
<!-- ==================== DASHBOARD ==================== -->
|
| 1009 |
+
|
| 1010 |
+
<!-- Topic tabs -->
|
| 1011 |
+
<div class="topic-tabs">
|
| 1012 |
+
<a href="{{ _build_url(topic=None) }}" class="topic-tab {{ 'active' if not current_topic }}" data-topic="">All</a>
|
| 1013 |
+
{% for t in topics %}
|
| 1014 |
+
<a href="{{ _build_url(topic=t) }}" class="topic-tab {{ 'active' if current_topic == t }}" data-topic="{{ t }}">{{ t|replace('_', ' ') }}</a>
|
| 1015 |
+
{% endfor %}
|
| 1016 |
+
</div>
|
| 1017 |
+
|
| 1018 |
+
<!-- Filters row -->
|
| 1019 |
+
<div class="filters-row">
|
| 1020 |
+
<span class="filter-label">Tier:</span>
|
| 1021 |
+
<select class="filter-select" onchange="applyFilter('tier', this.value)">
|
| 1022 |
+
<option value="">All tiers</option>
|
| 1023 |
+
<option value="S" {{ 'selected' if current_tier == 'S' }}>S — Exceptional</option>
|
| 1024 |
+
<option value="A" {{ 'selected' if current_tier == 'A' }}>A — Strong</option>
|
| 1025 |
+
<option value="B" {{ 'selected' if current_tier == 'B' }}>B — Decent</option>
|
| 1026 |
+
<option value="C" {{ 'selected' if current_tier == 'C' }}>C — All</option>
|
| 1027 |
+
</select>
|
| 1028 |
+
|
| 1029 |
+
<span class="filter-label">Style:</span>
|
| 1030 |
+
<select class="filter-select" onchange="applyFilter('style', this.value)">
|
| 1031 |
+
<option value="">All styles</option>
|
| 1032 |
+
{% for s in styles %}
|
| 1033 |
+
<option value="{{ s }}" {{ 'selected' if current_style == s }}>{{ s|replace('_', ' ') }}</option>
|
| 1034 |
+
{% endfor %}
|
| 1035 |
+
</select>
|
| 1036 |
+
|
| 1037 |
+
<span class="filter-label">Platform:</span>
|
| 1038 |
+
<select class="filter-select" onchange="applyFilter('platform', this.value)">
|
| 1039 |
+
<option value="">All platforms</option>
|
| 1040 |
+
{% for p in platforms %}
|
| 1041 |
+
<option value="{{ p }}" {{ 'selected' if current_platform == p }}>{{ p }}</option>
|
| 1042 |
+
{% endfor %}
|
| 1043 |
+
</select>
|
| 1044 |
+
|
| 1045 |
+
{% if current_topic or current_style or current_tier or current_platform or current_q %}
|
| 1046 |
+
<a href="/" class="clear-filters">Clear all</a>
|
| 1047 |
+
{% endif %}
|
| 1048 |
+
</div>
|
| 1049 |
+
|
| 1050 |
+
<!-- Entries -->
|
| 1051 |
+
{% if entries %}
|
| 1052 |
+
<div class="entries">
|
| 1053 |
+
{% for e in entries %}
|
| 1054 |
+
<div class="entry-card" id="entry-{{ e.id }}">
|
| 1055 |
+
|
| 1056 |
+
<div class="entry-header">
|
| 1057 |
+
<span class="tier-badge tier-{{ e.quality_tier }}">{{ e.quality_tier }}</span>
|
| 1058 |
+
<span class="fav-star {{ '' if e.favourite else 'inactive' }}"
|
| 1059 |
+
onclick="toggleFav({{ e.id }}, this)"
|
| 1060 |
+
title="Toggle favourite">★</span>
|
| 1061 |
+
<span class="entry-source">{{ e.source_name or e.platform }}</span>
|
| 1062 |
+
{% if e.author %}
|
| 1063 |
+
<span class="entry-author">{{ e.author }}</span>
|
| 1064 |
+
{% endif %}
|
| 1065 |
+
{% if e.topic %}
|
| 1066 |
+
<span class="topic-badge topic-{{ e.topic }}">{{ e.topic|replace('_', ' ') }}</span>
|
| 1067 |
+
{% endif %}
|
| 1068 |
+
{% for s in e.styles %}
|
| 1069 |
+
<span class="style-tag">{{ s|replace('_', ' ') }}</span>
|
| 1070 |
+
{% endfor %}
|
| 1071 |
+
</div>
|
| 1072 |
+
|
| 1073 |
+
<div class="entry-text" id="text-{{ e.id }}">{{ e.text }}</div>
|
| 1074 |
+
{% if e.text|length > 400 %}
|
| 1075 |
+
<button class="expand-btn" onclick="toggleExpand({{ e.id }})">Show more</button>
|
| 1076 |
+
{% endif %}
|
| 1077 |
+
|
| 1078 |
+
{% if e.url %}
|
| 1079 |
+
{# Link text is cut to 80 characters with a trailing ellipsis; the href keeps the full URL. rel keeps the opened page from reaching window.opener and suppresses the Referer header. #}
<div class="entry-url"><a href="{{ e.url }}" target="_blank" rel="noopener noreferrer">{{ e.url[:80] }}{% if e.url|length > 80 %}...{% endif %}</a></div>
|
| 1080 |
+
{% endif %}
|
| 1081 |
+
|
| 1082 |
+
<div class="entry-meta">
|
| 1083 |
+
{% if e.likes is not none %}<span>{{ e.likes }} likes</span>{% endif %}
|
| 1084 |
+
{% if e.shares %}<span>{{ e.shares }} shares</span>{% endif %}
|
| 1085 |
+
{% if e.saves %}<span>{{ e.saves }} saves</span>{% endif %}
|
| 1086 |
+
{% if e.replies %}<span>{{ e.replies }} replies</span>{% endif %}
|
| 1087 |
+
{% if e.normalised_score %}<span>score: {{ "%.1f"|format(e.normalised_score) }}x</span>{% endif %}
|
| 1088 |
+
<span style="margin-left: auto; font-size: 10px;">id={{ e.id }}</span>
|
| 1089 |
+
</div>
|
| 1090 |
+
|
| 1091 |
+
{% if e.ally_notes %}
|
| 1092 |
+
<div class="entry-notes">{{ e.ally_notes }}</div>
|
| 1093 |
+
{% endif %}
|
| 1094 |
+
|
| 1095 |
+
<!-- Actions -->
|
| 1096 |
+
<div class="entry-actions">
|
| 1097 |
+
<div class="rating-stars" data-entry="{{ e.id }}">
|
| 1098 |
+
{% for i in range(1, 6) %}
|
| 1099 |
+
<span class="rating-star {{ 'filled' if e.ally_rating and i <= e.ally_rating }}"
|
| 1100 |
+
onclick="setRating({{ e.id }}, {{ i }})"
|
| 1101 |
+
data-star="{{ i }}">★</span>
|
| 1102 |
+
{% endfor %}
|
| 1103 |
+
</div>
|
| 1104 |
+
|
| 1105 |
+
<button class="action-btn" onclick="toggleForm('classify-{{ e.id }}')">Classify</button>
|
| 1106 |
+
<button class="action-btn" onclick="toggleForm('notes-{{ e.id }}')">Notes</button>
|
| 1107 |
+
</div>
|
| 1108 |
+
|
| 1109 |
+
<!-- Classify form -->
|
| 1110 |
+
<div class="inline-form" id="classify-{{ e.id }}">
|
| 1111 |
+
{# Tie the label to the topic <select> that follows so clicking it focuses the control and assistive tech announces it. #}
<label for="classify-topic-{{ e.id }}">Topic</label>
|
| 1112 |
+
<select id="classify-topic-{{ e.id }}">
|
| 1113 |
+
<option value="">— none —</option>
|
| 1114 |
+
{% for t in topics %}
|
| 1115 |
+
<option value="{{ t }}" {{ 'selected' if e.topic == t }}>{{ t|replace('_', ' ') }}</option>
|
| 1116 |
+
{% endfor %}
|
| 1117 |
+
</select>
|
| 1118 |
+
<label>Styles</label>
|
| 1119 |
+
<div class="form-row">
|
| 1120 |
+
{% for s in styles %}
|
| 1121 |
+
<label>
|
| 1122 |
+
<input type="checkbox" value="{{ s }}"
|
| 1123 |
+
{{ 'checked' if s in e.styles }}
|
| 1124 |
+
class="style-check-{{ e.id }}">
|
| 1125 |
+
{{ s|replace('_', ' ') }}
|
| 1126 |
+
</label>
|
| 1127 |
+
{% endfor %}
|
| 1128 |
+
</div>
|
| 1129 |
+
<button class="save-btn" onclick="saveClassify({{ e.id }})">Save</button>
|
| 1130 |
+
</div>
|
| 1131 |
+
|
| 1132 |
+
<!-- Notes form -->
|
| 1133 |
+
<div class="inline-form" id="notes-{{ e.id }}">
|
| 1134 |
+
{# Tie the label to the notes <textarea> that follows so clicking it focuses the control and assistive tech announces it. #}
<label for="notes-text-{{ e.id }}">Notes</label>
|
| 1135 |
+
<textarea id="notes-text-{{ e.id }}">{{ e.ally_notes or '' }}</textarea>
|
| 1136 |
+
<button class="save-btn" onclick="saveNotes({{ e.id }})">Save</button>
|
| 1137 |
+
</div>
|
| 1138 |
+
|
| 1139 |
+
</div>
|
| 1140 |
+
{% endfor %}
|
| 1141 |
+
</div>
|
| 1142 |
+
|
| 1143 |
+
<!-- Pagination -->
|
| 1144 |
+
<div class="pagination">
|
| 1145 |
+
{% if current_page > 1 %}
|
| 1146 |
+
<a href="{{ _build_url(page=current_page - 1) }}">← Prev</a>
|
| 1147 |
+
{% endif %}
|
| 1148 |
+
{% if entries|length >= 50 %}
|
| 1149 |
+
<a href="{{ _build_url(page=current_page + 1) }}">Next →</a>
|
| 1150 |
+
{% endif %}
|
| 1151 |
+
</div>
|
| 1152 |
+
|
| 1153 |
+
{% else %}
|
| 1154 |
+
<div class="empty">
|
| 1155 |
+
<h3>No entries found</h3>
|
| 1156 |
+
<p>{% if current_q %}No results for "{{ current_q }}".{% else %}Add sources and run a scrape to populate the corpus.{% endif %}</p>
|
| 1157 |
+
</div>
|
| 1158 |
+
{% endif %}
|
| 1159 |
+
|
| 1160 |
+
{% endif %}
|
| 1161 |
+
|
| 1162 |
+
</div>
|
| 1163 |
+
|
| 1164 |
+
<!-- Toast notification -->
|
| 1165 |
+
<div class="toast" id="toast"></div>
|
| 1166 |
+
|
| 1167 |
+
<script>
|
| 1168 |
+
/**
 * Show a transient message in the shared #toast element.
 *
 * The message stays visible for 2 seconds. Any hide timer still pending from
 * an earlier call is cancelled first; otherwise a toast shown shortly after
 * another one would be hidden early by the older timer.
 *
 * @param {string} msg - Text to display.
 */
function toast(msg) {
  const t = document.getElementById('toast');
  t.textContent = msg;
  t.classList.add('show');
  // The timer handle lives on the function object so no extra global is needed.
  clearTimeout(toast.hideTimer);
  toast.hideTimer = setTimeout(() => t.classList.remove('show'), 2000);
}
|
| 1174 |
+
|
| 1175 |
+
/**
 * Toggle the favourite flag of an entry via POST /api/fav/<id> and reflect
 * the new state in the UI.
 *
 * @param {number|string} id - Entry id appended to the endpoint URL.
 * @param {Element} el - Element whose `inactive` class mirrors the state.
 */
function toggleFav(id, el) {
  fetch('/api/fav/' + id, { method: 'POST' })
    .then(r => r.json())
    .then(data => {
      if (data.ok) {
        el.classList.toggle('inactive', !data.favourite);
        toast(data.favourite ? 'Favourited' : 'Unfavourited');
      } else {
        // NOTE(review): assumes failures carry an `error` field like the
        // setup endpoints do; falls back to 'Unknown' when absent.
        toast('Error: ' + (data.error || 'Unknown'));
      }
    })
    .catch(() => {
      // Network failure or a response body that is not JSON.
      toast('Request failed');
    });
}
|
| 1185 |
+
|
| 1186 |
+
/**
 * Store a star rating for an entry via POST /api/rate/<id> and, on success,
 * repaint the entry's stars so that every star up to `rating` is filled.
 *
 * @param {number|string} id - Entry id; also matches `data-entry` on the
 *   `.rating-stars` container.
 * @param {number} rating - Selected rating, shown to the user as "n/5".
 */
function setRating(id, rating) {
  fetch('/api/rate/' + id, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ rating: rating })
  })
    .then(r => r.json())
    .then(data => {
      if (data.ok) {
        const stars = document.querySelectorAll(`.rating-stars[data-entry="${id}"] .rating-star`);
        stars.forEach(s => {
          // Explicit radix: data-star is a decimal index.
          s.classList.toggle('filled', parseInt(s.dataset.star, 10) <= rating);
        });
        toast('Rated ' + rating + '/5');
      } else {
        // NOTE(review): assumes failures carry an `error` field like the
        // setup endpoints do; falls back to 'Unknown' when absent.
        toast('Error: ' + (data.error || 'Unknown'));
      }
    })
    .catch(() => {
      // Network failure or a response body that is not JSON.
      toast('Request failed');
    });
}
|
| 1203 |
+
|
| 1204 |
+
/**
 * Show or hide an inline form by flipping its `visible` class.
 *
 * @param {string} formId - DOM id of the form container.
 */
function toggleForm(formId) {
  document.getElementById(formId).classList.toggle('visible');
}
|
| 1208 |
+
|
| 1209 |
+
/**
 * Save the topic and style classification of an entry via
 * POST /api/classify/<id>.
 *
 * The topic comes from the `classify-topic-<id>` control (an empty selection
 * is sent as null) and the styles from every checked `.style-check-<id>`
 * checkbox. On success the page reloads so the entry's badges are refreshed;
 * on failure the user is told instead of the click silently doing nothing.
 *
 * @param {number|string} id - Entry id.
 */
function saveClassify(id) {
  const topic = document.getElementById('classify-topic-' + id).value;
  const checkboxes = document.querySelectorAll('.style-check-' + id + ':checked');
  const styles = Array.from(checkboxes).map(cb => cb.value);

  fetch('/api/classify/' + id, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ topic: topic || null, styles: styles })
  })
    .then(r => r.json())
    .then(data => {
      if (data.ok) {
        toast('Classified');
        // Short delay so the toast is seen before the reload updates badges.
        setTimeout(() => location.reload(), 500);
      } else {
        // NOTE(review): assumes failures carry an `error` field like the
        // setup endpoints do; falls back to 'Unknown' when absent.
        toast('Error: ' + (data.error || 'Unknown'));
      }
    })
    .catch(() => {
      // Network failure or a response body that is not JSON.
      toast('Request failed');
    });
}
|
| 1228 |
+
|
| 1229 |
+
/**
 * Save the free-text notes of an entry via POST /api/notes/<id>.
 *
 * On success the notes form is toggled (closed) and a confirmation toast is
 * shown. On failure the form is left as it is and an error toast is shown,
 * so the user knows the text was not stored.
 *
 * @param {number|string} id - Entry id; the text is read from
 *   `notes-text-<id>`.
 */
function saveNotes(id) {
  const notes = document.getElementById('notes-text-' + id).value;
  fetch('/api/notes/' + id, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ notes: notes })
  })
    .then(r => r.json())
    .then(data => {
      if (data.ok) {
        toast('Notes saved');
        toggleForm('notes-' + id);
      } else {
        // NOTE(review): assumes failures carry an `error` field like the
        // setup endpoints do; falls back to 'Unknown' when absent.
        toast('Error: ' + (data.error || 'Unknown'));
      }
    })
    .catch(() => {
      // Network failure or a response body that is not JSON.
      toast('Request failed');
    });
}
|
| 1244 |
+
|
| 1245 |
+
/**
 * Expand or collapse the text of an entry and relabel its toggle button.
 *
 * The button is taken to be the element immediately following `text-<id>`.
 *
 * @param {number|string} id - Entry id.
 */
function toggleExpand(id) {
  const body = document.getElementById('text-' + id);
  const button = body.nextElementSibling;
  // classList.toggle() reports whether the class is present afterwards.
  const nowExpanded = body.classList.toggle('expanded');
  body.classList.toggle('truncated');
  button.textContent = nowExpanded ? 'Show less' : 'Show more';
}
|
| 1252 |
+
|
| 1253 |
+
/**
 * Navigate to the current page with one query parameter changed.
 *
 * A truthy `value` sets the parameter; a falsy one removes it. The `page`
 * parameter is always dropped, so the filtered view starts without a page
 * offset.
 *
 * @param {string} key - Query parameter name.
 * @param {string} value - New value, or a falsy value to clear the filter.
 */
function applyFilter(key, value) {
  const target = new URL(window.location.href);
  const params = target.searchParams;
  if (!value) {
    params.delete(key);
  } else {
    params.set(key, value);
  }
  params.delete('page');
  window.location.href = target.toString();
}
|
| 1263 |
+
|
| 1264 |
+
// Once the DOM is ready, flag every entry text whose content is taller than
// its box by adding the `truncated` class.
document.addEventListener('DOMContentLoaded', () => {
  for (const entry of document.querySelectorAll('.entry-text')) {
    const overflows = entry.scrollHeight > entry.clientHeight;
    if (overflows) {
      entry.classList.add('truncated');
    }
  }
});
|
| 1272 |
+
|
| 1273 |
+
// --- Setup page ---
|
| 1274 |
+
|
| 1275 |
+
/**
 * Open or close a platform card on the setup page: flips `visible` on
 * `body-<id>` and `open` on `toggle-<id>`.
 *
 * @param {string} id - Platform id used in both element ids.
 */
function togglePlatform(id) {
  const panel = document.getElementById('body-' + id);
  const arrow = document.getElementById('toggle-' + id);
  panel.classList.toggle('visible');
  arrow.classList.toggle('open');
}
|
| 1281 |
+
|
| 1282 |
+
/**
 * Save the credentials of one platform via POST /api/setup/<platformId>.
 *
 * Every `input[data-platform="<platformId>"]` contributes one entry to the
 * payload, keyed by its `data-key` attribute. On success the inputs and the
 * card's status dot are restyled to show whether anything is configured.
 * Failures reported by the server and failed requests both produce a toast.
 *
 * @param {string} platformId - Platform id used in the URL and element ids.
 */
function savePlatform(platformId) {
  const inputs = document.querySelectorAll(`input[data-platform="${platformId}"]`);
  const data = {};
  inputs.forEach(input => {
    data[input.dataset.key] = input.value;
  });

  fetch('/api/setup/' + platformId, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data)
  })
    .then(r => r.json())
    .then(result => {
      if (result.ok) {
        toast('Keys saved for ' + platformId);
        // Update field styling
        inputs.forEach(input => {
          input.classList.toggle('has-value', !!input.value);
        });
        // Update status dot: configured as soon as any field has a value
        const hasAnyValue = Array.from(inputs).some(i => !!i.value);
        const card = document.getElementById('platform-' + platformId);
        const dot = card.querySelector('.platform-status');
        dot.classList.toggle('not-configured', !hasAnyValue);
        dot.classList.toggle('configured', hasAnyValue);
        card.classList.toggle('is-configured', hasAnyValue);
      } else {
        toast('Error: ' + (result.error || 'Unknown'));
      }
    })
    .catch(() => {
      // Network failure or a non-JSON response; mirrors testPlatform().
      toast('Request failed');
    });
}
|
| 1320 |
+
|
| 1321 |
+
/**
 * Run the server-side connection test for a platform
 * (POST /api/test/<platformId>) and show the outcome in
 * `test-result-<platformId>`.
 *
 * The element shows "Testing..." while the request is in flight, then the
 * server's message on success, its error text on failure, or
 * "Request failed" when the request itself fails.
 *
 * @param {string} platformId - Platform id used in the URL and element id.
 */
function testPlatform(platformId) {
  const output = document.getElementById('test-result-' + platformId);

  // Set text and state class together so they never get out of step.
  const report = (text, cssClasses) => {
    output.textContent = text;
    output.className = cssClasses;
  };

  report('Testing...', 'test-result loading');

  fetch('/api/test/' + platformId, { method: 'POST' })
    .then(response => response.json())
    .then(payload => {
      if (payload.ok) {
        report(payload.message, 'test-result success');
      } else {
        report(payload.error, 'test-result fail');
      }
    })
    .catch(() => {
      report('Request failed', 'test-result fail');
    });
}
|
| 1342 |
+
</script>
|
| 1343 |
+
|
| 1344 |
+
</body>
|
| 1345 |
+
</html>
|
templates/tombola.html
ADDED
|
@@ -0,0 +1,1172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">
|
| 6 |
+
<title>The Daily Snorter</title>
|
| 7 |
+
<meta name="description" content="10 jokes. We learn what makes you laugh. You get a comedy profile. Free.">
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--bg: #0a0a0a;
|
| 11 |
+
--card: #151515;
|
| 12 |
+
--border: #222;
|
| 13 |
+
--text: #e8e8e8;
|
| 14 |
+
--muted: #777;
|
| 15 |
+
--accent: #f5c842;
|
| 16 |
+
--ha: #4ade80;
|
| 17 |
+
--nah: #ef4444;
|
| 18 |
+
--streak: #c084fc;
|
| 19 |
+
}
|
| 20 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
| 21 |
+
body {
|
| 22 |
+
font-family: 'Georgia', 'Times New Roman', serif;
|
| 23 |
+
background: var(--bg);
|
| 24 |
+
color: var(--text);
|
| 25 |
+
min-height: 100dvh;
|
| 26 |
+
display: flex;
|
| 27 |
+
flex-direction: column;
|
| 28 |
+
align-items: center;
|
| 29 |
+
overflow-x: hidden;
|
| 30 |
+
-webkit-font-smoothing: antialiased;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
/* Header */
|
| 34 |
+
.header { text-align: center; padding: 1.2rem 1rem 0.4rem; }
|
| 35 |
+
.header-logo {
|
| 36 |
+
width: 72px; height: 72px;
|
| 37 |
+
border-radius: 50%;
|
| 38 |
+
border: 2px solid var(--accent);
|
| 39 |
+
margin-bottom: 0.3rem;
|
| 40 |
+
opacity: 0.9;
|
| 41 |
+
}
|
| 42 |
+
.header h1 {
|
| 43 |
+
font-size: 2rem;
|
| 44 |
+
color: var(--accent);
|
| 45 |
+
letter-spacing: 0.04em;
|
| 46 |
+
margin-top: 0;
|
| 47 |
+
font-weight: normal;
|
| 48 |
+
}
|
| 49 |
+
.header p {
|
| 50 |
+
color: var(--muted);
|
| 51 |
+
font-size: 0.8rem;
|
| 52 |
+
margin-top: 0.15rem;
|
| 53 |
+
font-style: italic;
|
| 54 |
+
letter-spacing: 0.02em;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
/* Social proof */
|
| 58 |
+
.social-proof {
|
| 59 |
+
text-align: center;
|
| 60 |
+
color: var(--muted);
|
| 61 |
+
font-size: 0.7rem;
|
| 62 |
+
padding: 0.3rem;
|
| 63 |
+
opacity: 0.6;
|
| 64 |
+
}
|
| 65 |
+
.social-proof span { color: var(--accent); }
|
| 66 |
+
|
| 67 |
+
/* Progress bar */
|
| 68 |
+
.progress-wrap {
|
| 69 |
+
width: min(85vw, 400px);
|
| 70 |
+
margin: 0.6rem auto 0;
|
| 71 |
+
display: flex;
|
| 72 |
+
gap: 3px;
|
| 73 |
+
}
|
| 74 |
+
.progress-dot {
|
| 75 |
+
flex: 1; height: 3px;
|
| 76 |
+
background: var(--border);
|
| 77 |
+
border-radius: 2px;
|
| 78 |
+
transition: background 0.4s, box-shadow 0.4s;
|
| 79 |
+
}
|
| 80 |
+
.progress-dot.done { background: var(--accent); }
|
| 81 |
+
.progress-dot.current { background: var(--accent); opacity: 0.4; }
|
| 82 |
+
.progress-dot.done.laugh { background: var(--ha); box-shadow: 0 0 4px var(--ha); }
|
| 83 |
+
.progress-dot.done.meh { background: var(--nah); opacity: 0.4; }
|
| 84 |
+
|
| 85 |
+
/* Comedy DNA bar */
|
| 86 |
+
.dna-bar {
|
| 87 |
+
width: min(85vw, 400px);
|
| 88 |
+
margin: 0.5rem auto;
|
| 89 |
+
text-align: center;
|
| 90 |
+
}
|
| 91 |
+
.dna-label {
|
| 92 |
+
font-size: 0.6rem;
|
| 93 |
+
color: var(--muted);
|
| 94 |
+
text-transform: uppercase;
|
| 95 |
+
letter-spacing: 0.12em;
|
| 96 |
+
margin-bottom: 0.25rem;
|
| 97 |
+
}
|
| 98 |
+
.dna-tags {
|
| 99 |
+
display: flex;
|
| 100 |
+
flex-wrap: wrap;
|
| 101 |
+
gap: 0.25rem;
|
| 102 |
+
justify-content: center;
|
| 103 |
+
min-height: 1.2rem;
|
| 104 |
+
}
|
| 105 |
+
.dna-tag {
|
| 106 |
+
font-size: 0.6rem;
|
| 107 |
+
padding: 0.12rem 0.45rem;
|
| 108 |
+
border-radius: 10px;
|
| 109 |
+
background: var(--card);
|
| 110 |
+
border: 1px solid var(--border);
|
| 111 |
+
color: var(--muted);
|
| 112 |
+
transition: all 0.3s;
|
| 113 |
+
opacity: 0;
|
| 114 |
+
transform: scale(0.8);
|
| 115 |
+
}
|
| 116 |
+
.dna-tag.active {
|
| 117 |
+
opacity: 1;
|
| 118 |
+
transform: scale(1);
|
| 119 |
+
border-color: var(--accent);
|
| 120 |
+
color: var(--accent);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* Streak notification */
|
| 124 |
+
.streak-notif {
|
| 125 |
+
position: fixed;
|
| 126 |
+
top: 0.8rem;
|
| 127 |
+
left: 50%;
|
| 128 |
+
transform: translateX(-50%) translateY(-80px);
|
| 129 |
+
background: var(--streak);
|
| 130 |
+
color: #fff;
|
| 131 |
+
font-size: 0.8rem;
|
| 132 |
+
font-weight: bold;
|
| 133 |
+
padding: 0.4rem 1rem;
|
| 134 |
+
border-radius: 16px;
|
| 135 |
+
z-index: 100;
|
| 136 |
+
transition: transform 0.4s cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 137 |
+
pointer-events: none;
|
| 138 |
+
letter-spacing: 0.02em;
|
| 139 |
+
}
|
| 140 |
+
.streak-notif.show { transform: translateX(-50%) translateY(0); }
|
| 141 |
+
|
| 142 |
+
/* Stage */
|
| 143 |
+
.stage {
|
| 144 |
+
width: min(92vw, 520px);
|
| 145 |
+
margin: 0 auto;
|
| 146 |
+
position: relative;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
/* Joke card — swipe-ready */
|
| 150 |
+
.joke-card {
|
| 151 |
+
background: var(--card);
|
| 152 |
+
border: 1px solid var(--border);
|
| 153 |
+
border-radius: 16px;
|
| 154 |
+
padding: 2.2rem 1.8rem;
|
| 155 |
+
margin: 0.8rem 0;
|
| 156 |
+
min-height: 200px;
|
| 157 |
+
display: flex;
|
| 158 |
+
flex-direction: column;
|
| 159 |
+
justify-content: center;
|
| 160 |
+
position: relative;
|
| 161 |
+
touch-action: pan-y;
|
| 162 |
+
will-change: transform;
|
| 163 |
+
user-select: none;
|
| 164 |
+
-webkit-user-select: none;
|
| 165 |
+
cursor: grab;
|
| 166 |
+
}
|
| 167 |
+
.joke-card.dragging { cursor: grabbing; }
|
| 168 |
+
.joke-card.tint-left { box-shadow: -3px 0 24px rgba(239,68,68,0.25); }
|
| 169 |
+
.joke-card.tint-right { box-shadow: 3px 0 24px rgba(74,222,128,0.25); }
|
| 170 |
+
|
| 171 |
+
.joke-text {
|
| 172 |
+
font-size: 1.15rem;
|
| 173 |
+
line-height: 1.65;
|
| 174 |
+
text-align: center;
|
| 175 |
+
white-space: pre-line;
|
| 176 |
+
}
|
| 177 |
+
.joke-img {
|
| 178 |
+
max-width: 100%;
|
| 179 |
+
max-height: 280px;
|
| 180 |
+
border-radius: 10px;
|
| 181 |
+
margin: 0 auto 1rem;
|
| 182 |
+
display: none;
|
| 183 |
+
object-fit: contain;
|
| 184 |
+
}
|
| 185 |
+
.joke-img.show { display: block; }
|
| 186 |
+
.joke-source {
|
| 187 |
+
text-align: center;
|
| 188 |
+
color: var(--muted);
|
| 189 |
+
font-size: 0.7rem;
|
| 190 |
+
margin-top: 1.2rem;
|
| 191 |
+
font-style: italic;
|
| 192 |
+
letter-spacing: 0.02em;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
/* Swipe exit animations */
|
| 196 |
+
@keyframes swipeLeft {
|
| 197 |
+
to { transform: translateX(-120vw) rotate(-15deg); opacity: 0; }
|
| 198 |
+
}
|
| 199 |
+
@keyframes swipeRight {
|
| 200 |
+
to { transform: translateX(120vw) rotate(15deg); opacity: 0; }
|
| 201 |
+
}
|
| 202 |
+
.joke-card.exit-left { animation: swipeLeft 0.3s ease-in forwards; pointer-events: none; }
|
| 203 |
+
.joke-card.exit-right { animation: swipeRight 0.3s ease-in forwards; pointer-events: none; }
|
| 204 |
+
|
| 205 |
+
/* Reaction buttons */
|
| 206 |
+
.reactions {
|
| 207 |
+
display: flex;
|
| 208 |
+
justify-content: center;
|
| 209 |
+
gap: 1rem;
|
| 210 |
+
margin-bottom: 0.8rem;
|
| 211 |
+
}
|
| 212 |
+
.react-btn {
|
| 213 |
+
flex: 1;
|
| 214 |
+
max-width: 160px;
|
| 215 |
+
border: none;
|
| 216 |
+
font-size: 1.1rem;
|
| 217 |
+
font-weight: bold;
|
| 218 |
+
padding: 0.75rem 1.2rem;
|
| 219 |
+
border-radius: 12px;
|
| 220 |
+
cursor: pointer;
|
| 221 |
+
transition: all 0.15s;
|
| 222 |
+
font-family: inherit;
|
| 223 |
+
user-select: none;
|
| 224 |
+
-webkit-tap-highlight-color: transparent;
|
| 225 |
+
letter-spacing: 0.03em;
|
| 226 |
+
}
|
| 227 |
+
.react-btn:active { transform: scale(0.95); }
|
| 228 |
+
.react-btn.nah {
|
| 229 |
+
background: var(--nah);
|
| 230 |
+
color: #fff;
|
| 231 |
+
}
|
| 232 |
+
.react-btn.nah:hover { background: #dc2626; }
|
| 233 |
+
.react-btn.ha {
|
| 234 |
+
background: var(--ha);
|
| 235 |
+
color: #000;
|
| 236 |
+
}
|
| 237 |
+
.react-btn.ha:hover { background: #22c55e; }
|
| 238 |
+
|
| 239 |
+
/* Share */
|
| 240 |
+
.share-row { text-align: center; margin-bottom: 0.4rem; height: 1.3rem; }
|
| 241 |
+
.share-btn {
|
| 242 |
+
font-size: 0.65rem;
|
| 243 |
+
color: var(--muted);
|
| 244 |
+
background: none;
|
| 245 |
+
border: 1px solid var(--border);
|
| 246 |
+
padding: 0.15rem 0.5rem;
|
| 247 |
+
border-radius: 10px;
|
| 248 |
+
cursor: pointer;
|
| 249 |
+
font-family: inherit;
|
| 250 |
+
opacity: 0;
|
| 251 |
+
transition: opacity 0.3s;
|
| 252 |
+
}
|
| 253 |
+
.share-btn.show { opacity: 1; }
|
| 254 |
+
.share-btn:hover { color: var(--accent); border-color: var(--accent); }
|
| 255 |
+
|
| 256 |
+
/* Mode selector (above card) */
|
| 257 |
+
.mode-selector {
|
| 258 |
+
display: flex;
|
| 259 |
+
justify-content: center;
|
| 260 |
+
gap: 0;
|
| 261 |
+
margin-bottom: 0.6rem;
|
| 262 |
+
background: var(--card);
|
| 263 |
+
border-radius: 12px;
|
| 264 |
+
border: 1px solid var(--border);
|
| 265 |
+
overflow: hidden;
|
| 266 |
+
}
|
| 267 |
+
.mode-pill {
|
| 268 |
+
flex: 1;
|
| 269 |
+
background: transparent;
|
| 270 |
+
border: none;
|
| 271 |
+
border-right: 1px solid var(--border);
|
| 272 |
+
color: var(--muted);
|
| 273 |
+
font-size: 0.95rem;
|
| 274 |
+
padding: 0.65rem 0.5rem;
|
| 275 |
+
cursor: pointer;
|
| 276 |
+
transition: all 0.2s;
|
| 277 |
+
font-family: inherit;
|
| 278 |
+
letter-spacing: 0.02em;
|
| 279 |
+
}
|
| 280 |
+
.mode-pill:last-child { border-right: none; }
|
| 281 |
+
.mode-pill:hover { color: var(--text); background: rgba(255,255,255,0.03); }
|
| 282 |
+
.mode-pill.active {
|
| 283 |
+
color: var(--accent);
|
| 284 |
+
background: rgba(245,200,66,0.1);
|
| 285 |
+
font-weight: bold;
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
/* Mood sub-selector */
|
| 289 |
+
.mood-bar {
|
| 290 |
+
display: none;
|
| 291 |
+
justify-content: center;
|
| 292 |
+
gap: 0.35rem;
|
| 293 |
+
margin-bottom: 0.4rem;
|
| 294 |
+
flex-wrap: wrap;
|
| 295 |
+
}
|
| 296 |
+
.mood-bar.show { display: flex; }
|
| 297 |
+
.mood-pill {
|
| 298 |
+
background: transparent;
|
| 299 |
+
border: 1px solid var(--border);
|
| 300 |
+
color: var(--muted);
|
| 301 |
+
font-size: 0.62rem;
|
| 302 |
+
padding: 0.2rem 0.6rem;
|
| 303 |
+
border-radius: 14px;
|
| 304 |
+
cursor: pointer;
|
| 305 |
+
transition: all 0.2s;
|
| 306 |
+
font-family: inherit;
|
| 307 |
+
letter-spacing: 0.03em;
|
| 308 |
+
}
|
| 309 |
+
.mood-pill:hover { border-color: var(--muted); color: var(--text); }
|
| 310 |
+
.mood-pill.active {
|
| 311 |
+
border-color: var(--streak);
|
| 312 |
+
color: var(--streak);
|
| 313 |
+
background: rgba(192,132,252,0.06);
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
.counter {
|
| 317 |
+
text-align: center;
|
| 318 |
+
color: var(--muted);
|
| 319 |
+
font-size: 0.7rem;
|
| 320 |
+
margin-bottom: 0.2rem;
|
| 321 |
+
letter-spacing: 0.03em;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/* Nudge bar */
|
| 325 |
+
.nudge-bar {
|
| 326 |
+
display: none;
|
| 327 |
+
text-align: center;
|
| 328 |
+
padding: 0.5rem;
|
| 329 |
+
margin-bottom: 0.4rem;
|
| 330 |
+
background: var(--card);
|
| 331 |
+
border: 1px solid var(--border);
|
| 332 |
+
border-radius: 8px;
|
| 333 |
+
font-size: 0.7rem;
|
| 334 |
+
color: var(--muted);
|
| 335 |
+
}
|
| 336 |
+
.nudge-bar.show { display: block; }
|
| 337 |
+
.nudge-bar input {
|
| 338 |
+
background: var(--bg);
|
| 339 |
+
border: 1px solid var(--border);
|
| 340 |
+
color: var(--text);
|
| 341 |
+
padding: 0.3rem 0.5rem;
|
| 342 |
+
border-radius: 6px;
|
| 343 |
+
font-size: 0.75rem;
|
| 344 |
+
font-family: inherit;
|
| 345 |
+
width: 170px;
|
| 346 |
+
margin: 0 0.2rem;
|
| 347 |
+
}
|
| 348 |
+
.nudge-bar input:focus { outline: none; border-color: var(--accent); }
|
| 349 |
+
.nudge-bar button {
|
| 350 |
+
background: var(--accent);
|
| 351 |
+
color: #000;
|
| 352 |
+
border: none;
|
| 353 |
+
padding: 0.3rem 0.6rem;
|
| 354 |
+
border-radius: 6px;
|
| 355 |
+
font-size: 0.7rem;
|
| 356 |
+
font-weight: bold;
|
| 357 |
+
cursor: pointer;
|
| 358 |
+
font-family: inherit;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
/* ===== GATE ===== */
|
| 362 |
+
.gate {
|
| 363 |
+
display: none;
|
| 364 |
+
text-align: center;
|
| 365 |
+
padding: 1.5rem 1.5rem 2rem;
|
| 366 |
+
max-width: 480px;
|
| 367 |
+
margin: 0 auto;
|
| 368 |
+
}
|
| 369 |
+
.gate.visible { display: block; }
|
| 370 |
+
.gate h2 {
|
| 371 |
+
font-size: 1.5rem;
|
| 372 |
+
color: var(--accent);
|
| 373 |
+
margin-bottom: 0.6rem;
|
| 374 |
+
font-weight: normal;
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
.dna-profile {
|
| 378 |
+
background: var(--card);
|
| 379 |
+
border: 1px solid var(--border);
|
| 380 |
+
border-radius: 12px;
|
| 381 |
+
padding: 1rem 1.3rem;
|
| 382 |
+
margin: 0.6rem auto 1rem;
|
| 383 |
+
max-width: 340px;
|
| 384 |
+
text-align: left;
|
| 385 |
+
}
|
| 386 |
+
.dna-profile h3 {
|
| 387 |
+
font-size: 0.75rem;
|
| 388 |
+
color: var(--accent);
|
| 389 |
+
margin-bottom: 0.5rem;
|
| 390 |
+
text-transform: uppercase;
|
| 391 |
+
letter-spacing: 0.1em;
|
| 392 |
+
}
|
| 393 |
+
.dna-row {
|
| 394 |
+
display: flex;
|
| 395 |
+
justify-content: space-between;
|
| 396 |
+
align-items: center;
|
| 397 |
+
padding: 0.25rem 0;
|
| 398 |
+
font-size: 0.8rem;
|
| 399 |
+
}
|
| 400 |
+
.dna-row-label { color: var(--muted); }
|
| 401 |
+
.dna-row-value { color: var(--text); font-weight: bold; }
|
| 402 |
+
.dna-row-bar {
|
| 403 |
+
flex: 1;
|
| 404 |
+
margin: 0 0.6rem;
|
| 405 |
+
height: 3px;
|
| 406 |
+
background: var(--border);
|
| 407 |
+
border-radius: 2px;
|
| 408 |
+
overflow: hidden;
|
| 409 |
+
}
|
| 410 |
+
.dna-row-fill {
|
| 411 |
+
height: 100%;
|
| 412 |
+
border-radius: 2px;
|
| 413 |
+
transition: width 0.5s;
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
.gate-pitch {
|
| 417 |
+
color: var(--muted);
|
| 418 |
+
font-size: 0.85rem;
|
| 419 |
+
margin-bottom: 0.8rem;
|
| 420 |
+
line-height: 1.5;
|
| 421 |
+
}
|
| 422 |
+
.gate-pitch strong { color: var(--text); }
|
| 423 |
+
|
| 424 |
+
.gate-urgency {
|
| 425 |
+
font-size: 0.7rem;
|
| 426 |
+
color: var(--nah);
|
| 427 |
+
margin-bottom: 0.8rem;
|
| 428 |
+
opacity: 0.85;
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.gate form {
|
| 432 |
+
display: flex;
|
| 433 |
+
flex-direction: column;
|
| 434 |
+
gap: 0.6rem;
|
| 435 |
+
max-width: 300px;
|
| 436 |
+
margin: 0 auto;
|
| 437 |
+
}
|
| 438 |
+
.gate input {
|
| 439 |
+
background: var(--card);
|
| 440 |
+
border: 1px solid var(--border);
|
| 441 |
+
color: var(--text);
|
| 442 |
+
padding: 0.65rem 0.9rem;
|
| 443 |
+
border-radius: 8px;
|
| 444 |
+
font-size: 0.95rem;
|
| 445 |
+
font-family: inherit;
|
| 446 |
+
}
|
| 447 |
+
.gate input:focus { outline: none; border-color: var(--accent); }
|
| 448 |
+
.gate-cta {
|
| 449 |
+
background: var(--accent);
|
| 450 |
+
color: #000;
|
| 451 |
+
border: none;
|
| 452 |
+
padding: 0.75rem;
|
| 453 |
+
border-radius: 8px;
|
| 454 |
+
font-size: 1rem;
|
| 455 |
+
font-weight: bold;
|
| 456 |
+
cursor: pointer;
|
| 457 |
+
font-family: inherit;
|
| 458 |
+
transition: transform 0.1s;
|
| 459 |
+
}
|
| 460 |
+
.gate-cta:hover { transform: scale(1.02); }
|
| 461 |
+
.gate-cta:active { transform: scale(0.98); }
|
| 462 |
+
|
| 463 |
+
.gate-sub {
|
| 464 |
+
font-size: 0.65rem;
|
| 465 |
+
color: var(--muted);
|
| 466 |
+
margin-top: 0.2rem;
|
| 467 |
+
opacity: 0.6;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
.skip-wrap { margin-top: 1rem; height: 1.3rem; }
|
| 471 |
+
.skip {
|
| 472 |
+
color: var(--muted);
|
| 473 |
+
font-size: 0.6rem;
|
| 474 |
+
cursor: pointer;
|
| 475 |
+
text-decoration: none;
|
| 476 |
+
opacity: 0;
|
| 477 |
+
transition: opacity 0.5s;
|
| 478 |
+
}
|
| 479 |
+
.skip.show { opacity: 0.4; }
|
| 480 |
+
.skip:hover { opacity: 0.7; }
|
| 481 |
+
|
| 482 |
+
/* Freeze overlay */
|
| 483 |
+
.freeze-overlay {
|
| 484 |
+
position: absolute;
|
| 485 |
+
inset: 0;
|
| 486 |
+
background: rgba(10,10,10,0.88);
|
| 487 |
+
border-radius: 16px;
|
| 488 |
+
display: flex;
|
| 489 |
+
flex-direction: column;
|
| 490 |
+
align-items: center;
|
| 491 |
+
justify-content: center;
|
| 492 |
+
z-index: 10;
|
| 493 |
+
opacity: 0;
|
| 494 |
+
pointer-events: none;
|
| 495 |
+
transition: opacity 0.3s;
|
| 496 |
+
}
|
| 497 |
+
.freeze-overlay.active { opacity: 1; pointer-events: auto; }
|
| 498 |
+
.freeze-timer {
|
| 499 |
+
font-size: 1.8rem;
|
| 500 |
+
color: var(--accent);
|
| 501 |
+
font-weight: bold;
|
| 502 |
+
margin-bottom: 0.3rem;
|
| 503 |
+
}
|
| 504 |
+
.freeze-msg {
|
| 505 |
+
color: var(--muted);
|
| 506 |
+
font-size: 0.75rem;
|
| 507 |
+
text-align: center;
|
| 508 |
+
line-height: 1.4;
|
| 509 |
+
}
|
| 510 |
+
.freeze-msg a { color: var(--accent); cursor: pointer; text-decoration: underline; }
|
| 511 |
+
|
| 512 |
+
/* Post-signup */
|
| 513 |
+
.unlocked {
|
| 514 |
+
display: none;
|
| 515 |
+
text-align: center;
|
| 516 |
+
padding: 2rem;
|
| 517 |
+
max-width: 480px;
|
| 518 |
+
margin: 0 auto;
|
| 519 |
+
}
|
| 520 |
+
.unlocked.visible { display: block; }
|
| 521 |
+
.unlocked h2 { color: var(--ha); margin-bottom: 0.3rem; font-size: 1.4rem; font-weight: normal; }
|
| 522 |
+
.unlocked p { color: var(--muted); font-size: 0.85rem; }
|
| 523 |
+
.unlocked .bonus { color: var(--accent); font-weight: bold; }
|
| 524 |
+
|
| 525 |
+
/* Footer */
|
| 526 |
+
.footer {
|
| 527 |
+
margin-top: auto;
|
| 528 |
+
padding: 1.2rem;
|
| 529 |
+
text-align: center;
|
| 530 |
+
color: var(--muted);
|
| 531 |
+
font-size: 0.6rem;
|
| 532 |
+
}
|
| 533 |
+
.footer a {
|
| 534 |
+
color: var(--accent);
|
| 535 |
+
text-decoration: none;
|
| 536 |
+
font-size: 0.72rem;
|
| 537 |
+
}
|
| 538 |
+
.footer a:hover { text-decoration: underline; }
|
| 539 |
+
|
| 540 |
+
/* Animations */
|
| 541 |
+
@keyframes slideUp {
|
| 542 |
+
from { opacity: 0; transform: translateY(16px); }
|
| 543 |
+
to { opacity: 1; transform: translateY(0); }
|
| 544 |
+
}
|
| 545 |
+
.gate.visible { animation: slideUp 0.4s ease-out; }
|
| 546 |
+
|
| 547 |
+
/* Responsive */
|
| 548 |
+
@media (max-width: 380px) {
|
| 549 |
+
.joke-text { font-size: 1.05rem; }
|
| 550 |
+
.joke-card { padding: 1.8rem 1.3rem; }
|
| 551 |
+
.react-btn { padding: 0.4rem 0.8rem; font-size: 0.75rem; }
|
| 552 |
+
}
|
| 553 |
+
</style>
|
| 554 |
+
</head>
|
| 555 |
+
<body>
|
| 556 |
+
|
| 557 |
+
<div class="header">
|
| 558 |
+
<img src="/static/snorter-logo.png" alt="The Daily Snorter" class="header-logo">
|
| 559 |
+
<h1>The Daily Snorter</h1>
|
| 560 |
+
<p>10 jokes. We figure out your sense of humour.</p>
|
| 561 |
+
</div>
|
| 562 |
+
|
| 563 |
+
<div class="social-proof" id="socialProof">
|
| 564 |
+
<span id="proofCount">2,847</span> comedy profiles built this week
|
| 565 |
+
</div>
|
| 566 |
+
|
| 567 |
+
<!-- Progress dots -->
|
| 568 |
+
<div class="progress-wrap" id="progress"></div>
|
| 569 |
+
|
| 570 |
+
<!-- Comedy DNA bar -->
|
| 571 |
+
<div class="dna-bar" id="dnaBar">
|
| 572 |
+
<div class="dna-label">Your comedy DNA</div>
|
| 573 |
+
<div class="dna-tags" id="dnaTags"></div>
|
| 574 |
+
</div>
|
| 575 |
+
|
| 576 |
+
<!-- Streak notification -->
|
| 577 |
+
<div class="streak-notif" id="streakNotif"></div>
|
| 578 |
+
|
| 579 |
+
<div class="stage" id="stage">
|
| 580 |
+
<!-- Mode selector (above the card) -->
|
| 581 |
+
<div class="mode-selector" id="modeSelector">
|
| 582 |
+
<button class="mode-pill active" data-pool="classics" onclick="setPool('classics')">Classics</button>
|
| 583 |
+
<button class="mode-pill" data-pool="dadjokes" onclick="setPool('dadjokes')">Dad Jokes</button>
|
| 584 |
+
<button class="mode-pill" data-pool="fresh" onclick="setPool('fresh')">Fresh</button>
|
| 585 |
+
</div>
|
| 586 |
+
|
| 587 |
+
<!-- Mood sub-selector (visible only when Fresh selected) -->
|
| 588 |
+
<div class="mood-bar" id="moodBar">
|
| 589 |
+
<button class="mood-pill active" data-mood="any" onclick="setMood('any')">All</button>
|
| 590 |
+
<button class="mood-pill" data-mood="dark" onclick="setMood('dark')">Dark</button>
|
| 591 |
+
<button class="mood-pill" data-mood="political" onclick="setMood('political')">Political</button>
|
| 592 |
+
<button class="mood-pill" data-mood="clean" onclick="setMood('clean')">Clean</button>
|
| 593 |
+
</div>
|
| 594 |
+
|
| 595 |
+
<div class="joke-card" id="jokeCard">
|
| 596 |
+
<img class="joke-img" id="jokeImg" alt="">
|
| 597 |
+
<div class="joke-text" id="jokeText">Loading...</div>
|
| 598 |
+
<div class="joke-source" id="jokeSource"></div>
|
| 599 |
+
<div class="freeze-overlay" id="freezeOverlay">
|
| 600 |
+
<div class="freeze-timer" id="freezeTimer"></div>
|
| 601 |
+
<div class="freeze-msg">Skip the wait — <a onclick="scrollToNudge()">drop your email</a></div>
|
| 602 |
+
</div>
|
| 603 |
+
</div>
|
| 604 |
+
|
| 605 |
+
<!-- Reaction buttons -->
|
| 606 |
+
<div class="reactions" id="reactions">
|
| 607 |
+
<button class="react-btn nah" onclick="react('nah')">Nah</button>
|
| 608 |
+
<button class="react-btn ha" onclick="react('ha')">Ha!</button>
|
| 609 |
+
</div>
|
| 610 |
+
|
| 611 |
+
<div class="share-row">
|
| 612 |
+
<button class="share-btn" id="shareBtn" onclick="shareJoke()">Share this one</button>
|
| 613 |
+
</div>
|
| 614 |
+
|
| 615 |
+
<!-- Nudge bar -->
|
| 616 |
+
<div class="nudge-bar" id="nudgeBar">
|
| 617 |
+
No wait, instant jokes:
|
| 618 |
+
<input type="email" id="nudgeEmail" placeholder="Email" autocomplete="email">
|
| 619 |
+
<button onclick="nudgeSignup()">Go</button>
|
| 620 |
+
</div>
|
| 621 |
+
|
| 622 |
+
<div class="counter" id="counter"></div>
|
| 623 |
+
</div>
|
| 624 |
+
|
| 625 |
+
<!-- Email gate -->
|
| 626 |
+
<div class="gate stage" id="gate">
|
| 627 |
+
<h2>Your Comedy DNA</h2>
|
| 628 |
+
<div class="dna-profile" id="dnaProfile"></div>
|
| 629 |
+
<p class="gate-pitch">
|
| 630 |
+
We've mapped your sense of humour.<br>
|
| 631 |
+
<strong>Save your profile</strong> and get 3 matched jokes<br>in your inbox every morning.
|
| 632 |
+
</p>
|
| 633 |
+
<p class="gate-urgency" id="gateUrgency">Profile data expires in 24 hours</p>
|
| 634 |
+
<form onsubmit="return signup(event)">
|
| 635 |
+
<input type="email" id="signupEmail" placeholder="Email" required autocomplete="email">
|
| 636 |
+
<button type="submit" class="gate-cta">Save my comedy profile</button>
|
| 637 |
+
</form>
|
| 638 |
+
<p class="gate-sub">Free. Unsubscribe anytime. No spam, just laughs.</p>
|
| 639 |
+
<div class="skip-wrap">
|
| 640 |
+
<span class="skip" id="skipLink" onclick="skipGate()">just let me keep going</span>
|
| 641 |
+
</div>
|
| 642 |
+
</div>
|
| 643 |
+
|
| 644 |
+
<!-- Post-signup -->
|
| 645 |
+
<div class="unlocked stage" id="unlocked">
|
| 646 |
+
<h2>Profile saved.</h2>
|
| 647 |
+
<p>First email drops tomorrow morning.</p>
|
| 648 |
+
<p style="margin-top:0.5rem">Meanwhile, <span class="bonus">unlimited mode unlocked.</span></p>
|
| 649 |
+
</div>
|
| 650 |
+
|
| 651 |
+
<div class="footer">
|
| 652 |
+
<a href="/worldcup">Joke World Cup — vote on head-to-head matchups</a>
|
| 653 |
+
<p style="margin-top:0.2rem">Sources attributed. Curated with questionable taste.</p>
|
| 654 |
+
</div>
|
| 655 |
+
|
| 656 |
+
<script>
|
| 657 |
+
// ===== CONFIG =====
|
| 658 |
+
const GATE_AT = 10;
|
| 659 |
+
const FREEZE_BASE = 3;
|
| 660 |
+
const FREEZE_INCREMENT = 1;
|
| 661 |
+
const FREEZE_MAX = 8;
|
| 662 |
+
const SWIPE_THRESHOLD = 80;
|
| 663 |
+
const ROTATION_FACTOR = 0.08;
|
| 664 |
+
|
| 665 |
+
// ===== STATE =====
|
| 666 |
+
let jokeCount = 0;
|
| 667 |
+
let currentJoke = null;
|
| 668 |
+
let currentMood = 'any';
|
| 669 |
+
let currentPool = 'classics';
|
| 670 |
+
let gated = false;
|
| 671 |
+
let seenIds = new Set();
|
| 672 |
+
let laughStreak = 0;
|
| 673 |
+
let postGateCount = 0;
|
| 674 |
+
let reactions = [];
|
| 675 |
+
let prefs = { ha: 0, nah: 0, total: 0, topics: {}, streakMax: 0 };
|
| 676 |
+
let swipeLocked = false;
|
| 677 |
+
|
| 678 |
+
// ===== SWIPE ENGINE =====
|
| 679 |
+
let touchStartX = 0;
|
| 680 |
+
let touchStartY = 0;
|
| 681 |
+
let isDragging = false;
|
| 682 |
+
let cardEl = null;
|
| 683 |
+
|
| 684 |
+
/**
 * Attach every input path that can drag or dismiss the joke card:
 * touch and mouse dragging on the card, plus arrow keys on the document.
 * Also caches the card element in the shared `cardEl` variable.
 */
function initSwipe() {
  cardEl = document.getElementById('jokeCard');

  // touchmove is registered non-passive because onDragMove may call preventDefault().
  cardEl.addEventListener('touchstart', onDragStart, { passive: true });
  cardEl.addEventListener('touchmove', onDragMove, { passive: false });
  cardEl.addEventListener('touchend', onDragEnd);
  // A touch sequence can be aborted by the browser without a touchend.
  // Without this the drag state stays on and the card is left displaced,
  // so abandon the drag and return the card to rest without casting a vote.
  cardEl.addEventListener('touchcancel', () => {
    if (!isDragging) return;
    isDragging = false;
    cardEl.classList.remove('dragging', 'tint-left', 'tint-right');
    snapBack();
  });

  // Mouse: move/up are tracked on the document so a drag that leaves the card still ends.
  cardEl.addEventListener('mousedown', onDragStart);
  document.addEventListener('mousemove', onDragMove);
  document.addEventListener('mouseup', onDragEnd);

  document.addEventListener('keydown', onKeyDown);
}
|
| 697 |
+
|
| 698 |
+
/**
 * Read the pointer position from a touch or mouse event.
 * Uses the first active touch when the event carries one, otherwise the
 * event's own clientX/clientY.
 * @param {Event} e touch or mouse event
 * @returns {{x: number, y: number}}
 */
function getPos(e) {
  const hasTouch = e.touches && e.touches.length > 0;
  const point = hasTouch ? e.touches[0] : e;
  return { x: point.clientX, y: point.clientY };
}
|
| 703 |
+
|
| 704 |
+
/**
 * Begin a drag: remember where the pointer went down and switch the card
 * into its "dragging" state. Does nothing while the gate is up or a swipe
 * animation is in flight.
 * @param {Event} e touchstart or mousedown event
 */
function onDragStart(e) {
  const blocked = gated || swipeLocked;
  if (blocked) return;

  const origin = getPos(e);
  touchStartX = origin.x;
  touchStartY = origin.y;
  isDragging = true;

  cardEl.classList.add('dragging');
  // Disable the CSS transition so the card tracks the pointer immediately.
  cardEl.style.transition = 'none';
}
|
| 713 |
+
|
| 714 |
+
/**
 * Follow the pointer during a drag: translate and tilt the card, and tint it
 * once it has travelled half the swipe threshold in either direction.
 * @param {Event} e touchmove or mousemove event
 */
function onDragMove(e) {
  if (!isDragging) return;

  const offsetX = getPos(e).x - touchStartX;
  const tiltDeg = offsetX * ROTATION_FACTOR;
  cardEl.style.transform = 'translateX(' + offsetX + 'px) rotate(' + tiltDeg + 'deg)';

  const tintAt = SWIPE_THRESHOLD * 0.5;
  cardEl.classList.remove('tint-left', 'tint-right');
  if (offsetX > tintAt) {
    cardEl.classList.add('tint-right');
  } else if (offsetX < -tintAt) {
    cardEl.classList.add('tint-left');
  }

  // Past 10px of horizontal travel, suppress the event's default action where allowed.
  if (e.cancelable && Math.abs(offsetX) > 10) e.preventDefault();
}
|
| 732 |
+
|
| 733 |
+
/**
 * Finish a drag. A release beyond SWIPE_THRESHOLD to the right counts as
 * "ha", beyond it to the left as "nah"; anything shorter snaps the card back.
 * @param {Event} e touchend or mouseup event
 */
function onDragEnd(e) {
  if (!isDragging) return;
  isDragging = false;
  cardEl.classList.remove('dragging');

  // touchend carries the release point in changedTouches; mouseup on the event itself.
  const releaseX = e.changedTouches ? e.changedTouches[0].clientX : e.clientX;
  const travelled = releaseX - touchStartX;

  cardEl.classList.remove('tint-left', 'tint-right');

  if (travelled > SWIPE_THRESHOLD) {
    triggerSwipe('exit-right', 'ha');
    return;
  }
  if (travelled < -SWIPE_THRESHOLD) {
    triggerSwipe('exit-left', 'nah');
    return;
  }
  snapBack();
}
|
| 754 |
+
|
| 755 |
+
/**
 * Play the card's exit animation, then record the reaction.
 * Input is locked for the 300ms the animation is given to run.
 * @param {string} exitClass CSS class to add to the card ('exit-left' / 'exit-right')
 * @param {string} reaction  reaction passed on to react() ('ha' / 'nah')
 */
function triggerSwipe(exitClass, reaction) {
  const finish = () => {
    resetCard();
    react(reaction);
    swipeLocked = false;
  };

  swipeLocked = true;
  cardEl.classList.add(exitClass);
  setTimeout(finish, 300);
}
|
| 764 |
+
|
| 765 |
+
/**
 * Animate the card back to its resting position after a drag that was too
 * short to count as a swipe.
 */
function snapBack() {
  const style = cardEl.style;
  style.transition = 'transform 0.3s cubic-bezier(0.34, 1.56, 0.64, 1)';
  style.transform = 'translateX(0) translateY(0) rotate(0)';
}
|
| 769 |
+
|
| 770 |
+
/**
 * Strip every transient swipe class and inline transform/transition from the
 * card so it is back under stylesheet control.
 */
function resetCard() {
  const transientClasses = ['exit-left', 'exit-right', 'tint-left', 'tint-right', 'dragging'];
  cardEl.classList.remove(...transientClasses);

  const style = cardEl.style;
  style.transform = '';
  style.transition = '';
}
|
| 776 |
+
|
| 777 |
+
/**
 * Keyboard voting: ArrowLeft reacts "nah", ArrowRight reacts "ha", using the
 * same exit animation as a swipe.
 *
 * The listener is attached to the whole document, so key presses that
 * originate in a form field (the page has e-mail inputs) are ignored. There
 * the arrow keys move the text caret and must not cast a vote.
 * @param {KeyboardEvent} e keydown event
 */
function onKeyDown(e) {
  if (gated || swipeLocked) return;

  const origin = e.target;
  const tag = origin && origin.tagName;
  const isEditing =
    tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT' ||
    Boolean(origin && origin.isContentEditable);
  if (isEditing) return;

  if (e.key === 'ArrowLeft') {
    triggerSwipe('exit-left', 'nah');
  } else if (e.key === 'ArrowRight') {
    triggerSwipe('exit-right', 'ha');
  }
}
|
| 789 |
+
|
| 790 |
+
// ===== LOCAL STORAGE =====
|
| 791 |
+
const STORAGE_KEY = 'snort_user';
|
| 792 |
+
/**
 * Read the locally stored user record.
 * @returns {Object} the stored record, or an empty object when nothing is
 *   stored, storage cannot be read, or the stored value is not a JSON object.
 */
function getUser() {
  try {
    const parsed = JSON.parse(localStorage.getItem(STORAGE_KEY));
    // Valid JSON is not necessarily an object. A stored string or array would
    // otherwise be spread into the record by saveUser().
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isRecord ? parsed : {};
  } catch {
    return {};
  }
}
|
| 796 |
+
/**
 * Merge `data` into the locally stored user record.
 *
 * Persistence is best-effort: writing to localStorage can throw (for example
 * when storage is full or blocked). This function is also called at script
 * top level, so an uncaught error here would stop the rest of the page script
 * from running.
 * @param {Object} data fields to add to or overwrite in the stored record
 */
function saveUser(data) {
  try {
    const merged = { ...getUser(), ...data };
    localStorage.setItem(STORAGE_KEY, JSON.stringify(merged));
  } catch (err) {
    console.warn('Could not persist user data', err);
  }
}
|
| 799 |
+
/** True when the stored user record carries a non-empty e-mail. */
function isSignedUp() {
  return Boolean(getUser().email);
}
|
| 800 |
+
saveUser({ visited: true, lastVisit: Date.now() });
|
| 801 |
+
|
| 802 |
+
// ===== DNA STYLES =====
|
| 803 |
+
const DNA_STYLES = [
|
| 804 |
+
{ key: 'easy_laugh', label: 'Easy Laugh', test: () => prefs.ha > prefs.total * 0.7 },
|
| 805 |
+
{ key: 'tough_crowd', label: 'Tough Crowd', test: () => prefs.nah > prefs.total * 0.6 },
|
| 806 |
+
{ key: 'dark_fan', label: 'Dark Side', test: () => (prefs.topics['left_wing']||0) + (prefs.topics['right_wing']||0) > 2 },
|
| 807 |
+
{ key: 'clean_machine', label: 'Clean Machine', test: () => (prefs.topics['apolitical']||0) > prefs.total * 0.5 },
|
| 808 |
+
{ key: 'streaker', label: 'On a Roll', test: () => prefs.streakMax >= 3 },
|
| 809 |
+
];
|
| 810 |
+
|
| 811 |
+
// ===== PROGRESS =====
|
| 812 |
+
const progressEl = document.getElementById('progress');
|
| 813 |
+
for (let i = 0; i < GATE_AT; i++) {
|
| 814 |
+
const dot = document.createElement('div');
|
| 815 |
+
dot.className = 'progress-dot';
|
| 816 |
+
dot.id = 'dot-' + i;
|
| 817 |
+
progressEl.appendChild(dot);
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
const proofBase = 2400 + Math.floor(Math.random() * 900);
|
| 821 |
+
document.getElementById('proofCount').textContent = proofBase.toLocaleString();
|
| 822 |
+
|
| 823 |
+
if (isSignedUp()) {
|
| 824 |
+
document.getElementById('progress').style.display = 'none';
|
| 825 |
+
document.getElementById('dnaBar').style.display = 'none';
|
| 826 |
+
document.getElementById('socialProof').style.display = 'none';
|
| 827 |
+
}
|
| 828 |
+
|
| 829 |
+
/**
 * Repaint the progress dots: answered jokes show their reaction, the next
 * joke is marked current, the rest stay plain. Skipped for signed-up users.
 */
function updateProgress() {
  if (isSignedUp()) return;

  for (let idx = 0; idx < GATE_AT; idx += 1) {
    const dot = document.getElementById('dot-' + idx);
    let cls = 'progress-dot';
    if (idx === jokeCount) {
      cls = 'progress-dot current';
    } else if (idx < jokeCount) {
      const entry = reactions[idx];
      const wasNah = entry && entry.reaction === 'nah';
      cls = wasNah ? 'progress-dot done meh' : 'progress-dot done laugh';
    }
    dot.className = cls;
  }
}
|
| 843 |
+
|
| 844 |
+
/**
 * Show "N of GATE_AT" under the card while the visitor is between the first
 * reaction and the gate; blank otherwise. Skipped for signed-up users.
 */
function updateCounter() {
  if (isSignedUp()) return;

  const midRun = jokeCount > 0 && jokeCount < GATE_AT;
  const label = midRun ? jokeCount + ' of ' + GATE_AT : '';
  document.getElementById('counter').textContent = label;
}
|
| 852 |
+
|
| 853 |
+
/**
 * Rebuild the tag list from scratch with one tag per DNA_STYLES entry whose
 * test() currently passes.
 */
function updateDnaTags() {
  const host = document.getElementById('dnaTags');
  host.innerHTML = '';

  for (const style of DNA_STYLES) {
    if (!style.test()) continue;
    const chip = document.createElement('span');
    chip.className = 'dna-tag active';
    chip.textContent = style.label;
    host.appendChild(chip);
  }
}
|
| 865 |
+
|
| 866 |
+
/**
 * Show a short-lived notification for 1.8 seconds.
 *
 * The pending hide timer is cancelled first, so a message shown shortly after
 * another gets its full display time. Otherwise the earlier timer would hide
 * it early.
 * @param {string} msg text to display
 */
function showStreak(msg) {
  const el = document.getElementById('streakNotif');
  el.textContent = msg;
  el.classList.add('show');

  // The timer id is kept on the function itself so no extra global state is needed.
  clearTimeout(showStreak.hideTimer);
  showStreak.hideTimer = setTimeout(() => el.classList.remove('show'), 1800);
}
|
| 872 |
+
|
| 873 |
+
// ===== FREEZE =====
|
| 874 |
+
/**
 * How long the next joke should be held back, in seconds.
 * Signed-up users and visitors who have not reached the gate never wait;
 * everyone else waits FREEZE_BASE plus FREEZE_INCREMENT per post-gate joke,
 * capped at FREEZE_MAX.
 * @returns {number}
 */
function getFreezeSeconds() {
  const exempt = isSignedUp() || jokeCount < GATE_AT;
  if (exempt) return 0;

  const escalated = FREEZE_BASE + (postGateCount * FREEZE_INCREMENT);
  return Math.min(escalated, FREEZE_MAX);
}
|
| 879 |
+
|
| 880 |
+
/**
 * Show the countdown overlay for getFreezeSeconds() seconds.
 * @returns {Promise<void>} resolves immediately when no wait applies,
 *   otherwise once the countdown reaches zero and the overlay is hidden.
 */
function runFreeze() {
  return new Promise(done => {
    const total = getFreezeSeconds();
    if (total <= 0) {
      done();
      return;
    }

    const overlay = document.getElementById('freezeOverlay');
    const readout = document.getElementById('freezeTimer');
    overlay.classList.add('active');

    let left = total;
    readout.textContent = left;

    const ticker = setInterval(() => {
      left -= 1;
      if (left > 0) {
        readout.textContent = left;
        return;
      }
      clearInterval(ticker);
      overlay.classList.remove('active');
      done();
    }, 1000);
  });
}
|
| 901 |
+
|
| 902 |
+
/**
 * Click handler for the overlay's "drop your email" link: focus the nudge
 * e-mail field and hide the freeze overlay.
 */
function scrollToNudge() {
  const emailField = document.getElementById('nudgeEmail');
  emailField.focus();

  const overlay = document.getElementById('freezeOverlay');
  overlay.classList.remove('active');
}
|
| 906 |
+
|
| 907 |
+
// ===== LOAD JOKE =====
|
| 908 |
+
/**
 * Turn joke text that may contain a few HTML entities and <br> tags into
 * plain text suitable for textContent.
 * @param {string} raw text as delivered by the API
 * @returns {string}
 */
function decodeJokeText(raw) {
  return raw
    .replace(/&#(?:39|x27);/gi, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/<br\s*\/?>/g, '\n')
    // '&amp;' is decoded last so that "&amp;lt;" becomes "&lt;" (one level),
    // not "<" (two levels).
    .replace(/&amp;/g, '&');
}

/**
 * Fetch a joke for the current pool/mood and display it on the card.
 *
 * Steps: hide the share button and image, wait out any freeze, request a
 * joke, retry once if it was already seen this session, then render and fade
 * the card in. API-reported errors and network failures replace the joke
 * text with a message.
 *
 * Calls can overlap (a reaction or mode change during a freeze or fetch), so
 * each call takes a ticket and only the most recent one may update the page.
 * @returns {Promise<void>}
 */
async function loadJoke() {
  const ticket = (loadJoke.latest || 0) + 1;
  loadJoke.latest = ticket;
  const superseded = () => loadJoke.latest !== ticket;

  document.getElementById('shareBtn').classList.remove('show');
  document.getElementById('jokeImg').classList.remove('show');

  await runFreeze();
  if (superseded()) return;

  const textEl = document.getElementById('jokeText');
  const sourceEl = document.getElementById('jokeSource');

  try {
    const params = new URLSearchParams();
    if (currentMood !== 'any') params.set('mood', currentMood);
    params.set('pool', currentPool);
    const jokeUrl = '/api/tombola/joke?' + params.toString();

    const first = await (await fetch(jokeUrl)).json();
    if (superseded()) return;

    if (first.error) {
      textEl.textContent = 'No more jokes right now. Check back soon.';
      sourceEl.textContent = '';
      return;
    }

    // Avoid repeats: one retry, then accept the repeat rather than show nothing.
    let chosen = first;
    if (seenIds.has(first.id)) {
      try {
        const second = await (await fetch(jokeUrl)).json();
        if (!second.error && !seenIds.has(second.id)) chosen = second;
      } catch (retryErr) {
        // The first response is still valid; a failed retry must not discard it.
      }
      if (superseded()) return;
    }
    currentJoke = chosen;
    seenIds.add(chosen.id);

    const text = decodeJokeText(currentJoke.text);

    const imgEl = document.getElementById('jokeImg');
    if (currentJoke.image_url) {
      imgEl.src = currentJoke.image_url;
      imgEl.classList.add('show');
    } else {
      imgEl.classList.remove('show');
      imgEl.src = '';
    }

    textEl.textContent = text;
    sourceEl.textContent = currentJoke.source ? '\u2014 ' + currentJoke.source : '';
  } catch (e) {
    textEl.textContent = 'Something went wrong. Try again.';
    // Do not leave the previous joke's attribution under an error message.
    sourceEl.textContent = '';
  }

  // Fade-in new card
  resetCard();
  cardEl.style.opacity = '0';
  cardEl.style.transform = 'translateY(8px)';
  requestAnimationFrame(() => {
    cardEl.style.transition = 'opacity 0.25s, transform 0.25s';
    cardEl.style.opacity = '1';
    cardEl.style.transform = 'translateY(0)';
    setTimeout(() => { cardEl.style.transition = ''; }, 300);
  });
}
|
| 981 |
+
|
| 982 |
+
// ===== REACT =====
|
| 983 |
+
/**
 * Record the visitor's reaction to the current joke and move on.
 *
 * Updates the running preference tallies, the reaction log and the laugh
 * streak, refreshes progress/counter/DNA tags, then either opens the e-mail
 * gate or loads the next joke.
 *
 * The gate opens exactly once, on the reaction that brings the count to
 * GATE_AT. With the previous `>=` comparison, a visitor who skipped the gate
 * was sent back to it after every further reaction. The post-gate counter
 * that lengthens the freeze was never reached.
 * @param {string} reaction 'ha' or 'nah'
 */
function react(reaction) {
  if (gated) return;

  prefs[reaction] = (prefs[reaction] || 0) + 1;
  prefs.total++;

  if (currentJoke && currentJoke.topic) {
    prefs.topics[currentJoke.topic] = (prefs.topics[currentJoke.topic] || 0) + 1;
  }

  reactions.push({
    id: currentJoke ? currentJoke.id : null,
    topic: currentJoke ? currentJoke.topic : null,
    source: currentJoke ? currentJoke.source : null,
    reaction: reaction,
  });

  if (reaction === 'ha') {
    laughStreak++;
    if (laughStreak > prefs.streakMax) prefs.streakMax = laughStreak;
    if (laughStreak === 3) showStreak('3 in a row!');
    else if (laughStreak === 5) showStreak('5 streak!');
    else if (laughStreak === 7) showStreak('Unstoppable!');
    // NOTE(review): loadJoke() below hides this button again straight away,
    // so it looks like it is never visible for long. Confirm the intended UX.
    document.getElementById('shareBtn').classList.add('show');
  } else {
    laughStreak = 0;
  }

  jokeCount++;
  updateProgress();
  updateCounter();
  updateDnaTags();

  const anonymous = !isSignedUp();

  if (jokeCount === GATE_AT && anonymous) {
    gated = true;
    setTimeout(showGate, 400);
    return;
  }

  // Each joke past the gate lengthens the next freeze (see getFreezeSeconds).
  if (jokeCount > GATE_AT && anonymous) {
    postGateCount++;
  }

  loadJoke();
}
|
| 1028 |
+
|
| 1029 |
+
// ===== DNA PROFILE =====
|
| 1030 |
+
/**
 * Render the profile summary shown on the gate: a type label, hit rate with
 * a bar, laughs out of total, best streak (when at least 2) and any DNA tags
 * whose test currently passes.
 */
function buildDnaProfile() {
  const total = prefs.total || 1; // avoid dividing by zero
  const laughs = prefs.ha || 0;
  const hitRate = Math.round((laughs / total) * 100);

  let type;
  if (hitRate > 80) {
    type = 'The Easy Laugh';
  } else if (hitRate < 30) {
    type = 'The Critic';
  } else if (prefs.nah > total * 0.5) {
    type = 'The Tough Crowd';
  } else {
    type = 'The Generalist';
  }

  const activeTags = DNA_STYLES.filter(s => s.test()).map(s => s.label);

  // A plain label/value row.
  const row = (label, value) =>
    '<div class="dna-row"><span class="dna-row-label">' + label +
    '</span><span class="dna-row-value">' + value + '</span></div>';

  const parts = [
    '<h3>Comedy Profile</h3>',
    row('Type', type),
    '<div class="dna-row"><span class="dna-row-label">Hit rate</span>',
    '<div class="dna-row-bar"><div class="dna-row-fill" style="width:' + hitRate + '%;background:var(--ha)"></div></div>',
    '<span class="dna-row-value">' + hitRate + '%</span></div>',
    row('Laughed at', laughs + '/' + total),
  ];

  if (prefs.streakMax >= 2) {
    parts.push(row('Best streak', prefs.streakMax + ' in a row'));
  }

  if (activeTags.length > 0) {
    parts.push('<div class="dna-row" style="margin-top:0.3rem;flex-wrap:wrap;gap:0.25rem">');
    for (const label of activeTags) {
      parts.push('<span style="font-size:0.65rem;padding:0.12rem 0.4rem;border-radius:10px;border:1px solid var(--accent);color:var(--accent)">' + label + '</span>');
    }
    parts.push('</div>');
  }

  document.getElementById('dnaProfile').innerHTML = parts.join('');
}
|
| 1061 |
+
|
| 1062 |
+
// ===== GATE =====
|
| 1063 |
+
/**
 * Swap the joke stage for the e-mail gate: hide the stage and DNA bar, fill
 * in the profile summary, reveal the gate, and reveal the skip link ten
 * seconds later.
 */
function showGate() {
  for (const id of ['stage', 'dnaBar']) {
    document.getElementById(id).style.display = 'none';
  }
  buildDnaProfile();
  document.getElementById('gate').classList.add('visible');

  const revealSkip = () => document.getElementById('skipLink').classList.add('show');
  setTimeout(revealSkip, 10000);
}
|
| 1072 |
+
|
| 1073 |
+
// ===== SIGNUP =====
|
| 1074 |
+
/**
 * Register an e-mail address: store it locally first, then POST it with the
 * collected preferences and reactions to the signup endpoint. A failed
 * request is ignored (best effort).
 * @param {string} email address to register
 * @returns {Promise<void>}
 */
async function doSignup(email) {
  saveUser({ email: email, signedUpAt: Date.now() });

  try {
    const payload = JSON.stringify({ email, preferences: prefs, reactions });
    const request = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    };
    await fetch('/api/tombola/signup', request);
  } catch (err) { /* best effort */ }
}
|
| 1084 |
+
|
| 1085 |
+
/**
 * Submit handler for the gate form: register the entered e-mail, show the
 * confirmation panel in place of the gate for 2.5 seconds, then resume the
 * joke stage.
 * @param {Event} e submit event (its default action is cancelled)
 */
async function signup(e) {
  e.preventDefault();

  const address = document.getElementById('signupEmail').value;
  await doSignup(address);

  document.getElementById('gate').classList.remove('visible');
  document.getElementById('unlocked').classList.add('visible');

  const resume = () => {
    document.getElementById('unlocked').classList.remove('visible');
    unlockTombola();
  };
  setTimeout(resume, 2500);
}
|
| 1096 |
+
|
| 1097 |
+
/**
 * Handler for the nudge bar's "Go" button: register the entered e-mail if it
 * is non-empty and contains '@', then hide the bar and announce unlimited mode.
 */
async function nudgeSignup() {
  const address = document.getElementById('nudgeEmail').value;
  const plausible = Boolean(address) && address.includes('@');
  if (!plausible) return;

  await doSignup(address);
  document.getElementById('nudgeBar').classList.remove('show');
  showStreak('Unlimited mode!');
}
|
| 1104 |
+
|
| 1105 |
+
/** Handler for the gate's skip link: hide the gate and resume the joke stage. */
function skipGate() {
  const gateEl = document.getElementById('gate');
  gateEl.classList.remove('visible');
  unlockTombola();
}
|
| 1109 |
+
|
| 1110 |
+
/**
 * Resume the joke stage after the gate: lift the gate flag, show the stage,
 * hide the pre-gate widgets, clear the counter, show the nudge bar to
 * visitors who have not signed up, and load the next joke.
 */
function unlockTombola() {
  gated = false;
  document.getElementById('stage').style.display = 'block';

  ['progress', 'dnaBar', 'socialProof'].forEach(id => {
    document.getElementById(id).style.display = 'none';
  });
  document.getElementById('counter').textContent = '';

  const stillAnonymous = !isSignedUp();
  if (stillAnonymous) {
    document.getElementById('nudgeBar').classList.add('show');
  }

  loadJoke();
}
|
| 1122 |
+
|
| 1123 |
+
// ===== MODE / MOOD =====
|
| 1124 |
+
/**
 * Switch the joke pool, highlight the matching mode pill, and load a joke.
 * The mood bar is only shown for the 'fresh' pool; leaving it resets the
 * mood to 'any'.
 * @param {string} pool pool name sent to the joke API
 */
function setPool(pool) {
  currentPool = pool;

  for (const pill of document.querySelectorAll('.mode-pill')) {
    pill.classList.toggle('active', pill.dataset.pool === pool);
  }

  // Show mood bar only for Fresh
  const isFresh = pool === 'fresh';
  document.getElementById('moodBar').classList.toggle('show', isFresh);
  if (!isFresh) {
    currentMood = 'any';
    for (const pill of document.querySelectorAll('.mood-pill')) {
      pill.classList.toggle('active', pill.dataset.mood === 'any');
    }
  }

  loadJoke();
}
|
| 1142 |
+
|
| 1143 |
+
/**
 * Switch the mood filter, highlight the matching mood pill, and load a joke.
 * @param {string} mood mood name ('any' means no filter)
 */
function setMood(mood) {
  currentMood = mood;

  for (const pill of document.querySelectorAll('.mood-pill')) {
    const selected = pill.dataset.mood === mood;
    pill.classList.toggle('active', selected);
  }

  loadJoke();
}
|
| 1150 |
+
|
| 1151 |
+
// ===== SHARE =====
|
| 1152 |
+
/**
 * Share the current joke: through the native share sheet where available,
 * otherwise by copying the text to the clipboard and briefly relabelling the
 * button "Copied!".
 *
 * Both paths return promises that can reject (the user dismissing the share
 * sheet is enough), so each has a rejection handler. The clipboard API may
 * also be absent altogether, so it is checked before use.
 */
function shareJoke() {
  if (!currentJoke) return;
  const text = currentJoke.text;

  if (navigator.share) {
    navigator
      .share({ text: text + '\n\nvia The Daily Snorter', url: window.location.href })
      .catch(() => { /* share sheet dismissed or refused: nothing to do */ });
    return;
  }

  if (!navigator.clipboard || !navigator.clipboard.writeText) {
    console.warn('Sharing is not available in this browser');
    return;
  }

  navigator.clipboard.writeText(text)
    .then(() => {
      const btn = document.getElementById('shareBtn');
      btn.textContent = 'Copied!';
      setTimeout(() => { btn.textContent = 'Share this one'; }, 1500);
    })
    .catch(err => console.warn('Could not copy joke to clipboard', err));
}
|
| 1165 |
+
|
| 1166 |
+
// ===== INIT =====
|
| 1167 |
+
updateProgress();
|
| 1168 |
+
loadJoke();
|
| 1169 |
+
initSwipe();
|
| 1170 |
+
</script>
|
| 1171 |
+
</body>
|
| 1172 |
+
</html>
|
templates/worldcup.html
ADDED
|
@@ -0,0 +1,818 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Joke World Cup — The Daily Snorter</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--bg: #0f0f0f;
|
| 10 |
+
--card: #1a1a1a;
|
| 11 |
+
--border: #2a2a2a;
|
| 12 |
+
--text: #e0e0e0;
|
| 13 |
+
--muted: #888;
|
| 14 |
+
--accent: #f5c842;
|
| 15 |
+
--ha: #4ade80;
|
| 16 |
+
--nah: #ef4444;
|
| 17 |
+
--streak: #c084fc;
|
| 18 |
+
--winner: #f5c842;
|
| 19 |
+
}
|
| 20 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
| 21 |
+
body {
|
| 22 |
+
font-family: 'Georgia', serif;
|
| 23 |
+
background: var(--bg);
|
| 24 |
+
color: var(--text);
|
| 25 |
+
min-height: 100vh;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
/* Header */
|
| 29 |
+
.header {
|
| 30 |
+
text-align: center;
|
| 31 |
+
padding: 1.2rem 1rem 0.5rem;
|
| 32 |
+
}
|
| 33 |
+
.header h1 {
|
| 34 |
+
font-size: 2rem;
|
| 35 |
+
color: var(--accent);
|
| 36 |
+
letter-spacing: 0.05em;
|
| 37 |
+
}
|
| 38 |
+
.header .subtitle {
|
| 39 |
+
color: var(--muted);
|
| 40 |
+
font-size: 0.85rem;
|
| 41 |
+
margin-top: 0.2rem;
|
| 42 |
+
font-style: italic;
|
| 43 |
+
}
|
| 44 |
+
.header .tournament-name {
|
| 45 |
+
color: var(--streak);
|
| 46 |
+
font-size: 0.95rem;
|
| 47 |
+
margin-top: 0.4rem;
|
| 48 |
+
font-weight: bold;
|
| 49 |
+
}
|
| 50 |
+
.header .round-info {
|
| 51 |
+
color: var(--muted);
|
| 52 |
+
font-size: 0.8rem;
|
| 53 |
+
margin-top: 0.2rem;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/* Tab bar */
|
| 57 |
+
.tabs {
|
| 58 |
+
display: flex;
|
| 59 |
+
justify-content: center;
|
| 60 |
+
gap: 0.3rem;
|
| 61 |
+
padding: 0.8rem 1rem;
|
| 62 |
+
position: sticky;
|
| 63 |
+
top: 0;
|
| 64 |
+
background: var(--bg);
|
| 65 |
+
z-index: 10;
|
| 66 |
+
}
|
| 67 |
+
.tab-btn {
|
| 68 |
+
background: var(--card);
|
| 69 |
+
border: 1px solid var(--border);
|
| 70 |
+
color: var(--muted);
|
| 71 |
+
font-size: 0.85rem;
|
| 72 |
+
padding: 0.5rem 1.2rem;
|
| 73 |
+
border-radius: 20px;
|
| 74 |
+
cursor: pointer;
|
| 75 |
+
transition: all 0.15s;
|
| 76 |
+
font-family: inherit;
|
| 77 |
+
}
|
| 78 |
+
.tab-btn:hover, .tab-btn.active {
|
| 79 |
+
border-color: var(--accent);
|
| 80 |
+
color: var(--accent);
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
/* ===== VOTE VIEW ===== */
|
| 84 |
+
.vote-view { max-width: 700px; margin: 0 auto; padding: 0 1rem; }
|
| 85 |
+
|
| 86 |
+
.matchup-container {
|
| 87 |
+
display: grid;
|
| 88 |
+
grid-template-columns: 1fr 1fr;
|
| 89 |
+
gap: 1rem;
|
| 90 |
+
margin: 1rem 0;
|
| 91 |
+
}
|
| 92 |
+
@media (max-width: 500px) {
|
| 93 |
+
.matchup-container { grid-template-columns: 1fr; }
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.joke-card {
|
| 97 |
+
background: var(--card);
|
| 98 |
+
border: 2px solid var(--border);
|
| 99 |
+
border-radius: 12px;
|
| 100 |
+
padding: 1.5rem;
|
| 101 |
+
cursor: pointer;
|
| 102 |
+
transition: all 0.2s;
|
| 103 |
+
display: flex;
|
| 104 |
+
flex-direction: column;
|
| 105 |
+
min-height: 180px;
|
| 106 |
+
position: relative;
|
| 107 |
+
}
|
| 108 |
+
.joke-card:hover {
|
| 109 |
+
border-color: var(--accent);
|
| 110 |
+
transform: translateY(-2px);
|
| 111 |
+
}
|
| 112 |
+
.joke-card .joke-text {
|
| 113 |
+
font-size: 1.05rem;
|
| 114 |
+
line-height: 1.6;
|
| 115 |
+
flex: 1;
|
| 116 |
+
white-space: pre-line;
|
| 117 |
+
}
|
| 118 |
+
.joke-card .joke-author {
|
| 119 |
+
color: var(--muted);
|
| 120 |
+
font-size: 0.75rem;
|
| 121 |
+
margin-top: 0.8rem;
|
| 122 |
+
text-align: right;
|
| 123 |
+
}
|
| 124 |
+
.joke-card .pick-label {
|
| 125 |
+
position: absolute;
|
| 126 |
+
top: 0.6rem;
|
| 127 |
+
right: 0.6rem;
|
| 128 |
+
font-size: 0.65rem;
|
| 129 |
+
color: var(--muted);
|
| 130 |
+
text-transform: uppercase;
|
| 131 |
+
letter-spacing: 0.05em;
|
| 132 |
+
opacity: 0;
|
| 133 |
+
transition: opacity 0.15s;
|
| 134 |
+
}
|
| 135 |
+
.joke-card:hover .pick-label { opacity: 1; }
|
| 136 |
+
|
| 137 |
+
/* Vote result state */
|
| 138 |
+
.joke-card.winner {
|
| 139 |
+
border-color: var(--winner);
|
| 140 |
+
background: #1a1a0f;
|
| 141 |
+
}
|
| 142 |
+
.joke-card.loser {
|
| 143 |
+
opacity: 0.5;
|
| 144 |
+
border-color: var(--border);
|
| 145 |
+
}
|
| 146 |
+
.joke-card.voted {
|
| 147 |
+
cursor: default;
|
| 148 |
+
transform: none;
|
| 149 |
+
}
|
| 150 |
+
.joke-card.voted:hover {
|
| 151 |
+
transform: none;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
.vote-bar {
|
| 155 |
+
display: flex;
|
| 156 |
+
height: 6px;
|
| 157 |
+
border-radius: 3px;
|
| 158 |
+
overflow: hidden;
|
| 159 |
+
margin-top: 0.6rem;
|
| 160 |
+
background: var(--border);
|
| 161 |
+
opacity: 0;
|
| 162 |
+
transition: opacity 0.3s;
|
| 163 |
+
}
|
| 164 |
+
.vote-bar.show { opacity: 1; }
|
| 165 |
+
.vote-bar .bar-a {
|
| 166 |
+
background: var(--accent);
|
| 167 |
+
transition: width 0.5s;
|
| 168 |
+
}
|
| 169 |
+
.vote-bar .bar-b {
|
| 170 |
+
background: var(--streak);
|
| 171 |
+
transition: width 0.5s;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.vote-counts {
|
| 175 |
+
display: flex;
|
| 176 |
+
justify-content: space-between;
|
| 177 |
+
font-size: 0.7rem;
|
| 178 |
+
color: var(--muted);
|
| 179 |
+
margin-top: 0.3rem;
|
| 180 |
+
opacity: 0;
|
| 181 |
+
transition: opacity 0.3s;
|
| 182 |
+
}
|
| 183 |
+
.vote-counts.show { opacity: 1; }
|
| 184 |
+
|
| 185 |
+
.vs-badge {
|
| 186 |
+
display: none;
|
| 187 |
+
align-items: center;
|
| 188 |
+
justify-content: center;
|
| 189 |
+
font-size: 0.85rem;
|
| 190 |
+
color: var(--muted);
|
| 191 |
+
font-weight: bold;
|
| 192 |
+
}
|
| 193 |
+
@media (min-width: 501px) {
|
| 194 |
+
.vs-badge { display: none; } /* handled via CSS gap */
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.vote-status {
|
| 198 |
+
text-align: center;
|
| 199 |
+
color: var(--muted);
|
| 200 |
+
font-size: 0.8rem;
|
| 201 |
+
padding: 1rem;
|
| 202 |
+
}
|
| 203 |
+
.vote-status .stat { color: var(--accent); }
|
| 204 |
+
|
| 205 |
+
.no-tournament {
|
| 206 |
+
text-align: center;
|
| 207 |
+
padding: 3rem 1rem;
|
| 208 |
+
color: var(--muted);
|
| 209 |
+
}
|
| 210 |
+
.no-tournament h2 {
|
| 211 |
+
color: var(--accent);
|
| 212 |
+
margin-bottom: 0.5rem;
|
| 213 |
+
font-size: 1.4rem;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
/* ===== BRACKET VIEW ===== */
|
| 217 |
+
.bracket-view {
|
| 218 |
+
max-width: 900px;
|
| 219 |
+
margin: 0 auto;
|
| 220 |
+
padding: 0 1rem 2rem;
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
.round-section {
|
| 224 |
+
margin-bottom: 1.5rem;
|
| 225 |
+
}
|
| 226 |
+
.round-header {
|
| 227 |
+
font-size: 1rem;
|
| 228 |
+
color: var(--accent);
|
| 229 |
+
padding: 0.5rem 0;
|
| 230 |
+
border-bottom: 1px solid var(--border);
|
| 231 |
+
margin-bottom: 0.6rem;
|
| 232 |
+
}
|
| 233 |
+
.round-matchups {
|
| 234 |
+
display: grid;
|
| 235 |
+
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
|
| 236 |
+
gap: 0.6rem;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
.bracket-matchup {
|
| 240 |
+
background: var(--card);
|
| 241 |
+
border: 1px solid var(--border);
|
| 242 |
+
border-radius: 8px;
|
| 243 |
+
padding: 0.6rem 0.8rem;
|
| 244 |
+
font-size: 0.8rem;
|
| 245 |
+
}
|
| 246 |
+
.bracket-entry {
|
| 247 |
+
display: flex;
|
| 248 |
+
justify-content: space-between;
|
| 249 |
+
align-items: center;
|
| 250 |
+
padding: 0.3rem 0;
|
| 251 |
+
color: var(--muted);
|
| 252 |
+
}
|
| 253 |
+
.bracket-entry .entry-text {
|
| 254 |
+
flex: 1;
|
| 255 |
+
overflow: hidden;
|
| 256 |
+
text-overflow: ellipsis;
|
| 257 |
+
white-space: nowrap;
|
| 258 |
+
margin-right: 0.5rem;
|
| 259 |
+
}
|
| 260 |
+
.bracket-entry .entry-votes {
|
| 261 |
+
font-size: 0.7rem;
|
| 262 |
+
min-width: 30px;
|
| 263 |
+
text-align: right;
|
| 264 |
+
}
|
| 265 |
+
.bracket-entry.is-winner {
|
| 266 |
+
color: var(--accent);
|
| 267 |
+
font-weight: bold;
|
| 268 |
+
}
|
| 269 |
+
.bracket-entry.is-loser {
|
| 270 |
+
opacity: 0.5;
|
| 271 |
+
}
|
| 272 |
+
.bracket-divider {
|
| 273 |
+
height: 1px;
|
| 274 |
+
background: var(--border);
|
| 275 |
+
margin: 0.1rem 0;
|
| 276 |
+
}
|
| 277 |
+
.bracket-matchup.active {
|
| 278 |
+
border-color: var(--ha);
|
| 279 |
+
}
|
| 280 |
+
.bracket-matchup.pending {
|
| 281 |
+
opacity: 0.5;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
/* ===== LEADERBOARD VIEW ===== */
|
| 285 |
+
.leaderboard-view {
|
| 286 |
+
max-width: 700px;
|
| 287 |
+
margin: 0 auto;
|
| 288 |
+
padding: 0 1rem 2rem;
|
| 289 |
+
}
|
| 290 |
+
.leaderboard-table {
|
| 291 |
+
width: 100%;
|
| 292 |
+
border-collapse: collapse;
|
| 293 |
+
}
|
| 294 |
+
.leaderboard-table th {
|
| 295 |
+
text-align: left;
|
| 296 |
+
color: var(--muted);
|
| 297 |
+
font-size: 0.7rem;
|
| 298 |
+
text-transform: uppercase;
|
| 299 |
+
letter-spacing: 0.05em;
|
| 300 |
+
padding: 0.5rem 0.4rem;
|
| 301 |
+
border-bottom: 1px solid var(--border);
|
| 302 |
+
}
|
| 303 |
+
.leaderboard-table td {
|
| 304 |
+
padding: 0.6rem 0.4rem;
|
| 305 |
+
border-bottom: 1px solid var(--border);
|
| 306 |
+
font-size: 0.85rem;
|
| 307 |
+
vertical-align: top;
|
| 308 |
+
}
|
| 309 |
+
.leaderboard-table tr:hover {
|
| 310 |
+
background: var(--card);
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
.lb-rank {
|
| 314 |
+
font-weight: bold;
|
| 315 |
+
color: var(--accent);
|
| 316 |
+
width: 2rem;
|
| 317 |
+
text-align: center;
|
| 318 |
+
}
|
| 319 |
+
.lb-rank.gold { color: #f5c842; }
|
| 320 |
+
.lb-rank.silver { color: #c0c0c0; }
|
| 321 |
+
.lb-rank.bronze { color: #cd7f32; }
|
| 322 |
+
|
| 323 |
+
.lb-joke {
|
| 324 |
+
max-width: 350px;
|
| 325 |
+
line-height: 1.4;
|
| 326 |
+
}
|
| 327 |
+
.lb-joke .joke-preview {
|
| 328 |
+
display: -webkit-box;
|
| 329 |
+
-webkit-line-clamp: 2;
|
| 330 |
+
-webkit-box-orient: vertical;
|
| 331 |
+
overflow: hidden;
|
| 332 |
+
}
|
| 333 |
+
.lb-author {
|
| 334 |
+
color: var(--muted);
|
| 335 |
+
font-size: 0.7rem;
|
| 336 |
+
margin-top: 0.2rem;
|
| 337 |
+
}
|
| 338 |
+
.lb-elo {
|
| 339 |
+
font-weight: bold;
|
| 340 |
+
color: var(--text);
|
| 341 |
+
white-space: nowrap;
|
| 342 |
+
}
|
| 343 |
+
.lb-record {
|
| 344 |
+
color: var(--muted);
|
| 345 |
+
font-size: 0.8rem;
|
| 346 |
+
white-space: nowrap;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.empty-leaderboard {
|
| 350 |
+
text-align: center;
|
| 351 |
+
padding: 3rem 1rem;
|
| 352 |
+
color: var(--muted);
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
/* Back link */
|
| 356 |
+
.back-link {
|
| 357 |
+
display: block;
|
| 358 |
+
text-align: center;
|
| 359 |
+
padding: 1.5rem;
|
| 360 |
+
color: var(--muted);
|
| 361 |
+
font-size: 0.75rem;
|
| 362 |
+
text-decoration: none;
|
| 363 |
+
}
|
| 364 |
+
.back-link:hover { color: var(--accent); }
|
| 365 |
+
|
| 366 |
+
/* View switching */
|
| 367 |
+
.view { display: none; }
|
| 368 |
+
.view.active { display: block; }
|
| 369 |
+
|
| 370 |
+
/* Loading spinner */
|
| 371 |
+
.loading {
|
| 372 |
+
text-align: center;
|
| 373 |
+
padding: 2rem;
|
| 374 |
+
color: var(--muted);
|
| 375 |
+
font-style: italic;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
/* Admin bar */
|
| 379 |
+
.admin-bar {
|
| 380 |
+
text-align: center;
|
| 381 |
+
padding: 0.8rem;
|
| 382 |
+
border-top: 1px solid var(--border);
|
| 383 |
+
margin-top: 1rem;
|
| 384 |
+
}
|
| 385 |
+
.admin-btn {
|
| 386 |
+
background: var(--card);
|
| 387 |
+
border: 1px solid var(--border);
|
| 388 |
+
color: var(--muted);
|
| 389 |
+
font-size: 0.75rem;
|
| 390 |
+
padding: 0.4rem 1rem;
|
| 391 |
+
border-radius: 6px;
|
| 392 |
+
cursor: pointer;
|
| 393 |
+
font-family: inherit;
|
| 394 |
+
margin: 0 0.2rem;
|
| 395 |
+
}
|
| 396 |
+
.admin-btn:hover { color: var(--accent); border-color: var(--accent); }
|
| 397 |
+
|
| 398 |
+
/* Pool filter bar for tournament creation */
|
| 399 |
+
.create-options {
|
| 400 |
+
display: none;
|
| 401 |
+
margin-top: 0.6rem;
|
| 402 |
+
gap: 0.4rem;
|
| 403 |
+
justify-content: center;
|
| 404 |
+
flex-wrap: wrap;
|
| 405 |
+
}
|
| 406 |
+
.create-options.show { display: flex; }
|
| 407 |
+
</style>
|
| 408 |
+
</head>
|
| 409 |
+
<body>
|
| 410 |
+
|
| 411 |
+
<div class="header">
|
| 412 |
+
<h1>Joke World Cup</h1>
|
| 413 |
+
<p class="subtitle">Head-to-head. You pick the winner.</p>
|
| 414 |
+
<div class="tournament-name" id="tournamentName"></div>
|
| 415 |
+
<div class="round-info" id="roundInfo"></div>
|
| 416 |
+
</div>
|
| 417 |
+
|
| 418 |
+
<div class="tabs">
|
| 419 |
+
<button class="tab-btn active" data-tab="vote" onclick="switchTab('vote')">Vote</button>
|
| 420 |
+
<button class="tab-btn" data-tab="bracket" onclick="switchTab('bracket')">Bracket</button>
|
| 421 |
+
<button class="tab-btn" data-tab="leaderboard" onclick="switchTab('leaderboard')">Leaderboard</button>
|
| 422 |
+
</div>
|
| 423 |
+
|
| 424 |
+
<!-- ===== VOTE VIEW ===== -->
|
| 425 |
+
<div class="view active" id="view-vote">
|
| 426 |
+
<div class="vote-view">
|
| 427 |
+
<div class="loading" id="voteLoading">Loading matchup...</div>
|
| 428 |
+
|
| 429 |
+
<div id="matchupArea" style="display:none">
|
| 430 |
+
<div class="matchup-container" id="matchupCards"></div>
|
| 431 |
+
|
| 432 |
+
<div id="voteResult" style="display:none">
|
| 433 |
+
<div class="vote-bar" id="voteBar">
|
| 434 |
+
<div class="bar-a" id="barA"></div>
|
| 435 |
+
<div class="bar-b" id="barB"></div>
|
| 436 |
+
</div>
|
| 437 |
+
<div class="vote-counts" id="voteCounts"></div>
|
| 438 |
+
</div>
|
| 439 |
+
</div>
|
| 440 |
+
|
| 441 |
+
<div class="vote-status" id="voteStatus"></div>
|
| 442 |
+
|
| 443 |
+
<div class="no-tournament" id="noTournament" style="display:none">
|
| 444 |
+
<h2>No active tournament</h2>
|
| 445 |
+
<p>Check back soon, or start one below.</p>
|
| 446 |
+
</div>
|
| 447 |
+
</div>
|
| 448 |
+
</div>
|
| 449 |
+
|
| 450 |
+
<!-- ===== BRACKET VIEW ===== -->
|
| 451 |
+
<div class="view" id="view-bracket">
|
| 452 |
+
<div class="bracket-view">
|
| 453 |
+
<div class="loading" id="bracketLoading">Loading bracket...</div>
|
| 454 |
+
<div id="bracketContent"></div>
|
| 455 |
+
</div>
|
| 456 |
+
</div>
|
| 457 |
+
|
| 458 |
+
<!-- ===== LEADERBOARD VIEW ===== -->
|
| 459 |
+
<div class="view" id="view-leaderboard">
|
| 460 |
+
<div class="leaderboard-view">
|
| 461 |
+
<div class="loading" id="lbLoading">Loading leaderboard...</div>
|
| 462 |
+
<div id="lbContent"></div>
|
| 463 |
+
</div>
|
| 464 |
+
</div>
|
| 465 |
+
|
| 466 |
+
<!-- Admin bar -->
|
| 467 |
+
<div class="admin-bar">
|
| 468 |
+
<button class="admin-btn" onclick="toggleCreateOptions()">New Tournament</button>
|
| 469 |
+
<div class="create-options" id="createOptions">
|
| 470 |
+
<button class="admin-btn" onclick="createTournament('mixed')">Mixed (64)</button>
|
| 471 |
+
<button class="admin-btn" onclick="createTournament('classics')">Classics (64)</button>
|
| 472 |
+
<button class="admin-btn" onclick="createTournament('fresh')">Fresh (64)</button>
|
| 473 |
+
<button class="admin-btn" onclick="createTournament('mixed', 32)">Quick (32)</button>
|
| 474 |
+
</div>
|
| 475 |
+
</div>
|
| 476 |
+
|
| 477 |
+
<a href="/play" class="back-link">Back to the Tombola</a>
|
| 478 |
+
|
| 479 |
+
<script>
|
| 480 |
+
// State
|
| 481 |
+
let currentMatchup = null;
|
| 482 |
+
let votedThisSession = 0;
|
| 483 |
+
let tournamentState = null;
|
| 484 |
+
let voting = false;
|
| 485 |
+
|
| 486 |
+
// ===== TAB SWITCHING =====
|
| 487 |
+
// Activate the tab button and view that belong to `tab` ('vote', 'bracket' or
// 'leaderboard'), deactivate the others, and reload the data of tabs that
// fetch on demand.
function switchTab(tab) {
    for (const button of document.querySelectorAll('.tab-btn')) {
        const isSelected = button.dataset.tab === tab;
        button.classList.toggle('active', isSelected);
    }

    const selectedViewId = 'view-' + tab;
    for (const view of document.querySelectorAll('.view')) {
        view.classList.toggle('active', view.id === selectedViewId);
    }

    // The vote view is kept current by loadMatchup(); the other two views are
    // refreshed each time they are opened.
    switch (tab) {
        case 'bracket':
            loadBracket();
            break;
        case 'leaderboard':
            loadLeaderboard();
            break;
    }
}
|
| 496 |
+
|
| 497 |
+
// ===== VOTE VIEW =====
|
| 498 |
+
// Fetch the next matchup from /api/worldcup/matchup and show it in the vote view.
//
// While the request is in flight the loading placeholder is visible and the
// matchup, vote-result and "no tournament" panels are hidden. Afterwards one of
// three things is shown:
//   - the "no tournament" panel, when the response has no matchup and its
//     message is exactly 'No active tournament';
//   - a status message, when there is no matchup for any other reason;
//   - the rendered matchup cards.
// If the request or JSON parsing throws, a retry link is shown instead.
async function loadMatchup() {
    const byId = id => document.getElementById(id);

    byId('voteLoading').style.display = 'block';
    byId('matchupArea').style.display = 'none';
    byId('noTournament').style.display = 'none';
    byId('voteResult').style.display = 'none';

    try {
        const resp = await fetch('/api/worldcup/matchup');
        const data = await resp.json();

        byId('voteLoading').style.display = 'none';

        if (!data.matchup) {
            if (data.message === 'No active tournament') {
                byId('noTournament').style.display = 'block';
            } else {
                // The message is server-supplied text: escape it so it is
                // displayed literally and can never inject markup.
                byId('voteStatus').innerHTML =
                    '<p>' + escapeHtml(data.message || 'No more matchups right now.') + '</p>';
            }
            // Still load tournament info for header
            loadTournamentInfo();
            return;
        }

        currentMatchup = data.matchup;
        if (data.tournament_name) {
            byId('tournamentName').textContent = data.tournament_name;
        }

        renderMatchup(data.matchup);
        byId('matchupArea').style.display = 'block';

    } catch (err) {
        // Network failure, non-JSON body, or a rendering error: offer a retry.
        byId('voteLoading').style.display = 'none';
        byId('voteStatus').innerHTML =
            '<p>Failed to load. <a href="#" onclick="loadMatchup();return false" style="color:var(--accent)">Retry</a></p>';
    }
}
|
| 536 |
+
|
| 537 |
+
// Render the two entries of `matchup` (entry_a, entry_b) as clickable joke
// cards inside #matchupCards and refresh the "Votes cast" status line.
// Clicking a card calls castVote(matchup.id, entry.id).
function renderMatchup(matchup) {
    // NOTE(review): in the text under review every pattern of the original
    // replace chain mapped a character to itself (e.g. /&/g -> "&"), which is a
    // no-op. That looks like an entity-decoding step whose entities were
    // unescaped when the page was rendered; it is restored here on that
    // assumption — confirm against the original template.
    // Decoding happens in a single pass so that text such as "&amp;lt;" is
    // decoded once ("&lt;") rather than twice ("<").
    const ENTITY_TO_CHAR = {
        '&#39;': "'",
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': '"',
    };
    const decodeEntities = s =>
        s.replace(/&(?:#39|amp|lt|gt|quot);/g, entity => ENTITY_TO_CHAR[entity]);

    const container = document.getElementById('matchupCards');
    container.innerHTML = '';

    [matchup.entry_a, matchup.entry_b].forEach(entry => {
        const card = document.createElement('div');
        card.className = 'joke-card';
        card.dataset.entryId = entry.id;
        card.onclick = () => castVote(matchup.id, entry.id);

        const text = decodeEntities(entry.text || '');

        // Both text and author are escaped before being placed in markup.
        card.innerHTML = `
<span class="pick-label">Pick this one</span>
<div class="joke-text">${escapeHtml(text)}</div>
<div class="joke-author">${entry.author ? '— ' + escapeHtml(entry.author) : ''}</div>
`;
        container.appendChild(card);
    });

    // Update status
    document.getElementById('voteStatus').innerHTML =
        'Votes cast: <span class="stat">' + votedThisSession + '</span>';
}
|
| 566 |
+
|
| 567 |
+
// Submit a vote for `entryId` in matchup `matchupId` to /api/worldcup/vote.
//
// The module-level `voting` flag makes this a no-op while a previous vote is
// still being processed or its result is still on screen.
//   - success: the result is shown, then the next matchup loads after 1.5s;
//   - HTTP 409 (already voted): the next matchup loads after 0.5s;
//   - any other rejection, or a network/JSON error: voting is re-enabled and
//     the status line tells the user the vote was not recorded (previously
//     these cases failed silently and the click appeared to do nothing).
async function castVote(matchupId, entryId) {
    if (voting) return;
    voting = true;

    // Re-enable the cards and surface the failure to the user.
    const reportFailure = () => {
        voting = false;
        document.getElementById('voteStatus').innerHTML =
            'Votes cast: <span class="stat">' + votedThisSession + '</span>' +
            ' · Vote could not be recorded. Please try again.';
    };

    try {
        const resp = await fetch('/api/worldcup/vote', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ matchup_id: matchupId, entry_id: entryId }),
        });
        const data = await resp.json();

        if (!data.ok) {
            if (resp.status === 409) {
                // Already voted, just advance
                setTimeout(() => { voting = false; loadMatchup(); }, 500);
                return;
            }
            reportFailure();
            return;
        }

        votedThisSession++;
        showVoteResult(entryId, data);

        // Auto-advance after 1.5s
        setTimeout(() => {
            voting = false;
            loadMatchup();
        }, 1500);

    } catch (err) {
        reportFailure();
    }
}
|
| 602 |
+
|
| 603 |
+
// Show the outcome of a vote on the current matchup cards.
//
// pickedId: id of the entry the user clicked.
// data:     vote response; reads winner_id, votes_a, votes_b and status.
//
// Every card is marked 'voted'. The card matching data.winner_id (or, when the
// response names no winner, the user's pick) is marked 'winner' and the other
// 'loser'. When at least one vote is counted, the split bar and the counts are
// revealed.
function showVoteResult(pickedId, data) {
    const highlightedId = data.winner_id ? data.winner_id : pickedId;

    document.querySelectorAll('#matchupCards .joke-card').forEach(card => {
        // Explicit radix: dataset values are strings.
        const cardId = parseInt(card.dataset.entryId, 10);
        card.classList.add('voted');
        card.classList.add(cardId === highlightedId ? 'winner' : 'loser');
    });

    // Coerce both counts to numbers once. A missing count previously produced
    // "NaN%" widths and "undefined votes" text, and string counts would have
    // been concatenated instead of added.
    const votesA = Number(data.votes_a) || 0;
    const votesB = Number(data.votes_b) || 0;
    const total = votesA + votesB;

    // Show vote bar
    if (total > 0) {
        const pctA = Math.round((votesA / total) * 100);
        const pctB = 100 - pctA; // keeps the two widths summing to exactly 100

        document.getElementById('barA').style.width = pctA + '%';
        document.getElementById('barB').style.width = pctB + '%';
        document.getElementById('voteBar').classList.add('show');

        const counts = document.getElementById('voteCounts');
        counts.innerHTML =
            '<span>' + votesA + ' votes (' + pctA + '%)</span>' +
            '<span>' + votesB + ' votes (' + pctB + '%)</span>';
        counts.classList.add('show');
        document.getElementById('voteResult').style.display = 'block';
    }

    document.getElementById('voteStatus').innerHTML =
        'Votes cast: <span class="stat">' + votedThisSession + '</span>' +
        (data.status === 'complete' ? ' · <span style="color:var(--ha)">Matchup decided!</span>' : '');
}
|
| 636 |
+
|
| 637 |
+
// Refresh the header (tournament name and round progress) from
// /api/worldcup/current. Best effort: any failure is ignored and the header is
// left as it was.
async function loadTournamentInfo() {
    try {
        const response = await fetch('/api/worldcup/current');
        const payload = await response.json();

        const tournament = payload.tournament;
        if (!tournament) {
            return;
        }

        tournamentState = tournament;
        document.getElementById('tournamentName').textContent = tournament.name;

        const label = tournament.round_name || ('Round ' + tournament.current_round);

        // Without a matchup list the total is unknown ('?') and nothing counts as decided.
        let total = '?';
        let decided = 0;
        if (payload.matchups) {
            total = payload.matchups.length;
            decided = payload.matchups.filter(m => m.status === 'complete').length;
        }

        document.getElementById('roundInfo').textContent =
            label + ' — ' + decided + '/' + total + ' matchups decided';
    } catch (err) { /* ignore */ }
}
|
| 652 |
+
|
| 653 |
+
// ===== BRACKET VIEW =====
|
| 654 |
+
// Fetch /api/worldcup/bracket and render every round with its matchups into
// #bracketContent. Shows a placeholder when there are no rounds and an error
// message when the request or rendering fails.
async function loadBracket() {
    const loading = document.getElementById('bracketLoading');
    const content = document.getElementById('bracketContent');

    loading.style.display = 'block';
    content.innerHTML = '';

    // CSS modifier for a matchup: completed matchups get none, active ones
    // 'active', everything else 'pending'.
    const statusClassOf = status => {
        if (status === 'complete') return '';
        return status === 'active' ? 'active' : 'pending';
    };

    const matchupHtml = m =>
        '<div class="bracket-matchup ' + statusClassOf(m.status) + '">' +
        renderBracketEntry(m.entry_a, m, 'a') +
        '<div class="bracket-divider"></div>' +
        renderBracketEntry(m.entry_b, m, 'b') +
        '</div>';

    const roundHtml = round =>
        '<div class="round-section">' +
        '<div class="round-header">' + escapeHtml(round.name) +
        ' <span style="color:var(--muted);font-size:0.8rem">(' +
        round.matchups.length + ' matchups)</span></div>' +
        '<div class="round-matchups">' +
        round.matchups.map(matchupHtml).join('') +
        '</div></div>';

    try {
        const response = await fetch('/api/worldcup/bracket');
        const data = await response.json();

        loading.style.display = 'none';

        const hasRounds = data.rounds && data.rounds.length !== 0;
        if (!hasRounds) {
            content.innerHTML =
                '<div class="empty-leaderboard"><p>No tournament data yet.</p></div>';
            return;
        }

        const tournament = data.tournament;
        if (tournament) {
            document.getElementById('tournamentName').textContent = tournament.name;
        }

        let markup = data.rounds.map(roundHtml).join('');

        // Show winner if tournament complete
        const finished = tournament && tournament.status === 'complete' && tournament.winner_entry_id;
        if (finished) {
            markup = '<div style="text-align:center;padding:1rem;color:var(--accent);font-size:1.2rem;font-weight:bold">' +
                'Tournament Complete!</div>' + markup;
        }

        content.innerHTML = markup;

    } catch (err) {
        loading.style.display = 'none';
        content.innerHTML =
            '<div class="empty-leaderboard"><p>Failed to load bracket.</p></div>';
    }
}
|
| 709 |
+
|
| 710 |
+
// Build the HTML for one side of a bracket matchup.
//
// entry:   the joke entry (reads id, text, author), or a falsy value for a
//          slot that is not filled yet, which renders as "TBD".
// matchup: the owning matchup (reads status, winner_id, votes_a, votes_b).
// side:    'a' selects matchup.votes_a; anything else selects matchup.votes_b.
//
// Returns an HTML string. The joke preview is cut to 60 characters, and all
// dynamic text is escaped.
function renderBracketEntry(entry, matchup, side) {
    if (!entry) return '<div class="bracket-entry"><span class="entry-text">TBD</span></div>';

    const rawVotes = side === 'a' ? matchup.votes_a : matchup.votes_b;
    // A missing count is shown as 0 rather than the literal text "undefined"/"null".
    const votes = rawVotes == null ? 0 : rawVotes;

    let cls = 'bracket-entry';
    if (matchup.status === 'complete') {
        cls += matchup.winner_id === entry.id ? ' is-winner' : ' is-loser';
    }

    const text = entry.text || '';
    const preview = text.length > 60 ? text.substring(0, 60) + '...' : text;
    const author = entry.author ? ' — ' + entry.author : '';

    // Pending matchups have no votes to show yet. The count is escaped like
    // every other server-supplied value placed in markup.
    const votesHtml = matchup.status !== 'pending' ? escapeHtml(String(votes)) : '';

    return '<div class="' + cls + '">' +
        '<span class="entry-text">' + escapeHtml(preview) + '<span style="color:var(--muted);font-size:0.7rem">' + escapeHtml(author) + '</span></span>' +
        '<span class="entry-votes">' + votesHtml + '</span>' +
        '</div>';
}
|
| 727 |
+
|
| 728 |
+
// ===== LEADERBOARD VIEW =====
|
| 729 |
+
// Fetch /api/worldcup/leaderboard and render it as a ranked table in
// #lbContent. Rows are ranked by their position in the response. Shows a
// placeholder for an empty list and an error message when loading or
// rendering fails.
async function loadLeaderboard() {
    const loading = document.getElementById('lbLoading');
    const content = document.getElementById('lbContent');

    loading.style.display = 'block';
    content.innerHTML = '';

    // Extra class for the top three ranks, indexed by rank (index 0 unused).
    const MEDAL_CLASS = ['', ' gold', ' silver', ' bronze'];

    const rowHtml = (entry, index) => {
        const rank = index + 1;
        const rankClass = 'lb-rank' + (MEDAL_CLASS[rank] || '');

        const fullText = entry.text || '';
        const preview = fullText.substring(0, 120) + (fullText.length > 120 ? '...' : '');

        const cells = [
            '<td class="' + rankClass + '">' + rank + '</td>',
            '<td class="lb-joke"><div class="joke-preview">' + escapeHtml(preview) + '</div>' +
                (entry.author ? '<div class="lb-author">— ' + escapeHtml(entry.author) + '</div>' : '') +
                '</td>',
            '<td class="lb-elo">' + Math.round(entry.elo) + '</td>',
            '<td class="lb-record">' + entry.wins + '-' + entry.losses + '</td>',
        ];
        return '<tr>' + cells.join('') + '</tr>';
    };

    try {
        const response = await fetch('/api/worldcup/leaderboard');
        const data = await response.json();

        loading.style.display = 'none';

        if (!data || data.length === 0) {
            content.innerHTML =
                '<div class="empty-leaderboard"><p>No ranked jokes yet. Start a tournament to get going.</p></div>';
            return;
        }

        content.innerHTML =
            '<table class="leaderboard-table">' +
            '<thead><tr><th>#</th><th>Joke</th><th>ELO</th><th>W-L</th></tr></thead>' +
            '<tbody>' +
            data.map(rowHtml).join('') +
            '</tbody></table>';

    } catch (err) {
        loading.style.display = 'none';
        content.innerHTML =
            '<div class="empty-leaderboard"><p>Failed to load leaderboard.</p></div>';
    }
}
|
| 779 |
+
|
| 780 |
+
// ===== ADMIN =====
|
| 781 |
+
// Show or hide the row of "new tournament" preset buttons.
function toggleCreateOptions() {
    const optionsPanel = document.getElementById('createOptions');
    optionsPanel.classList.toggle('show');
}
|
| 784 |
+
|
| 785 |
+
// Ask the server to create a tournament drawn from `pool` with `size` entries
// (64 when `size` is omitted or falsy). On success the preset row is hidden
// and the vote view and header are reloaded; on failure an alert is shown.
async function createTournament(pool, size) {
    const bracketSize = size || 64;

    try {
        const request = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ pool_filter: pool, bracket_size: bracketSize }),
        };
        const response = await fetch('/api/worldcup/create', request);
        const result = await response.json();

        if (!result.ok) {
            alert('Failed to create tournament');
            return;
        }

        document.getElementById('createOptions').classList.remove('show');
        loadMatchup();
        loadTournamentInfo();
    } catch (err) {
        alert('Error creating tournament');
    }
}
|
| 805 |
+
|
| 806 |
+
// ===== UTILS =====
|
| 807 |
+
// Escape `text` for safe insertion into HTML.
//
// Replaces & < > " and ' with character references, so the result is safe in
// element content and in quoted attribute values. null and undefined yield an
// empty string; any other value is converted with String() first.
//
// This is a pure string transform: it needs no DOM and creates no throwaway
// element per call.
function escapeHtml(text) {
    if (text === null || text === undefined) return '';

    const REPLACEMENTS = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    // Single pass, so the '&' of an inserted reference is never escaped again.
    return String(text).replace(/[&<>"']/g, ch => REPLACEMENTS[ch]);
}
|
| 812 |
+
|
| 813 |
+
// ===== INIT =====
|
| 814 |
+
loadMatchup();
|
| 815 |
+
loadTournamentInfo();
|
| 816 |
+
</script>
|
| 817 |
+
</body>
|
| 818 |
+
</html>
|
webapp.py
ADDED
|
@@ -0,0 +1,1231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Joke Corpus — Web Dashboard."""
|
| 2 |
+
import math
|
| 3 |
+
import os
|
| 4 |
+
import random as _rand
|
| 5 |
+
import sys
|
| 6 |
+
import uuid
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Ensure scraper package is importable
|
| 11 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 12 |
+
|
| 13 |
+
from flask import Flask, render_template, request, jsonify, redirect, url_for, make_response
|
| 14 |
+
from scraper import db
|
| 15 |
+
from scraper.config import TOPICS, STYLES, DB_PATH, PROJECT_ROOT
|
| 16 |
+
from scraper.scoring import calculate_elo
|
| 17 |
+
|
| 18 |
+
ENV_PATH = PROJECT_ROOT / ".env"
|
| 19 |
+
|
| 20 |
+
# Platform setup definitions — the guided setup wizard uses these
|
| 21 |
+
# One entry per platform shown in the setup wizard.  "fields" lists the .env
# keys the wizard collects for that platform (empty when none are needed).
PLATFORM_SETUP = [
    # --- Guardian ---
    dict(
        id="guardian",
        name="Guardian",
        cost="Free",
        description="Frankie Boyle, Marina Hyde, John Crace, Stewart Lee, Charlie Brooker — full article text via the Guardian Open Platform API.",
        signup_url="https://open-platform.theguardian.com/access/",
        signup_steps=[
            "Go to the Guardian Open Platform",
            "Click 'Register for a developer key'",
            "Fill in the form (app name: 'joke-corpus', describe as personal research)",
            "You'll get an API key immediately by email",
            "Paste it below",
        ],
        fields=[
            dict(key="GUARDIAN_API_KEY", label="API Key", type="text"),
        ],
    ),
    # --- Reddit ---
    dict(
        id="reddit",
        name="Reddit",
        cost="Free",
        description="r/BritishProblems, r/standupshots, r/jokes and more. Non-commercial access via PRAW.",
        signup_url="https://www.reddit.com/prefs/apps",
        signup_steps=[
            "Go to Reddit app preferences (you need a Reddit account)",
            "Scroll down and click 'create another app...'",
            "Select 'script' as the app type",
            "Name: 'joke-corpus', Description: 'personal comedy corpus'",
            "Redirect URI: http://localhost:8080 (doesn't matter for scripts)",
            "Note the client ID (under the app name) and client secret",
            "Paste both below",
        ],
        fields=[
            dict(key="REDDIT_CLIENT_ID", label="Client ID", type="text"),
            dict(key="REDDIT_CLIENT_SECRET", label="Client Secret", type="password"),
        ],
    ),
    # --- Bluesky ---
    dict(
        id="bluesky",
        name="Bluesky",
        cost="Free",
        description="Open AT Protocol. Full access to any public account's posts. The easiest platform to set up.",
        signup_url="https://bsky.app/settings/app-passwords",
        signup_steps=[
            "Log into Bluesky",
            "Go to Settings > App Passwords",
            "Click 'Add App Password'",
            "Name it 'joke-corpus' and copy the generated password",
            "Enter your handle and the app password below",
        ],
        fields=[
            dict(key="BLUESKY_HANDLE", label="Handle (e.g. you.bsky.social)", type="text"),
            dict(key="BLUESKY_APP_PASSWORD", label="App Password", type="password"),
        ],
    ),
    # --- X / Twitter ---
    dict(
        id="x_twitter",
        name="X / Twitter",
        cost="$200/mo",
        description="Tweet lookup, user timelines, search. Required for Twitter comedy accounts. Basic API tier.",
        signup_url="https://developer.x.com/en/portal/dashboard",
        signup_steps=[
            "Go to the X Developer Portal",
            "Apply for a developer account if you don't have one",
            "Subscribe to the Basic tier ($200/mo)",
            "Create a project and app",
            "Generate a Bearer Token under 'Keys and Tokens'",
            "Paste the bearer token below",
        ],
        fields=[
            dict(key="TWITTER_BEARER_TOKEN", label="Bearer Token", type="password"),
            dict(key="TWITTER_API_KEY", label="API Key (optional)", type="text"),
            dict(key="TWITTER_API_SECRET", label="API Secret (optional)", type="password"),
        ],
    ),
    # --- Instagram via Apify ---
    dict(
        id="apify",
        name="Instagram (via Apify)",
        cost="Free tier ($5/mo credits)",
        description="Instagram's API doesn't allow reading other accounts' posts. Apify scrapes public profiles for you. Free tier gets ~2K posts/month.",
        signup_url="https://console.apify.com/sign-up",
        signup_steps=[
            "Sign up for an Apify account (free)",
            "Go to Settings > Integrations",
            "Copy your Personal API Token",
            "Paste it below",
            "The free tier gives $5/month in credits — enough for casual use",
        ],
        fields=[
            dict(key="APIFY_API_TOKEN", label="API Token", type="password"),
        ],
    ),
    # --- RSS (no credentials) ---
    dict(
        id="rss",
        name="RSS Feeds",
        cost="Free",
        description="Substacks, The Onion, NewsThump, Waterford Whispers, Daily Mash, McSweeney's. No API key needed — just add sources.",
        signup_url="",
        signup_steps=[
            "No API key required!",
            "RSS feeds are publicly accessible",
            "Go to the Sources tab to add RSS feed URLs",
            "Substack feeds are at: [name].substack.com/feed",
        ],
        fields=[],
    ),
]
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def _read_env():
    """Parse the .env file at ENV_PATH into a dict.

    Blank lines, lines starting with "#", and lines without "=" are ignored.
    Each remaining line is split on its first "="; key and value are
    whitespace-stripped.  Returns an empty dict when the file does not exist.
    """
    if not ENV_PATH.exists():
        return {}

    parsed = {}
    for raw_line in ENV_PATH.read_text().splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, value = entry.split("=", 1)
        parsed[name.strip()] = value.strip()
    return parsed
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _write_env(updates):
|
| 146 |
+
"""Update specific keys in the .env file, preserving comments and structure."""
|
| 147 |
+
lines = []
|
| 148 |
+
if ENV_PATH.exists():
|
| 149 |
+
lines = ENV_PATH.read_text().splitlines()
|
| 150 |
+
|
| 151 |
+
updated_keys = set()
|
| 152 |
+
new_lines = []
|
| 153 |
+
for line in lines:
|
| 154 |
+
stripped = line.strip()
|
| 155 |
+
if stripped and not stripped.startswith("#") and "=" in stripped:
|
| 156 |
+
key = stripped.split("=", 1)[0].strip()
|
| 157 |
+
if key in updates:
|
| 158 |
+
new_lines.append(f"{key}={updates[key]}")
|
| 159 |
+
updated_keys.add(key)
|
| 160 |
+
continue
|
| 161 |
+
new_lines.append(line)
|
| 162 |
+
|
| 163 |
+
# Add any keys that weren't already in the file
|
| 164 |
+
for key, value in updates.items():
|
| 165 |
+
if key not in updated_keys:
|
| 166 |
+
new_lines.append(f"{key}={value}")
|
| 167 |
+
|
| 168 |
+
ENV_PATH.write_text("\n".join(new_lines) + "\n")
|
| 169 |
+
|
| 170 |
+
# Reload environment variables
|
| 171 |
+
for key, value in updates.items():
|
| 172 |
+
os.environ[key] = value
|
| 173 |
+
|
| 174 |
+
app = Flask(__name__)
# Take the secret key from the environment when one is provided; the
# literal is only a fallback for local runs and must not be relied on for a
# public deployment because it is visible in the source.
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "joke-corpus-local")
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@app.before_request
def ensure_db():
    """Call ``db.init_db()`` before every request is dispatched."""
    # NOTE(review): runs on each request, so init_db is presumably idempotent
    # and cheap — confirm in scraper/db.py.
    db.init_db()
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
@app.context_processor
def utility_functions():
    """Expose the ``_build_url`` helper to every template."""
    from urllib.parse import urlencode

    # Changing any of these filters invalidates the current page number.
    filter_keys = ("topic", "style", "tier", "platform", "q")

    def build_url(**overrides):
        """Build a URL preserving current filters, with overrides.

        Starts from the filter parameters of the current request, applies
        *overrides* (``None`` or ``""`` removes a parameter), and returns a
        path of the form ``/?a=1&b=2`` — or ``/`` when nothing remains.
        Keys and values are URL-encoded, so search text containing ``&``,
        ``=``, ``#`` or spaces yields a valid link.
        """
        params = {}
        for key in (*filter_keys, "days", "page"):
            val = request.args.get(key)
            if val:
                params[key] = val

        for key, val in overrides.items():
            if val is None or val == "":
                params.pop(key, None)
            else:
                params[key] = val

        # Reset page when filters change (unless page is explicitly set)
        if "page" not in overrides and any(k in overrides for k in filter_keys):
            params.pop("page", None)

        qs = urlencode({k: v for k, v in params.items() if v})
        return f"/?{qs}" if qs else "/"

    return {"_build_url": build_url}
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
# --- Page routes ---
|
| 210 |
+
|
| 211 |
+
@app.route("/")
def index():
    """Render the main browse / search page.

    Query parameters: ``topic``, ``style``, ``tier``, ``platform``, ``q``
    (search text), ``days`` (look-back window, 0 = no limit) and ``page``.
    A search query takes precedence over a tier filter; with neither, all
    entries are browsed newest first.
    """
    args = request.args
    topic = args.get("topic")
    style = args.get("style")
    tier = args.get("tier")
    platform = args.get("platform")
    q = args.get("q", "").strip()
    days = args.get("days", "0", type=str)
    # A page below 1 would produce a negative OFFSET and a nonsensical page
    # indicator in the template, so clamp it.
    page = max(args.get("page", 1, type=int), 1)
    per_page = 50

    # str.isdigit() alone also accepts characters such as "²" which int()
    # rejects with ValueError; restrict to ASCII digits.
    days_val = int(days) if days.isascii() and days.isdigit() else 0

    # NOTE(review): only the plain browse branch passes an offset; the search
    # and tier branches always return the first `per_page` rows.
    if q:
        entries = db.search_entries(
            query=q, topic=topic, style=style,
            tier=tier, platform=platform, limit=per_page,
        )
    elif tier:
        entries = db.top_entries(
            topic=topic, style=style, tier=tier,
            platform=platform, days=days_val or None, limit=per_page,
        )
    else:
        # Show all entries, newest first, no tier filter
        entries = _browse_entries(
            topic=topic, style=style, platform=platform,
            days=days_val or None, limit=per_page,
            offset=(page - 1) * per_page,
        )

    # Attach the style list to each entry (one db.get_entry call per row)
    entries_with_styles = []
    for e in entries:
        entry_dict = dict(e)
        _, styles = db.get_entry(e["id"])
        entry_dict["styles"] = styles
        entries_with_styles.append(entry_dict)

    stats = db.get_stats()
    sources = db.list_sources()

    # Unique platforms across the registered sources, for the filter menu
    platforms = sorted({s["platform"] for s in sources})

    return render_template(
        "index.html",
        entries=entries_with_styles,
        stats=stats,
        topics=TOPICS,
        styles=STYLES,
        platforms=platforms,
        current_topic=topic,
        current_style=style,
        current_tier=tier,
        current_platform=platform,
        current_q=q,
        current_days=days,
        current_page=page,
    )
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
@app.route("/sources")
def sources_page():
    """Render index.html in its "sources" view with the list of sources."""
    context = {
        "view": "sources",
        "sources": db.list_sources(),
        "stats": db.get_stats(),
        "topics": TOPICS,
        "styles": STYLES,
        # The browse filters are unused in this view; pass neutral values.
        "platforms": [],
        "entries": [],
        "current_topic": None,
        "current_style": None,
        "current_tier": None,
        "current_platform": None,
        "current_q": "",
        "current_days": "0",
        "current_page": 1,
    }
    return render_template("index.html", **context)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
@app.route("/setup")
def setup_page():
    """Render the guided setup view with per-platform configuration status.

    A platform counts as configured when at least one of its fields has a
    non-empty value in the .env file, or when it declares no fields at all.
    """
    env = _read_env()
    stats = db.get_stats()

    platforms_status = []
    for spec in PLATFORM_SETUP:
        field_keys = [field["key"] for field in spec["fields"]]
        if field_keys:
            is_configured = any(env.get(key, "") for key in field_keys)
        else:
            # No fields needed (e.g. RSS)
            is_configured = True

        status = dict(spec)
        status["configured"] = is_configured
        status["field_values"] = {key: env.get(key, "") for key in field_keys}
        platforms_status.append(status)

    configured_count = len([p for p in platforms_status if p["configured"]])

    context = {
        "view": "setup",
        "platforms_setup": platforms_status,
        "configured_count": configured_count,
        "total_platforms": len(platforms_status),
        "stats": stats,
        "topics": TOPICS,
        "styles": STYLES,
        "platforms": [],
        "entries": [],
        "current_topic": None,
        "current_style": None,
        "current_tier": None,
        "current_platform": None,
        "current_q": "",
        "current_days": "0",
        "current_page": 1,
    }
    return render_template("index.html", **context)
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
@app.route("/random")
def random_page():
    """Render the "random" view showing one entry picked by the db layer.

    Optional ``topic``, ``style`` and ``tier`` query parameters are passed
    through to ``db.random_entry``.  When nothing matches, the template
    receives ``random_entry=None``.
    """
    topic = request.args.get("topic")
    style = request.args.get("style")
    tier = request.args.get("tier")

    picked = db.random_entry(topic=topic, style=style, tier=tier)
    if picked:
        random_entry = dict(picked)
        _, picked_styles = db.get_entry(picked["id"])
        random_entry["styles"] = picked_styles
    else:
        random_entry = None

    context = {
        "view": "random",
        "random_entry": random_entry,
        "stats": db.get_stats(),
        "topics": TOPICS,
        "styles": STYLES,
        "platforms": [],
        "entries": [],
        "current_topic": topic,
        "current_style": style,
        "current_tier": tier,
        "current_platform": None,
        "current_q": "",
        "current_days": "0",
        "current_page": 1,
    }
    return render_template("index.html", **context)
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
@app.route("/play")
def play_page():
    """Serve the public-facing joke tombola page (tombola.html)."""
    template_name = "tombola.html"
    return render_template(template_name)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
@app.route("/api/tombola/joke")
def api_tombola_joke():
    """Return one random joke for the tombola as JSON.

    Query parameters:
        mood: "dark", "political", "clean"; anything else is the default,
            which skips left/right-wing topics.
        pool: "classics" (default), "dadjokes", "fresh"; anything else
            applies no pool filter.

    Roughly 70% of requests first try the S/A quality tiers and fall back to
    any tier when that finds nothing; the rest go straight to any tier.
    Responds 404 with an error object when no entry matches.
    """
    mood = request.args.get("mood", "any")
    pool = request.args.get("pool", "classics")

    # Sources excluded from tombola (headlines that don't stand alone)
    excluded_sources = ('McSweeneys', 'Private Eye')
    dad_joke_handles = ('r/dadjokes', 'r/cleanjokes', '@dadsaysjokes', '@inventingdadjokes', '@thedad')

    def marks(values):
        """Return one "?" placeholder per value, comma-separated."""
        return ",".join("?" for _ in values)

    clauses = [
        "e.content_type IN ('joke', 'post', 'article')",
        "LENGTH(e.text) < 300",
        "(LENGTH(e.text) > 40 OR e.platform = 'curated')",
        # Exclude HuggingFace (ungraded bulk import) and extracted candidates
        "e.platform NOT IN ('huggingface', 'extracted', 'rss')",
        f"COALESCE(s.display_name, '') NOT IN ({marks(excluded_sources)})",
        # Skip promo/ad captions and image-dependent reactions
        "e.text NOT LIKE '%#ad %'",
        "e.text NOT LIKE '%tap to buy%'",
        "e.text NOT LIKE '%follow us for more%'",
        "e.text NOT LIKE '%link in bio%'",
        "e.text NOT LIKE '%link in the bio%'",
        "e.text NOT LIKE '%check out my shop%'",
        "e.text NOT LIKE '%Follow us for%'",
        "e.text NOT LIKE '%swipe up%'",
        "e.text NOT LIKE '%Like this content?%'",
        "e.text NOT LIKE 'Swipe %'",
        "e.text NOT LIKE 'Comment %on this post%'",
        # Skip hashtag-heavy posts (usually promos or memes needing images)
        "e.text NOT LIKE '%#%'",
    ]
    bind = list(excluded_sources)

    # Pool filter ("mixed" or any unknown value adds nothing)
    dad_sources_sql = (
        f"(SELECT id FROM sources WHERE handle_or_url IN ({marks(dad_joke_handles)}))"
    )
    if pool == "classics":
        clauses.append("e.platform = 'curated'")
    elif pool == "dadjokes":
        clauses.append(f"e.source_id IN {dad_sources_sql}")
        bind.extend(dad_joke_handles)
    elif pool == "fresh":
        clauses.append("e.platform != 'curated'")
        clauses.append(f"e.source_id NOT IN {dad_sources_sql}")
        bind.extend(dad_joke_handles)

    # Mood filter; the fallback is the "surprise me" default
    mood_clauses = {
        "dark": "(e.topic IN ('left_wing', 'right_wing') OR e.topic IS NULL)",
        "political": "e.topic IN ('left_wing', 'right_wing')",
        "clean": "(e.topic = 'apolitical' OR e.topic IS NULL)",
    }
    clauses.append(mood_clauses.get(
        mood, "(e.topic IS NULL OR e.topic NOT IN ('left_wing', 'right_wing'))"))

    where_sql = " AND ".join(clauses)

    def pick_one(conn, where):
        """Run the random-pick query with the given WHERE text."""
        return conn.execute(f"""
            SELECT e.id, e.text, e.author, e.platform, e.topic,
                   s.display_name as source_name, e.image_url
            FROM entries e
            LEFT JOIN sources s ON e.source_id = s.id
            WHERE {where}
            ORDER BY RANDOM()
            LIMIT 1
        """, bind).fetchone()

    with db.get_db() as conn:
        row = None
        # Quality-weighted selection: 70% from S/A tier, 30% from any
        if _rand.random() < 0.7:
            row = pick_one(conn, f"{where_sql} AND e.quality_tier IN ('S', 'A')")

        # Fallback to any tier
        if not row:
            row = pick_one(conn, where_sql)

        if not row:
            return jsonify({"error": "No jokes available"}), 404

        payload = {
            "id": row["id"],
            "text": row["text"],
            "author": row["author"],
            "topic": row["topic"],
            "source": row["source_name"] or row["platform"],
        }
        if row["image_url"]:
            payload["image_url"] = row["image_url"]
        return jsonify(payload)
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
@app.route("/api/tombola/signup", methods=["POST"])
def api_tombola_signup():
    """Collect email and comedy profile from tombola user.

    Expects a JSON object with ``email`` and optional ``preferences`` and
    ``reactions``; the latter two are stored as JSON text.  Responds 400 when
    the body is missing, is not a JSON object, or the email is not a string
    containing "@".  A repeated email is reported as success with a note.
    """
    import json as _json
    import sqlite3

    # silent=True: a missing or malformed body yields None instead of an
    # error, and is then rejected by the email check below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    email = data.get("email", "")
    email = email.strip() if isinstance(email, str) else ""
    preferences = data.get("preferences", {})
    reactions = data.get("reactions", [])

    if not email or "@" not in email:
        return jsonify({"ok": False, "error": "Valid email required"}), 400

    with db.get_db() as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS signups (
                id INTEGER PRIMARY KEY,
                email TEXT UNIQUE NOT NULL,
                name TEXT,
                preferences TEXT,
                reactions TEXT,
                signed_up_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        try:
            conn.execute(
                "INSERT INTO signups (email, preferences, reactions) VALUES (?, ?, ?)",
                (email, _json.dumps(preferences), _json.dumps(reactions)),
            )
        except sqlite3.IntegrityError:
            # UNIQUE(email) violated.  Other database errors now propagate
            # instead of being reported as a successful signup.
            # NOTE(review): assumes db.get_db() yields a stdlib sqlite3
            # connection — confirm in scraper/db.py.
            return jsonify({"ok": True, "note": "Already signed up"})
        return jsonify({"ok": True})
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
# --- API routes ---
|
| 526 |
+
|
| 527 |
+
@app.route("/api/rate/<int:entry_id>", methods=["POST"])
def api_rate(entry_id):
    """Store a 1-5 rating for an entry.

    Expects a JSON object with ``rating``.  Responds 400 when the body is
    missing or not an object, or when the rating cannot be read as an
    integer between 1 and 5.
    """
    data = request.get_json(silent=True)
    raw = data.get("rating") if isinstance(data, dict) else None

    try:
        rating = int(raw)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, lists, infinities ... -> rejected below
        rating = None

    if rating is not None and 1 <= rating <= 5:
        db.rate_entry(entry_id, rating)
        return jsonify({"ok": True, "rating": rating})
    return jsonify({"ok": False, "error": "Rating must be 1-5"}), 400
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
@app.route("/api/fav/<int:entry_id>", methods=["POST"])
def api_fav(entry_id):
    """Call ``db.favourite_entry`` and report the resulting flag.

    Responds 404 when the db layer returns ``None`` for the entry.
    """
    outcome = db.favourite_entry(entry_id)
    if outcome is None:
        return jsonify({"ok": False, "error": "Not found"}), 404
    return jsonify({"ok": True, "favourite": bool(outcome)})
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
@app.route("/api/classify/<int:entry_id>", methods=["POST"])
def api_classify(entry_id):
    """Set the topic and/or styles of an entry.

    Expects a JSON object with optional ``topic`` (must be in TOPICS) and
    ``styles`` (a list whose items must all be in STYLES).  Responds 400 for
    a missing or non-object body, or for invalid values.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"ok": False, "error": "JSON object required"}), 400

    topic = data.get("topic")
    style_names = data.get("styles")

    if topic and (not isinstance(topic, str) or topic not in TOPICS):
        return jsonify({"ok": False, "error": f"Invalid topic: {topic}"}), 400
    if style_names:
        # A bare string would otherwise be validated character by character.
        if not isinstance(style_names, list):
            return jsonify({"ok": False, "error": f"Invalid styles: {style_names}"}), 400
        invalid = [
            s for s in style_names
            if not isinstance(s, str) or s not in STYLES
        ]
        if invalid:
            return jsonify({"ok": False, "error": f"Invalid styles: {invalid}"}), 400

    db.classify_entry(entry_id, topic=topic, style_names=style_names)
    return jsonify({"ok": True})
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
@app.route("/api/notes/<int:entry_id>", methods=["POST"])
def api_notes(entry_id):
    """Save the notes text for an entry.

    Expects a JSON object with ``notes`` (defaults to "").  Responds 400 when
    the body is missing or not a JSON object, so a malformed request cannot
    crash the handler or overwrite existing notes.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"ok": False, "error": "JSON object required"}), 400

    db.add_notes(entry_id, data.get("notes", ""))
    return jsonify({"ok": True})
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
@app.route("/api/setup/<platform_id>", methods=["POST"])
def api_setup(platform_id):
    """Save credentials for one platform into the .env file.

    Only keys declared in that platform's ``fields`` are accepted; anything
    else in the JSON body is ignored.  Responds 404 for an unknown platform
    and 400 when no recognised field is supplied or a value is not a
    single-line string.
    """
    platform = next((p for p in PLATFORM_SETUP if p["id"] == platform_id), None)
    if not platform:
        return jsonify({"ok": False, "error": "Unknown platform"}), 404

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"ok": False, "error": "No fields provided"}), 400

    updates = {}
    for field in platform["fields"]:
        key = field["key"]
        if key not in data:
            continue
        value = data[key]
        # Values are written verbatim as KEY=value lines, so a line break
        # would add extra entries to the file; non-strings cannot be placed
        # in os.environ.
        if not isinstance(value, str) or value.splitlines() not in ([], [value]):
            return jsonify({"ok": False, "error": f"Invalid value for {key}"}), 400
        updates[key] = value

    if updates:
        _write_env(updates)
        return jsonify({"ok": True, "saved": list(updates.keys())})

    return jsonify({"ok": False, "error": "No fields provided"}), 400
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
@app.route("/api/test/<platform_id>", methods=["POST"])
def api_test_connection(platform_id):
    """Run a quick credential check against one platform's API.

    Credentials are read from the .env file.  The response is always a JSON
    object with ``ok`` plus either ``message`` or ``error``; any exception
    raised during the check is returned as the error text.
    """
    env = _read_env()

    def failure(text):
        return jsonify({"ok": False, "error": text})

    def success(text):
        return jsonify({"ok": True, "message": text})

    try:
        if platform_id == "guardian":
            import requests as req
            api_key = env.get("GUARDIAN_API_KEY", "")
            if not api_key:
                return failure("No API key set")
            resp = req.get(
                "https://content.guardianapis.com/search",
                params={"api-key": api_key, "page-size": 1},
                timeout=10,
            )
            if resp.status_code == 200:
                return success("Connected to Guardian API")
            return failure(f"HTTP {resp.status_code}")

        if platform_id == "bluesky":
            handle = env.get("BLUESKY_HANDLE", "")
            app_password = env.get("BLUESKY_APP_PASSWORD", "")
            if not (handle and app_password):
                return failure("Handle and app password required")
            from atproto import Client
            client = Client()
            client.login(handle, app_password)
            return success(f"Logged in as {handle}")

        if platform_id == "reddit":
            client_id = env.get("REDDIT_CLIENT_ID", "")
            client_secret = env.get("REDDIT_CLIENT_SECRET", "")
            if not (client_id and client_secret):
                return failure("Client ID and secret required")
            import praw
            reddit = praw.Reddit(
                client_id=client_id,
                client_secret=client_secret,
                user_agent="joke-corpus/0.1",
            )
            # Attribute access is what triggers the actual API call
            reddit.subreddit("test").id
            return success("Connected to Reddit API")

        if platform_id == "youtube":
            api_key = env.get("YOUTUBE_API_KEY", "")
            if not api_key:
                return failure("No API key set")
            import requests as req
            resp = req.get(
                "https://www.googleapis.com/youtube/v3/search",
                params={"key": api_key, "part": "id", "q": "test", "maxResults": 1},
                timeout=10,
            )
            if resp.status_code == 200:
                return success("Connected to YouTube API")
            return failure(f"HTTP {resp.status_code}")

        if platform_id == "x_twitter":
            bearer = env.get("TWITTER_BEARER_TOKEN", "")
            if not bearer:
                return failure("No bearer token set")
            import requests as req
            resp = req.get(
                "https://api.twitter.com/2/users/me",
                headers={"Authorization": f"Bearer {bearer}"},
                timeout=10,
            )
            # 403 is expected for app-only auth on /users/me
            if resp.status_code in (200, 403):
                return success("Bearer token accepted")
            return failure(f"HTTP {resp.status_code}")

        if platform_id == "apify":
            api_token = env.get("APIFY_API_TOKEN", "")
            if not api_token:
                return failure("No API token set")
            import requests as req
            resp = req.get(
                "https://api.apify.com/v2/users/me",
                headers={"Authorization": f"Bearer {api_token}"},
                timeout=10,
            )
            if resp.status_code == 200:
                username = resp.json().get("data", {}).get("username", "unknown")
                return success(f"Connected as {username}")
            return failure(f"HTTP {resp.status_code}")

        return failure("No test available for this platform")

    except Exception as exc:
        return failure(str(exc))
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
@app.route("/api/entry/<int:entry_id>")
def api_entry(entry_id):
    """Return one entry, with its styles, as JSON (404 when not found)."""
    entry, styles = db.get_entry(entry_id)
    if not entry:
        return jsonify({"error": "Not found"}), 404

    # Copy before adding "styles", as the page routes do: this avoids
    # mutating the object handed back by the db layer and also works if that
    # object is a read-only row rather than a dict.
    payload = dict(entry)
    payload["styles"] = styles
    return jsonify(payload)
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
# --- Helper queries ---
|
| 695 |
+
|
| 696 |
+
def _browse_entries(topic=None, style=None, platform=None, days=None,
                    limit=50, offset=0):
    """Fetch a page of entries, newest first, with no tier filter.

    Each falsy filter is skipped.  ``days`` restricts to rows scraped within
    that many days; ``style`` matches entries linked to a style of that name.
    Returns the rows from ``fetchall()``, each with an extra ``source_name``
    column taken from the joined source.
    """
    clauses = []
    bind = []

    if days:
        clauses.append("e.scraped_at >= datetime('now', ?)")
        bind.append(f"-{days} days")
    if topic:
        clauses.append("e.topic = ?")
        bind.append(topic)
    if style:
        clauses.append(
            """e.id IN (SELECT es.entry_id FROM entry_styles es
                        JOIN styles s ON s.id = es.style_id
                        WHERE s.name = ?)"""
        )
        bind.append(style)
    if platform:
        clauses.append("e.platform = ?")
        bind.append(platform)

    # "1=1" keeps the WHERE clause valid when no filter is active
    where_sql = " AND ".join(clauses) or "1=1"
    query = f"""SELECT e.*, s.display_name as source_name
                FROM entries e
                LEFT JOIN sources s ON e.source_id = s.id
                WHERE {where_sql}
                ORDER BY e.scraped_at DESC, e.id DESC
                LIMIT ? OFFSET ?"""

    with db.get_db() as conn:
        return conn.execute(query, [*bind, limit, offset]).fetchall()
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
# ============================================================
|
| 738 |
+
# JOKE WORLD CUP
|
| 739 |
+
# ============================================================
|
| 740 |
+
|
| 741 |
+
# A matchup closes once it has received this many votes in total
WORLDCUP_VOTES_TO_CLOSE = 20

# Round number (1-based) -> display name
ROUND_NAMES = dict(enumerate(
    [
        "Round of 64",
        "Round of 32",
        "Round of 16",
        "Quarter-Finals",
        "Semi-Finals",
        "Final",
    ],
    start=1,
))
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
def _get_voter_token():
    """Return the ``voter_token`` cookie value, or a fresh UUID4 string.

    A missing or empty cookie yields a newly generated token; this function
    does not set the cookie itself.
    """
    existing = request.cookies.get("voter_token")
    if existing:
        return existing
    return str(uuid.uuid4())
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
def _tombola_pool_conditions():
|
| 754 |
+
"""Return the same quality filter conditions used by the tombola."""
|
| 755 |
+
EXCLUDED_SOURCES = ('McSweeneys', 'Private Eye')
|
| 756 |
+
conditions = [
|
| 757 |
+
"e.content_type IN ('joke', 'post', 'article')",
|
| 758 |
+
"LENGTH(e.text) < 300",
|
| 759 |
+
"(LENGTH(e.text) > 40 OR e.platform = 'curated')",
|
| 760 |
+
"e.platform NOT IN ('huggingface', 'extracted', 'rss')",
|
| 761 |
+
"COALESCE(s.display_name, '') NOT IN ({})".format(
|
| 762 |
+
",".join("?" for _ in EXCLUDED_SOURCES)),
|
| 763 |
+
"e.text NOT LIKE '%#%'",
|
| 764 |
+
"e.text NOT LIKE '%link in bio%'",
|
| 765 |
+
"e.text NOT LIKE '%link in the bio%'",
|
| 766 |
+
"e.text NOT LIKE '%follow us for more%'",
|
| 767 |
+
"e.text NOT LIKE '%Follow us for%'",
|
| 768 |
+
"e.text NOT LIKE '%Like this content?%'",
|
| 769 |
+
"e.text NOT LIKE 'Swipe %'",
|
| 770 |
+
]
|
| 771 |
+
return conditions, list(EXCLUDED_SOURCES)
|
| 772 |
+
|
| 773 |
+
|
| 774 |
+
def create_tournament(name=None, pool_filter="mixed", bracket_size=64):
    """Create a new tournament, seed it, and generate Round 1 matchups.

    Eligible entries are those passing ``_tombola_pool_conditions()``, narrowed
    by ``pool_filter`` and excluding the 'left_wing' / 'right_wing' topics.
    If any entry has a non-default ELO rating the pool is taken in descending
    ELO order; otherwise it is drawn at random.

    Args:
        name: Tournament name. When falsy, "World Cup #<n>" is used, where n
            is one more than the number of existing tournaments.
        pool_filter: "classics" keeps only curated entries, "fresh" keeps only
            non-curated entries; any other value adds no platform filter.
        bracket_size: Maximum number of entries. Rounded down to a power of
            two so every round pairs up evenly. The bracket also shrinks to
            the largest power of two the eligible pool can fill.

    Returns:
        ``(tournament_id, entry_count)``.

    Raises:
        ValueError: If ``bracket_size`` is below 2 or fewer than two eligible
            entries exist. Both checks run before anything is written.
    """
    bracket_size = int(bracket_size)
    if bracket_size < 2:
        raise ValueError("bracket_size must be at least 2")
    # Largest power of two <= bracket_size. A non-power-of-two bracket would
    # leave an odd number of winners in a later round.
    bracket_size = 1 << (bracket_size.bit_length() - 1)

    with db.get_db() as conn:
        # Get eligible jokes
        conditions, params = _tombola_pool_conditions()
        if pool_filter == "classics":
            conditions.append("e.platform = 'curated'")
        elif pool_filter == "fresh":
            conditions.append("e.platform != 'curated'")

        # Default: skip political for the tournament
        conditions.append(
            "(e.topic IS NULL OR e.topic NOT IN ('left_wing', 'right_wing'))"
        )
        where = " AND ".join(conditions)

        # If no joke has a non-default ELO yet (nothing has been in a
        # tournament), randomise seeding instead of ordering by rating.
        has_elo = conn.execute("""
            SELECT COUNT(*) FROM entries WHERE elo_rating != 1200 AND elo_rating IS NOT NULL
        """).fetchone()[0]
        order = "e.elo_rating DESC" if has_elo else "RANDOM()"

        entries = conn.execute(f"""
            SELECT e.id, e.elo_rating FROM entries e
            LEFT JOIN sources s ON e.source_id = s.id
            WHERE {where}
            ORDER BY {order}
            LIMIT ?
        """, params + [bracket_size]).fetchall()

        # Validate before writing so a failed creation leaves no orphan row.
        if len(entries) < 2:
            raise ValueError(
                "need at least 2 eligible entries to create a tournament"
            )

        # Not enough jokes for the requested size: round down to a power of 2.
        entry_count = 1 << (len(entries).bit_length() - 1)
        entries = entries[:entry_count]
        total_rounds = entry_count.bit_length() - 1  # exact log2 of a power of two

        if not name:
            count = conn.execute("SELECT COUNT(*) FROM tournaments").fetchone()[0]
            name = f"World Cup #{count + 1}"

        conn.execute(
            """INSERT INTO tournaments (name, pool_filter, total_rounds, status, started_at)
               VALUES (?, ?, ?, 'voting', CURRENT_TIMESTAMP)""",
            (name, pool_filter, total_rounds),
        )
        tid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]

        # Seeding: #1 vs #N, #2 vs #N-1, etc.; the pair index is the bracket position.
        for pos in range(entry_count // 2):
            top_seed = entries[pos]
            bottom_seed = entries[entry_count - 1 - pos]
            conn.execute(
                """INSERT INTO matchups
                   (tournament_id, round_number, bracket_position,
                    entry_a_id, entry_b_id, status)
                   VALUES (?, 1, ?, ?, ?, 'active')""",
                (tid, pos, top_seed["id"], bottom_seed["id"]),
            )

    return tid, entry_count
|
| 842 |
+
|
| 843 |
+
|
| 844 |
+
def _advance_round(conn, tournament_id):
|
| 845 |
+
"""Check if current round is complete. If so, generate next round matchups."""
|
| 846 |
+
t = conn.execute("SELECT * FROM tournaments WHERE id = ?",
|
| 847 |
+
(tournament_id,)).fetchone()
|
| 848 |
+
if not t:
|
| 849 |
+
return
|
| 850 |
+
|
| 851 |
+
current_round = t["current_round"]
|
| 852 |
+
total_rounds = t["total_rounds"]
|
| 853 |
+
|
| 854 |
+
# Check if all matchups in current round are complete
|
| 855 |
+
pending = conn.execute(
|
| 856 |
+
"""SELECT COUNT(*) FROM matchups
|
| 857 |
+
WHERE tournament_id = ? AND round_number = ? AND status != 'complete'""",
|
| 858 |
+
(tournament_id, current_round),
|
| 859 |
+
).fetchone()[0]
|
| 860 |
+
|
| 861 |
+
if pending > 0:
|
| 862 |
+
return # Round not finished yet
|
| 863 |
+
|
| 864 |
+
if current_round >= total_rounds:
|
| 865 |
+
# Tournament is done — find the final winner
|
| 866 |
+
final = conn.execute(
|
| 867 |
+
"""SELECT winner_id FROM matchups
|
| 868 |
+
WHERE tournament_id = ? AND round_number = ?""",
|
| 869 |
+
(tournament_id, total_rounds),
|
| 870 |
+
).fetchone()
|
| 871 |
+
conn.execute(
|
| 872 |
+
"""UPDATE tournaments SET status = 'complete',
|
| 873 |
+
completed_at = CURRENT_TIMESTAMP, winner_entry_id = ?
|
| 874 |
+
WHERE id = ?""",
|
| 875 |
+
(final["winner_id"] if final else None, tournament_id),
|
| 876 |
+
)
|
| 877 |
+
return
|
| 878 |
+
|
| 879 |
+
# Advance to next round
|
| 880 |
+
next_round = current_round + 1
|
| 881 |
+
winners = conn.execute(
|
| 882 |
+
"""SELECT winner_id, bracket_position FROM matchups
|
| 883 |
+
WHERE tournament_id = ? AND round_number = ?
|
| 884 |
+
ORDER BY bracket_position""",
|
| 885 |
+
(tournament_id, current_round),
|
| 886 |
+
).fetchall()
|
| 887 |
+
|
| 888 |
+
# Pair winners: 0+1, 2+3, 4+5, etc.
|
| 889 |
+
for i in range(0, len(winners), 2):
|
| 890 |
+
if i + 1 < len(winners):
|
| 891 |
+
conn.execute(
|
| 892 |
+
"""INSERT INTO matchups
|
| 893 |
+
(tournament_id, round_number, bracket_position,
|
| 894 |
+
entry_a_id, entry_b_id, status)
|
| 895 |
+
VALUES (?, ?, ?, ?, ?, 'active')""",
|
| 896 |
+
(tournament_id, next_round, i // 2,
|
| 897 |
+
winners[i]["winner_id"], winners[i + 1]["winner_id"]),
|
| 898 |
+
)
|
| 899 |
+
|
| 900 |
+
conn.execute("UPDATE tournaments SET current_round = ? WHERE id = ?",
|
| 901 |
+
(next_round, tournament_id))
|
| 902 |
+
|
| 903 |
+
|
| 904 |
+
def _close_matchup(conn, matchup_id):
    """Close a matchup: record the winner, update both ELO ratings, advance.

    Does nothing if the matchup does not exist or is already 'complete'.
    Entry A wins when it has at least as many votes as entry B, so ties go
    to A.

    Args:
        conn: Open database connection whose rows support access by name.
        matchup_id: Id of the matchup to close.
    """
    matchup = conn.execute(
        "SELECT * FROM matchups WHERE id = ?", (matchup_id,)
    ).fetchone()
    if not matchup or matchup["status"] == "complete":
        return

    entry_a, entry_b = matchup["entry_a_id"], matchup["entry_b_id"]
    if matchup["votes_a"] >= matchup["votes_b"]:
        winner_id, loser_id = entry_a, entry_b
    else:
        winner_id, loser_id = entry_b, entry_a

    conn.execute(
        """UPDATE matchups SET status = 'complete', winner_id = ?,
           closed_at = CURRENT_TIMESTAMP WHERE id = ?""",
        (winner_id, matchup_id),
    )

    # Update ELO; a missing rating counts as the default 1200.
    winner_row = conn.execute(
        "SELECT elo_rating FROM entries WHERE id = ?", (winner_id,)
    ).fetchone()
    loser_row = conn.execute(
        "SELECT elo_rating FROM entries WHERE id = ?", (loser_id,)
    ).fetchone()
    if winner_row and loser_row:
        winner_rating = winner_row["elo_rating"] or 1200
        loser_rating = loser_row["elo_rating"] or 1200
        new_winner, new_loser = calculate_elo(winner_rating, loser_rating)
        conn.execute(
            "UPDATE entries SET elo_rating = ? WHERE id = ?", (new_winner, winner_id)
        )
        conn.execute(
            "UPDATE entries SET elo_rating = ? WHERE id = ?", (new_loser, loser_id)
        )

    # This may have been the last open matchup of the round.
    _advance_round(conn, matchup["tournament_id"])
|
| 928 |
+
|
| 929 |
+
|
| 930 |
+
def _entry_json(conn, entry_id):
|
| 931 |
+
"""Get a joke's display data for the World Cup."""
|
| 932 |
+
row = conn.execute("""
|
| 933 |
+
SELECT e.id, e.text, e.author, e.platform, e.topic, e.elo_rating, e.image_url,
|
| 934 |
+
s.display_name as source_name
|
| 935 |
+
FROM entries e
|
| 936 |
+
LEFT JOIN sources s ON e.source_id = s.id
|
| 937 |
+
WHERE e.id = ?
|
| 938 |
+
""", (entry_id,)).fetchone()
|
| 939 |
+
if not row:
|
| 940 |
+
return None
|
| 941 |
+
result = {
|
| 942 |
+
"id": row["id"],
|
| 943 |
+
"text": row["text"],
|
| 944 |
+
"author": row["author"],
|
| 945 |
+
"source": row["source_name"] or row["platform"],
|
| 946 |
+
"elo": row["elo_rating"] or 1200,
|
| 947 |
+
}
|
| 948 |
+
if row["image_url"]:
|
| 949 |
+
result["image_url"] = row["image_url"]
|
| 950 |
+
return result
|
| 951 |
+
|
| 952 |
+
|
| 953 |
+
@app.route("/worldcup")
def worldcup_page():
    """Serve the public Joke World Cup page from the ``worldcup.html`` template."""
    page = render_template("worldcup.html")
    return page
|
| 957 |
+
|
| 958 |
+
|
| 959 |
+
@app.route("/api/worldcup/current")
def api_worldcup_current():
    """Return the active tournament and the matchups of its current round.

    Picks the newest tournament with status 'voting', falling back to the
    newest tournament of any status. Responds with ``{"tournament": None}``
    when there are no tournaments at all.
    """
    with db.get_db() as conn:
        t = conn.execute(
            "SELECT * FROM tournaments WHERE status = 'voting' ORDER BY id DESC LIMIT 1"
        ).fetchone()
        if not t:
            t = conn.execute(
                "SELECT * FROM tournaments ORDER BY id DESC LIMIT 1"
            ).fetchone()
        if not t:
            return jsonify({"tournament": None})

        current_round = t["current_round"]
        total_rounds = t["total_rounds"]

        # ROUND_NAMES is keyed for a six-round (64-entry) bracket. Smaller
        # brackets have fewer rounds, so count back from the final: otherwise
        # the last round of a 16-entry bracket would be labelled
        # "Quarter-Finals".
        if current_round is not None and total_rounds is not None:
            name_key = max(ROUND_NAMES) - (total_rounds - current_round)
        else:
            name_key = current_round
        round_name = ROUND_NAMES.get(name_key, f"Round {current_round}")

        matchups = conn.execute(
            """SELECT * FROM matchups
               WHERE tournament_id = ? AND round_number = ?
               ORDER BY bracket_position""",
            (t["id"], current_round),
        ).fetchall()

        matchup_list = [
            {
                "id": m["id"],
                "entry_a": _entry_json(conn, m["entry_a_id"]),
                "entry_b": _entry_json(conn, m["entry_b_id"]),
                "votes_a": m["votes_a"],
                "votes_b": m["votes_b"],
                "status": m["status"],
                "winner_id": m["winner_id"],
            }
            for m in matchups
        ]

    return jsonify({
        "tournament": {
            "id": t["id"],
            "name": t["name"],
            "status": t["status"],
            "current_round": current_round,
            "total_rounds": total_rounds,
            "round_name": round_name,
            "winner_entry_id": t["winner_entry_id"],
        },
        "matchups": matchup_list,
    })
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
@app.route("/api/worldcup/matchup")
def api_worldcup_matchup():
    """Return the next active matchup this voter has not voted on yet.

    The voter is identified by the ``voter_token`` cookie. When a matchup is
    returned the cookie is set (or refreshed) for one year. The two
    "nothing to vote on" responses carry a ``message`` and do not set it.
    """
    voter_token = _get_voter_token()

    with db.get_db() as conn:
        active = conn.execute(
            "SELECT id FROM tournaments WHERE status = 'voting' ORDER BY id DESC LIMIT 1"
        ).fetchone()
        if not active:
            return jsonify({"matchup": None, "message": "No active tournament"})
        tournament_id = active["id"]

        # Lowest bracket position among active matchups without a vote
        # from this token.
        pending = conn.execute("""
            SELECT m.* FROM matchups m
            WHERE m.tournament_id = ? AND m.status = 'active'
            AND m.id NOT IN (
                SELECT matchup_id FROM votes WHERE voter_token = ?
            )
            ORDER BY m.bracket_position
            LIMIT 1
        """, (tournament_id, voter_token)).fetchone()
        if not pending:
            return jsonify({"matchup": None,
                            "message": "You've voted on all current matchups!"})

        matchup_payload = {
            "id": pending["id"],
            "round": pending["round_number"],
            "entry_a": _entry_json(conn, pending["entry_a_id"]),
            "entry_b": _entry_json(conn, pending["entry_b_id"]),
            "votes_a": pending["votes_a"],
            "votes_b": pending["votes_b"],
        }
        name_row = conn.execute(
            "SELECT name FROM tournaments WHERE id = ?", (tournament_id,)
        ).fetchone()
        payload = {
            "matchup": matchup_payload,
            "tournament_name": name_row["name"],
        }

    one_year = 60 * 60 * 24 * 365
    response = make_response(jsonify(payload))
    response.set_cookie("voter_token", voter_token, max_age=one_year, samesite="Lax")
    return response
|
| 1052 |
+
|
| 1053 |
+
|
| 1054 |
+
@app.route("/api/worldcup/vote", methods=["POST"])
def api_worldcup_vote():
    """Cast a vote in a head-to-head matchup.

    Expects a JSON object with ``matchup_id`` and ``entry_id``. The matchup
    is closed once its total votes reach ``WORLDCUP_VOTES_TO_CLOSE``.

    Responses:
        200: ``{"ok": True, votes_a, votes_b, status, winner_id}``; also
             sets the ``voter_token`` cookie for one year.
        400: missing or non-object body, missing fields, closed matchup, or
             an entry that is not part of the matchup.
        404: unknown matchup.
        409: this voter token already voted on the matchup.
        429: more than 200 votes from this IP in the last hour.
    """
    # Local import: only needed to recognise the duplicate-vote constraint
    # failure. NOTE(review): assumes db.get_db() yields a sqlite3 connection
    # (the SQL here uses SQLite-only functions) - confirm.
    import sqlite3

    # silent=True returns None for a missing or malformed body instead of
    # raising; anything that is not a JSON object is treated as empty so the
    # field check below answers 400 rather than crashing on .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    matchup_id = data.get("matchup_id")
    entry_id = data.get("entry_id")
    voter_token = _get_voter_token()
    # NOTE(review): behind a reverse proxy remote_addr may be the proxy's
    # address, which would make the IP limit below global - confirm deployment.
    voter_ip = request.remote_addr

    if not matchup_id or not entry_id:
        return jsonify({"ok": False, "error": "matchup_id and entry_id required"}), 400

    with db.get_db() as conn:
        m = conn.execute("SELECT * FROM matchups WHERE id = ?", (matchup_id,)).fetchone()
        if not m:
            return jsonify({"ok": False, "error": "Matchup not found"}), 404
        if m["status"] != "active":
            return jsonify({"ok": False, "error": "Matchup is closed"}), 400
        if entry_id not in (m["entry_a_id"], m["entry_b_id"]):
            return jsonify({"ok": False, "error": "Invalid entry for this matchup"}), 400

        # IP rate limit: 200 votes per hour
        ip_count = conn.execute(
            """SELECT COUNT(*) FROM votes
               WHERE voter_ip = ? AND voted_at > datetime('now', '-1 hour')""",
            (voter_ip,),
        ).fetchone()[0]
        if ip_count >= 200:
            return jsonify({"ok": False, "error": "Rate limit exceeded"}), 429

        # Insert vote (UNIQUE constraint prevents double-voting). Only the
        # constraint violation means "already voted"; any other database
        # error propagates instead of being misreported to the client.
        try:
            conn.execute(
                """INSERT INTO votes (matchup_id, entry_id, voter_token, voter_ip)
                   VALUES (?, ?, ?, ?)""",
                (matchup_id, entry_id, voter_token, voter_ip),
            )
        except sqlite3.IntegrityError:
            return jsonify({"ok": False, "error": "Already voted on this matchup"}), 409

        # Increment vote count
        if entry_id == m["entry_a_id"]:
            conn.execute("UPDATE matchups SET votes_a = votes_a + 1 WHERE id = ?",
                         (matchup_id,))
        else:
            conn.execute("UPDATE matchups SET votes_b = votes_b + 1 WHERE id = ?",
                         (matchup_id,))

        # Check if matchup should close
        updated = conn.execute("SELECT * FROM matchups WHERE id = ?",
                               (matchup_id,)).fetchone()
        total_votes = updated["votes_a"] + updated["votes_b"]

        if total_votes >= WORLDCUP_VOTES_TO_CLOSE:
            _close_matchup(conn, matchup_id)

    # Re-fetch for response so status / winner reflect a possible close.
    with db.get_db() as conn:
        updated = conn.execute("SELECT * FROM matchups WHERE id = ?",
                               (matchup_id,)).fetchone()

    resp_data = {
        "ok": True,
        "votes_a": updated["votes_a"],
        "votes_b": updated["votes_b"],
        "status": updated["status"],
        "winner_id": updated["winner_id"],
    }
    resp = make_response(jsonify(resp_data))
    resp.set_cookie("voter_token", voter_token, max_age=60*60*24*365, samesite="Lax")
    return resp
|
| 1125 |
+
|
| 1126 |
+
|
| 1127 |
+
@app.route("/api/worldcup/leaderboard")
def api_worldcup_leaderboard():
    """Return up to 50 jokes ranked by ELO rating, highest first.

    Includes every entry whose rating differs from 1200 or that appears in
    any matchup. ``wins`` counts matchups the entry took part in and won;
    ``losses`` counts completed matchups it took part in where another entry
    won. Joke text is truncated to 150 characters.
    """
    leaderboard_sql = """
        SELECT e.id, e.text, e.author, e.elo_rating, e.platform,
               s.display_name as source_name,
               (SELECT COUNT(*) FROM matchups
                WHERE (entry_a_id = e.id AND winner_id = e.id)
                   OR (entry_b_id = e.id AND winner_id = e.id)) as wins,
               (SELECT COUNT(*) FROM matchups
                WHERE ((entry_a_id = e.id OR entry_b_id = e.id)
                       AND status = 'complete' AND winner_id != e.id)) as losses
        FROM entries e
        LEFT JOIN sources s ON e.source_id = s.id
        WHERE e.elo_rating != 1200 OR e.id IN (
            SELECT entry_a_id FROM matchups UNION SELECT entry_b_id FROM matchups
        )
        ORDER BY e.elo_rating DESC
        LIMIT 50
    """
    with db.get_db() as conn:
        ranked = conn.execute(leaderboard_sql).fetchall()

    leaderboard = []
    for row in ranked:
        leaderboard.append({
            "id": row["id"],
            "text": row["text"][:150],
            "author": row["author"],
            "source": row["source_name"] or row["platform"],
            "elo": row["elo_rating"] or 1200,
            "wins": row["wins"],
            "losses": row["losses"],
        })
    return jsonify(leaderboard)
|
| 1158 |
+
|
| 1159 |
+
|
| 1160 |
+
@app.route("/api/worldcup/create", methods=["POST"])
def api_worldcup_create():
    """Create a new tournament from an optional JSON body.

    Body fields (all optional): ``name``, ``pool_filter`` (default "mixed"),
    ``bracket_size`` (default 64, capped at 128).

    Responses:
        200: ``{"ok": True, "tournament_id": ..., "bracket_size": ...}``.
        400: ``bracket_size`` is not an integer power of two of at least 2.
        409: the tournament could not be created (``create_tournament``
             raised ``ValueError``, e.g. when no entries are eligible).
    """
    # NOTE(review): nothing visible in this route restricts who may create
    # tournaments - confirm whether it should be protected.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    name = data.get("name")
    pool_filter = data.get("pool_filter", "mixed")
    bracket_size = data.get("bracket_size", 64)

    # bool is a subclass of int, so reject it explicitly.
    if isinstance(bracket_size, bool) or not isinstance(bracket_size, int):
        return jsonify({"ok": False, "error": "bracket_size must be an integer"}), 400
    bracket_size = min(bracket_size, 128)
    # n & (n - 1) is zero only for powers of two. Any other size would leave
    # an odd number of winners in a later round.
    if bracket_size < 2 or bracket_size & (bracket_size - 1):
        return jsonify({
            "ok": False,
            "error": "bracket_size must be a power of two between 2 and 128",
        }), 400

    try:
        tid, size = create_tournament(name=name, pool_filter=pool_filter,
                                      bracket_size=bracket_size)
    except ValueError as exc:
        return jsonify({"ok": False,
                        "error": f"Could not create tournament: {exc}"}), 409

    return jsonify({"ok": True, "tournament_id": tid, "bracket_size": size})
|
| 1171 |
+
|
| 1172 |
+
|
| 1173 |
+
@app.route("/api/worldcup/bracket")
def api_worldcup_bracket():
    """Return the full bracket tree for a tournament.

    Query parameters:
        tournament_id: Optional; defaults to the newest tournament.

    Responds with ``{"rounds": []}`` when no matching tournament exists.
    Otherwise returns the tournament summary plus one item per round from 1
    to ``total_rounds``; rounds not yet generated have an empty ``matchups``
    list.
    """
    tid = request.args.get("tournament_id")

    with db.get_db() as conn:
        if not tid:
            latest = conn.execute(
                "SELECT id FROM tournaments ORDER BY id DESC LIMIT 1"
            ).fetchone()
            if not latest:
                return jsonify({"rounds": []})
            tid = latest["id"]

        t = conn.execute("SELECT * FROM tournaments WHERE id = ?", (tid,)).fetchone()
        if not t:
            return jsonify({"rounds": []})

        total_rounds = t["total_rounds"]
        # ROUND_NAMES is keyed for a six-round (64-entry) bracket. Label
        # rounds by their distance from the final so that smaller brackets
        # still end in "Final" rather than e.g. "Quarter-Finals".
        final_key = max(ROUND_NAMES)

        rounds = []
        for r in range(1, total_rounds + 1):
            matchups = conn.execute(
                """SELECT * FROM matchups
                   WHERE tournament_id = ? AND round_number = ?
                   ORDER BY bracket_position""",
                (tid, r),
            ).fetchall()

            rounds.append({
                "round": r,
                "name": ROUND_NAMES.get(final_key - (total_rounds - r), f"Round {r}"),
                "matchups": [
                    {
                        "id": m["id"],
                        "entry_a": _entry_json(conn, m["entry_a_id"]),
                        "entry_b": _entry_json(conn, m["entry_b_id"]),
                        "votes_a": m["votes_a"],
                        "votes_b": m["votes_b"],
                        "status": m["status"],
                        "winner_id": m["winner_id"],
                    }
                    for m in matchups
                ],
            })

    return jsonify({
        "tournament": {
            "id": t["id"],
            "name": t["name"],
            "status": t["status"],
            "winner_entry_id": t["winner_entry_id"],
        },
        "rounds": rounds,
    })
|
| 1226 |
+
|
| 1227 |
+
|
| 1228 |
+
if __name__ == "__main__":
    # Direct-run entry point: announce the database path and URL, then start
    # Flask's built-in server with debug mode on.
    port = 5050
    print(f"Database: {DB_PATH}")
    print(f"Starting at http://localhost:{port}")
    app.run(debug=True, port=port)
|