| -- Curated view: English words with etymology, no phrases/proper nouns | |
| -- Filter out sense=NULL entries which are often garbage (e.g., suffix entries | |
| -- like "-er" with corrupted links to unrelated words like "asteroid belt") | |
| -- Paper notes 40% of EtymDB lacks glosses; our curated set is 99% with sense | |
| CREATE OR REPLACE VIEW v_english_curated AS | |
| SELECT DISTINCT w.* | |
| FROM words w | |
| JOIN links l ON w.word_ix = l.source | |
| WHERE w.lang = 'en' | |
| AND is_clean_word(w.lexeme) | |
| AND w.sense IS NOT NULL; | |
| -- View for words with "deep" etymology (at least one link to a real word) | |
| -- Excludes compound-only words where all links point to sequences (negative IDs) | |
| -- Also excludes sense=NULL entries (same rationale as v_english_curated) | |
| CREATE OR REPLACE VIEW v_english_deep AS | |
| SELECT DISTINCT w.* | |
| FROM words w | |
| JOIN links l ON w.word_ix = l.source | |
| WHERE w.lang = 'en' | |
| AND is_clean_word(w.lexeme) | |
| AND w.sense IS NOT NULL | |
| AND l.target > 0; | |