File size: 934 Bytes
13812dc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | -- Curated view: English words with etymology, no phrases/proper nouns
-- Filter out sense=NULL entries which are often garbage (e.g., suffix entries
-- like "-er" with corrupted links to unrelated words like "asteroid belt")
-- Paper notes 40% of EtymDB lacks glosses; our curated set is 99% with sense
CREATE OR REPLACE VIEW v_english_curated AS
SELECT DISTINCT w.*
FROM words w
JOIN links l ON w.word_ix = l.source
WHERE w.lang = 'en'
AND is_clean_word(w.lexeme)
AND w.sense IS NOT NULL;
-- View for words with "deep" etymology (at least one link to a real word)
-- Excludes compound-only words where all links point to sequences (negative IDs)
-- Also excludes sense=NULL entries (same rationale as v_english_curated)
CREATE OR REPLACE VIEW v_english_deep AS
SELECT DISTINCT w.*
FROM words w
JOIN links l ON w.word_ix = l.source
WHERE w.lang = 'en'
AND is_clean_word(w.lexeme)
AND w.sense IS NOT NULL
AND l.target > 0;
|