File size: 934 Bytes
13812dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- Curated view: English words with etymology, no phrases/proper nouns
-- Filter out sense=NULL entries which are often garbage (e.g., suffix entries
-- like "-er" with corrupted links to unrelated words like "asteroid belt")
-- Paper notes 40% of EtymDB lacks glosses; our curated set is 99% with sense
CREATE OR REPLACE VIEW v_english_curated AS
SELECT DISTINCT w.*
FROM words w
JOIN links l ON w.word_ix = l.source
WHERE w.lang = 'en'
  AND is_clean_word(w.lexeme)
  AND w.sense IS NOT NULL;

-- View for words with "deep" etymology (at least one link to a real word)
-- Excludes compound-only words where all links point to sequences (negative IDs)
-- Also excludes sense=NULL entries (same rationale as v_english_curated)
CREATE OR REPLACE VIEW v_english_deep AS
SELECT DISTINCT w.*
FROM words w
JOIN links l ON w.word_ix = l.source
WHERE w.lang = 'en'
  AND is_clean_word(w.lexeme)
  AND w.sense IS NOT NULL
  AND l.target > 0;