Navya-Sree committed on
Commit
3b7e3cf
·
verified ·
1 Parent(s): bddb49d

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +21 -10
config.py CHANGED
@@ -1,11 +1,22 @@
1
  # Model configuration
2
- MODEL_CONFIG = {
3
- "base_model": "facebook/m2m100_418M",
4
- "cultural_token_id": 250001,
5
- "max_length": 200,
6
- "supported_langs": {
7
- "ay": {"name": "Aymara", "status": "Vulnerable"},
8
- "chr": {"name": "Cherokee", "status": "Endangered"},
9
- "qu": {"name": "Quechua", "status": "Vulnerable"}
10
- }
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Model configuration
2
+ MODEL_NAME = "facebook/m2m100_1.2B"
3
+
4
+ # Maps each language name to its language code and UNESCO status
5
+ LANGUAGE_MAPPING = {
6
+ "English": {"code": "en", "status": "Safe"},
7
+ "Spanish": {"code": "es", "status": "Safe"},
8
+ "French": {"code": "fr", "status": "Safe"},
9
+ "Quechua": {"code": "qu", "status": "Vulnerable"},
10
+ "Aymara": {"code": "ay", "status": "Vulnerable"},
11
+ "Cherokee": {"code": "chr", "status": "Endangered"},
12
+ "Navajo": {"code": "nv", "status": "Vulnerable"},
13
+ "Inuktitut": {"code": "iu", "status": "Vulnerable"},
14
+ "Sami": {"code": "se", "status": "Endangered"},
15
+ "Welsh": {"code": "cy", "status": "Vulnerable"}
16
+ }
17
+
18
+ # At-risk languages (UNESCO status Vulnerable or Endangered) for target selection
19
+ ENDANGERED_LANGS = [
20
+ "Quechua", "Aymara", "Cherokee",
21
+ "Navajo", "Inuktitut", "Sami", "Welsh"
22
+ ]