NitinBot001 committed on
Commit
9598bf5
·
verified ·
1 Parent(s): 938b272

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +165 -0
Dockerfile ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LibreTranslate image with Argos Translate models for English and Indian regional languages pre-installed
2
+ FROM libretranslate/libretranslate:latest
3
+ # NOTE(review): ":latest" is unpinned, so a rebuild may pick up a different base image — consider pinning a version tag.
4
+ # LibreTranslate settings supplied as LT_* environment variables: bind address, port, threads, per-request character limit, languages to load
5
+ ENV LT_HOST=0.0.0.0
6
+ ENV LT_PORT=7860
7
+ ENV LT_THREADS=6
8
+ ENV LT_CHAR_LIMIT=20000
9
+ ENV LT_LOAD_ONLY=en,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,mai,mag,bh,sa,ne,ks,sd,kok,mni,sat,doi,bo
10
+ # NOTE(review): LT_LOAD_ONLY lists 25 codes, but the CMD at the end of this file passes a 15-code --load-only list — confirm which is intended.
11
+ # As root, create /app/db and the Argos Translate packages directory, then hand both to the libretranslate user
12
+ USER root
13
+ RUN mkdir -p /app/db /home/libretranslate/.local/share/argos-translate/packages && \
14
+     chown -R libretranslate:libretranslate /app/db /home/libretranslate/.local/share/argos-translate/packages
15
+
16
+ # Drop back to the unprivileged user for the remaining build steps and for the running container
17
+ USER libretranslate
18
+
19
+ # Pre-install Argos models (best effort: missing or failing pairs are counted, never fatal). A BuildKit heredoc is used because a quoted string cannot span Dockerfile lines without continuations.
20
+ RUN python <<'PYEOF'
21
+ import argostranslate.package
22
+ import argostranslate.translate
23
+ # Refresh the package index first so the list below reflects what can currently be downloaded
24
+ argostranslate.package.update_package_index()
25
+ available_packages = argostranslate.package.get_available_packages()
26
+
27
+ # The 22 scheduled languages of India plus Magahi and Bhojpuri (ISO 639-1 code where one exists, otherwise ISO 639-3)
28
+ indian_languages = [
29
+     'hi',   # Hindi
30
+     'bn',   # Bengali
31
+     'te',   # Telugu
32
+     'mr',   # Marathi
33
+     'ta',   # Tamil
34
+     'ur',   # Urdu
35
+     'gu',   # Gujarati
36
+     'kn',   # Kannada
37
+     'ml',   # Malayalam
38
+     'or',   # Odia
39
+     'pa',   # Punjabi
40
+     'as',   # Assamese
41
+     'mai',  # Maithili
42
+     'mag',  # Magahi
43
+     'bho',  # Bhojpuri ('bh' is the collective Bihari code, not Bhojpuri)
44
+     'sa',   # Sanskrit
45
+     'ne',   # Nepali
46
+     'ks',   # Kashmiri
47
+     'sd',   # Sindhi
48
+     'kok',  # Konkani
49
+     'mni',  # Manipuri
50
+     'sat',  # Santali
51
+     'doi',  # Dogri
52
+     'brx',  # Bodo ('bo' is Tibetan, not Bodo)
53
+ ]
54
+
55
+ # Collect every (from_code, to_code) pair that installation will be attempted for
56
+ language_pairs = []
57
+
58
+ # Primary focus: English to/from all Indian languages (PRIORITY PAIRS)
59
+ print('Adding English <-> Indian language pairs...')
60
+ for lang in indian_languages:
61
+     language_pairs.extend([
62
+         ('en', lang),  # English to Indian language - HIGH PRIORITY
63
+         (lang, 'en'),  # Indian language to English - HIGH PRIORITY
64
+     ])
65
+
66
+ # Every ordered pair of two different Indian languages
67
+ print('Adding inter-Indian language pairs...')
68
+ for i, lang1 in enumerate(indian_languages):
69
+     for j, lang2 in enumerate(indian_languages):
70
+         if i != j:  # Don't add same language pairs
71
+             language_pairs.append((lang1, lang2))
72
+
73
+ # Each Indian language paired, in both directions, with a fixed set of international languages
74
+ international_languages = ['es', 'fr', 'de', 'it', 'pt', 'ru', 'zh', 'ja', 'ar', 'ko']
75
+ print('Adding international language pairs with Indian languages...')
76
+ for indian_lang in indian_languages:
77
+     for intl_lang in international_languages:
78
+         language_pairs.extend([
79
+             (indian_lang, intl_lang),  # Indian to International
80
+             (intl_lang, indian_lang),  # International to Indian
81
+         ])
82
+
83
+ # Counters for the summary; failed_count covers both failed installs and pairs with no package
84
+ installed_count = 0
85
+ failed_count = 0
86
+ priority_installed = 0
87
+
88
+ print('Starting installation of language models...')
89
+ print(f'Total language pairs to attempt: {len(language_pairs)}')
90
+ print('Priority: English <-> Indian languages first')
91
+
92
+ # Split so that pairs involving English are installed before everything else
93
+ english_indian_pairs = [(f, t) for f, t in language_pairs if 'en' in (f, t)]
94
+ other_pairs = [(f, t) for f, t in language_pairs if 'en' not in (f, t)]
95
+
96
+ print(f'English-Indian priority pairs: {len(english_indian_pairs)}')
97
+ print(f'Other language pairs: {len(other_pairs)}')
98
+
99
+ # Install English-Indian pairs first (PRIORITY)
100
+ print('\n=== Installing Priority Pairs (English <-> Indian) ===')
101
+ for from_code, to_code in english_indian_pairs:
102
+     package_to_install = next(
103
+         (pkg for pkg in available_packages
104
+          if pkg.from_code == from_code and pkg.to_code == to_code),
105
+         None
106
+     )
107
+     if package_to_install:
108
+         try:
109
+             argostranslate.package.install_from_path(package_to_install.download())
110
+             print(f'✓ PRIORITY: Successfully installed {from_code}->{to_code}')
111
+             installed_count += 1
112
+             priority_installed += 1
113
+         except Exception as e:  # keep going: one broken download must not abort the image build
114
+             print(f'✗ PRIORITY: Failed to install {from_code}->{to_code}: {e}')
115
+             failed_count += 1
116
+     else:
117
+         print(f'⚠ PRIORITY: Package not available: {from_code}->{to_code}')
118
+         failed_count += 1
119
+
120
+ # Install other pairs (inter-Indian and international)
121
+ print('\n=== Installing Additional Pairs (Inter-Indian & International) ===')
122
+ for from_code, to_code in other_pairs:
123
+     package_to_install = next(
124
+         (pkg for pkg in available_packages
125
+          if pkg.from_code == from_code and pkg.to_code == to_code),
126
+         None
127
+     )
128
+     if package_to_install:
129
+         try:
130
+             argostranslate.package.install_from_path(package_to_install.download())
131
+             print(f'✓ Successfully installed {from_code}->{to_code}')
132
+             installed_count += 1
133
+         except Exception as e:  # keep going: one broken download must not abort the image build
134
+             print(f'✗ Failed to install {from_code}->{to_code}: {e}')
135
+             failed_count += 1
136
+     else:
137
+         # Don't print warnings for unavailable inter-language pairs to reduce noise
138
+         failed_count += 1
139
+
140
+ print(f'\n=== Final Installation Summary ===')
141
+ print(f'Priority pairs (English <-> Indian): {priority_installed} installed')
142
+ print(f'Total successfully installed: {installed_count} language pairs')
143
+ print(f'Failed or unavailable: {failed_count} language pairs')
144
+ print(f'\n🎯 FOCUS: English to Indian Regional Languages Translation Server Ready!')
145
+ print(f'📋 Supported: All available English <-> Indian language pairs')
146
+ print(f'🌐 Bonus: Inter-Indian and international language pairs where available')
147
+ print(f'\n⚠️ Note: Some regional languages may not be available in Argos Translate yet.')
148
+ print(f'✅ Available languages will be loaded automatically on server start.')
149
+ print(f'🚀 Server optimized for English to Indian regional language translation!')
150
+ PYEOF
151
+
152
+ # Expose the port the server is configured to listen on (LT_PORT and --port are both 7860)
153
+ EXPOSE 7860
154
+
155
+ # Probe the same port the server binds to. NOTE(review): assumes curl is present in the image and that /health is served — confirm.
156
+ HEALTHCHECK --interval=30s --timeout=15s --start-period=60s --retries=3 \
157
+     CMD curl -f http://localhost:7860/health || exit 1
158
+ # Default command: run libretranslate on 0.0.0.0:7860 with 6 threads, a 20000-character limit and an explicit --load-only list
159
+ # NOTE(review): --load-only has 15 codes vs. 25 in LT_LOAD_ONLY; also, if the base image sets an ENTRYPOINT, these elements are appended to it as arguments — verify both.
160
+ CMD ["libretranslate", \
161
+ "--host", "0.0.0.0", \
162
+ "--port", "7860", \
163
+ "--threads", "6", \
164
+ "--char-limit", "20000", \
165
+ "--load-only", "en,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,ne,sa"]