bikashpatra committed on
Commit
613d3bf
·
1 Parent(s): 2d00be3

first commit

Browse files
Files changed (3) hide show
  1. README.md +7 -7
  2. app.py +102 -0
  3. requirements.txt +10 -0
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
- title: Indic Translate
3
- emoji: 📚
4
  colorFrom: blue
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 6.1.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: 'Indic Translation using ai4bharat/indictrans2-en-indic-dist '
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: IndicTrans2 Translation Demo
3
+ emoji: 🌏
4
  colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # IndicTrans2 Translation Demo
13
+
14
+ Test English to Indic language translation using IndicTrans2.
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ from IndicTransToolkit.processor import IndicProcessor
5
+
6
+ # Choose the device: use the GPU when CUDA is available, otherwise fall back to CPU (the free HF Spaces tier is CPU-only)
7
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
+
9
+ # Load the distilled model for faster inference
10
+ MODEL_NAME = "ai4bharat/indictrans2-en-indic-dist-200M"
11
+
12
import functools


# NOTE: the original decorator was `@gr.cache`. Gradio's documented top-level
# API does not appear to provide a `cache` decorator (that name looks borrowed
# from Streamlit), so evaluating it would raise AttributeError at import time.
# `functools.lru_cache` gives the intended load-once behaviour.
@functools.lru_cache(maxsize=1)
def load_model():
    """Load the tokenizer, the translation model and the IndicProcessor.

    The result is memoized, so repeated calls return the same objects
    instead of loading the checkpoint again.

    Returns:
        A ``(tokenizer, model, ip)`` tuple: the tokenizer and seq2seq model
        loaded from ``MODEL_NAME`` (the model already moved to ``DEVICE``),
        and an ``IndicProcessor`` created with ``inference=True``.
    """
    # NOTE(review): trust_remote_code=True runs Python code shipped in the
    # model repository; keep MODEL_NAME pointing at a trusted repo.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    # Half precision only when running on CUDA; full precision on CPU.
    dtype = torch.float16 if DEVICE == "cuda" else torch.float32
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        torch_dtype=dtype,
    ).to(DEVICE)

    ip = IndicProcessor(inference=True)
    return tokenizer, model, ip
22
+
23
+ tokenizer, model, ip = load_model()
24
+
25
+ # Language mapping
26
+ LANGUAGES = {
27
+ "Hindi": "hin_Deva",
28
+ "Tamil": "tam_Taml",
29
+ "Telugu": "tel_Telu",
30
+ "Bengali": "ben_Beng",
31
+ "Marathi": "mar_Deva",
32
+ "Gujarati": "guj_Gujr",
33
+ "Kannada": "kan_Knda",
34
+ "Malayalam": "mal_Mlym",
35
+ "Punjabi": "pan_Guru",
36
+ "Oriya": "ory_Orya"
37
+ }
38
+
39
def translate(text, target_lang):
    """Translate English ``text`` into the language named by ``target_lang``.

    Args:
        text: English input text. ``None`` or whitespace-only input is
            answered with a prompt message instead of being translated.
        target_lang: Display name of the target language; must be a key of
            ``LANGUAGES`` (e.g. ``"Hindi"``).

    Returns:
        The translated string, or a short user-facing message when the input
        text is empty or the target language is not recognised.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError.
    if text is None or not text.strip():
        return "Please enter some text to translate."

    # Resolve the language code once; a cleared dropdown or an unknown name
    # yields a message rather than a KeyError.
    tgt_code = LANGUAGES.get(target_lang)
    if tgt_code is None:
        return "Please select a supported target language."

    # Preprocess: the processor is given the source and target language codes.
    batch = ip.preprocess_batch(
        [text],
        src_lang="eng_Latn",
        tgt_lang=tgt_code,
    )

    # Tokenize; inputs longer than 256 tokens are truncated.
    inputs = tokenizer(
        batch,
        truncation=True,
        padding="longest",
        max_length=256,
        return_tensors="pt",
    ).to(DEVICE)

    # Generate with beam search; no gradients are needed for inference.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            num_beams=5,
            max_length=256,
        )

    # Decode token ids back to text, dropping special tokens.
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Postprocess the decoded text for the target language.
    translations = ip.postprocess_batch(decoded, lang=tgt_code)

    # A single sentence went in, so a single translation comes out.
    return translations[0]
74
+
75
+ # Create Gradio interface
76
+ demo = gr.Interface(
77
+ fn=translate,
78
+ inputs=[
79
+ gr.Textbox(
80
+ label="English Text",
81
+ placeholder="Enter English text to translate...",
82
+ lines=5
83
+ ),
84
+ gr.Dropdown(
85
+ choices=list(LANGUAGES.keys()),
86
+ label="Target Language",
87
+ value="Hindi"
88
+ )
89
+ ],
90
+ outputs=gr.Textbox(label="Translation", lines=5),
91
+ title="IndicTrans2 Translation Demo",
92
+ description="Translate English text to Indian languages using IndicTrans2",
93
+ examples=[
94
+ ["Hello, how are you?", "Hindi"],
95
+ ["The weather is beautiful today.", "Tamil"],
96
+ ["I love learning new languages.", "Bengali"]
97
+ ],
98
+ cache_examples=False
99
+ )
100
+
101
+ if __name__ == "__main__":
102
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers==4.53.2
3
+ gradio
4
+ sentencepiece
5
+ nltk
6
+ sacremoses
7
+ pandas
8
+ regex
9
+ IndicTransToolkit
10
+ accelerate