Sumandas990 commited on
Commit
5d6e58d
·
verified ·
1 Parent(s): e971213

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr, torch, threading
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
+
5
# --- Model config ---
MODEL_ID = "md-nishat-008/TigerLLM-1B-it" # Bangla-optimized, instruction-tuned
# Pipeline-style device index: CUDA device 0 when a GPU is visible, -1 for CPU.
# Used below both for device_map selection and for moving inputs at generate time.
DEVICE = 0 if torch.cuda.is_available() else -1

# System prompts for the two UI languages (Bangla / Hindi). The Hindi prompt
# itself notes that the model is Bangla-centric, so Hindi quality may be limited.
SYSTEM_BN = "তুমি একজন সহায়ক বাংলা সহকারী। সংক্ষিপ্ত, স্পষ্ট এবং ভদ্রভাবে উত্তর দাও।"
SYSTEM_HI = "तुम एक सहायक हिन्दी सहायक हो। स्पष्ट और संक्षिप्त उत्तर दो। (यह मॉडल बांग्ला-केंद्रित है, इसलिए हिन्दी गुणवत्ता सीमित हो सकती है।)"
11
+
12
# --- Load once at startup (reduces first-turn stall) ---
# Tokenizer and model are module-level singletons shared by every request.
tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # float32 is the safe dtype on the CPU-only free Spaces tier.
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    # device_map="auto" lets accelerate place the weights on the GPU when one
    # is available; None keeps the plain single-device CPU load path.
    device_map="auto" if DEVICE != -1 else None,
)
# FIX: the previous `model.to("cuda")` after device_map="auto" is removed.
# With device_map="auto", accelerate has already dispatched the weights, and
# calling .to() on a dispatched model raises a RuntimeError in recent
# transformers versions (and is redundant when it does not).
model.eval()  # inference-only app; disable dropout/training-mode layers
22
+
23
def build_prompt(history, lang, system_bn=None, system_hi=None):
    """Flatten a chat history into one tagged prompt string.

    Args:
        history: list of (user, bot) pairs; ``bot`` may be ``None`` for the
            turn currently being generated (appended by ``on_user``).
        lang: ``"bn"`` selects the Bangla system prompt, anything else Hindi.
        system_bn: optional override for the Bangla system prompt; defaults
            to the module-level ``SYSTEM_BN`` when ``None``.
        system_hi: optional override for the Hindi system prompt; defaults
            to the module-level ``SYSTEM_HI`` when ``None``.

    Returns:
        A newline-joined prompt that ends with an open ``<<ASSISTANT>>`` tag
        so the model continues the conversation as the assistant.
    """
    # Resolve the system prompt lazily so explicit overrides never touch
    # module globals (also avoids shadowing the common name `sys`).
    if lang == "bn":
        system = SYSTEM_BN if system_bn is None else system_bn
    else:
        system = SYSTEM_HI if system_hi is None else system_hi

    parts = [f"<<SYS>> {system} </SYS>"]
    for user, bot in history:
        if user:
            parts.append(f"<<USER>> {user}")
        if bot:  # the in-progress turn has bot=None and is skipped here
            parts.append(f"<<ASSISTANT>> {bot}")
    parts.append("<<ASSISTANT>>")
    return "\n".join(parts)
33
+
34
def on_user(user_text, chat_state):
    """Append the user's message as a new (text, None) turn.

    Returns a ("", new_state) pair: the empty string clears the textbox,
    and the state gains a pending turn only when the input is non-blank.
    The original message is stored unstripped.
    """
    text = user_text.strip()
    if text:
        return "", chat_state + [(user_text, None)]
    # Blank / whitespace-only input: clear the box, leave history untouched.
    return "", chat_state
38
+
39
def on_bot_stream(chat_state, lang_label):
    """Stream the model's reply for the last user turn into the chatbot.

    Generator Gradio handler: yields the updated chat history after each
    streamed text chunk so the UI renders the answer incrementally.

    Args:
        chat_state: list of (user, bot) pairs; the last entry is normally
            (user_text, None), placed there by ``on_user``.
        lang_label: UI radio value, "বাংলা" or "हिन्दी".
    """
    # FIX: this handler always runs after on_user via .then(), even when
    # on_user ignored blank input. Previously an empty history crashed with
    # IndexError at chat_state[-1], and a blank submit on a non-empty chat
    # silently regenerated the already-answered last turn. Guard both cases.
    if not chat_state or chat_state[-1][1] is not None:
        yield chat_state
        return

    lang = "bn" if lang_label == "বাংলা" else "hi"
    last_user, _ = chat_state[-1]

    prompt = build_prompt(chat_state, lang)
    inputs = tok(prompt, return_tensors="pt")
    if DEVICE != -1:
        inputs = {k: v.to("cuda") for k, v in inputs.items()}

    # Generate in a background thread; the streamer yields decoded text
    # pieces on this thread as tokens are produced.
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=160,  # keep short for speed on the free CPU tier
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.05,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.eos_token_id,  # no dedicated pad token; reuse EOS
        streamer=streamer,
    )

    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    partial = ""
    try:
        for new_text in streamer:
            partial += new_text
            # Strip any prompt-style tags the model echoes back verbatim.
            clean = (
                partial.replace("<<SYS>>", "")
                .replace("<<USER>>", "")
                .replace("<<ASSISTANT>>", "")
            )
            chat_state[-1] = (last_user, clean[:2000])  # hard cap on reply length
            yield chat_state
    finally:
        # Don't leave the generation thread dangling if the client disconnects
        # mid-stream or the streamer is exhausted.
        thread.join()
71
+
72
# --- UI wiring: a two-step event chain per message ---
# on_user appends the user's turn synchronously (and clears the textbox),
# then on_bot_stream streams the model reply into the same Chatbot component.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("### Suman AI Project — বাংলা / हिंदी")
    lang = gr.Radio(choices=["বাংলা", "हिन्दी"], value="বাংলা", label="Language")
    # 'tuples' shows a deprecation warning but works fine on Gradio v5
    # (handlers here exchange history as a list of (user, bot) tuples).
    chat = gr.Chatbot(height=420, type="tuples")
    txt = gr.Textbox(placeholder="বাংলা বা हिन्दी লিখুন…", autofocus=True)
    send = gr.Button("Send")

    # Enter key and the Send button trigger identical chains.
    txt.submit(on_user, [txt, chat], [txt, chat]).then(on_bot_stream, [chat, lang], [chat])
    send.click(on_user, [txt, chat], [txt, chat]).then(on_bot_stream, [chat, lang], [chat])

# Gradio v5: queue() takes no kwargs; just enable and launch
# (queue() is required for generator/streaming handlers like on_bot_stream).
demo.queue().launch()