Sam-Orion
committed on
Commit
·
5946fc0
1
Parent(s):
8d1c7ea
Indus 3.0 Demo
Browse files
app.py
CHANGED
|
@@ -42,13 +42,21 @@ def chat_fn(prompt, history):
|
|
| 42 |
history = history or []
|
| 43 |
history.append({"role": "user", "content": prompt})
|
| 44 |
|
|
|
|
| 45 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
|
|
|
|
|
|
|
|
|
| 46 |
outputs = model.generate(**inputs, max_new_tokens=100)
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
history.append({"role": "assistant", "content": response})
|
| 50 |
return history, ""
|
| 51 |
|
|
|
|
| 52 |
# 6. Build Gradio interface
|
| 53 |
with gr.Blocks() as demo:
|
| 54 |
gr.Markdown("## 🌐 Indus 3.0 Hindi LLM Demo")
|
|
|
|
| 42 |
history = history or []
|
| 43 |
history.append({"role": "user", "content": prompt})
|
| 44 |
|
| 45 |
+
# Tokenize and send to device
|
| 46 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 47 |
+
input_length = inputs["input_ids"].shape[1]
|
| 48 |
+
|
| 49 |
+
# Generate full-sequence tokens (prompt + continuation)
|
| 50 |
outputs = model.generate(**inputs, max_new_tokens=100)
|
| 51 |
+
|
| 52 |
+
# Remove the prompt tokens from the start
|
| 53 |
+
generated_ids = outputs[0][input_length:]
|
| 54 |
+
response = tokenizer.decode(generated_ids, skip_special_tokens=True)
|
| 55 |
|
| 56 |
history.append({"role": "assistant", "content": response})
|
| 57 |
return history, ""
|
| 58 |
|
| 59 |
+
|
| 60 |
# 6. Build Gradio interface
|
| 61 |
with gr.Blocks() as demo:
|
| 62 |
gr.Markdown("## 🌐 Indus 3.0 Hindi LLM Demo")
|