"""Indus 3.0 Demo — Gradio app (Sam-Orion / Indus_3.0, revision 5946fc0)."""
import os
import tarfile
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# 1. Download the tar.gz archive from the Hub (cached locally by hf_hub_download).
archive_path = hf_hub_download(
    repo_id="SamOrion/Llama_3.2_3b_Hindi_Pruned",
    filename="llama-3.2-3b-hindi-pruned.tar.gz",
    repo_type="model",
)

# 2. Extract into './model', stripping the top-level folder so that
#    config.json & friends land directly under extract_dir.
extract_dir = "./model"
os.makedirs(extract_dir, exist_ok=True)
with tarfile.open(archive_path, "r:gz") as tar:
    for member in tar.getmembers():
        # Split off the first path component (the archive's root folder name).
        parts = member.name.split("/", 1)
        if len(parts) != 2:
            # Bare top-level directory entry — nothing to extract once stripped.
            continue
        member.name = parts[1]
        # filter="data" (Python 3.12+, backported to 3.10.12/3.11.4) rejects
        # absolute paths, "..", symlink escapes, and device nodes, preventing
        # path-traversal attacks from a malicious archive.
        tar.extract(member, path=extract_dir, filter="data")

# 3. Fail fast if extraction did not produce the expected layout.
config_path = os.path.join(extract_dir, "config.json")
if not os.path.isfile(config_path):
    raise FileNotFoundError(f"config.json not found in {extract_dir}")

# 4. Load tokenizer and model straight from './model'.
#    device_map="auto" places weights on GPU when available, CPU otherwise.
tokenizer = AutoTokenizer.from_pretrained(extract_dir)
model = AutoModelForCausalLM.from_pretrained(
    extract_dir,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
)
# 5. Chat handler using OpenAI-style message dicts.
def chat_fn(prompt, history):
    """Run one chat turn and return the updated history plus an empty textbox value.

    Args:
        prompt: The user's new message (plain text).
        history: List of {"role", "content"} dicts from the Chatbot component,
            or None on the first turn.

    Returns:
        (history, ""): the history extended with the user turn and the model's
        reply, and an empty string to clear the input textbox.
    """
    history = history or []
    # Ignore blank submissions instead of generating from an empty prompt.
    if not prompt or not prompt.strip():
        return history, ""
    history.append({"role": "user", "content": prompt})
    # Feed the WHOLE conversation through the model's chat template — the
    # previous version tokenized only `prompt`, so multi-turn context was
    # silently dropped. return_dict=True also supplies the attention mask.
    inputs = tokenizer.apply_chat_template(
        history,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    input_length = inputs["input_ids"].shape[1]
    # Generate full-sequence tokens (prompt + continuation).
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Slice off the prompt tokens so only the new reply is decoded.
    generated_ids = outputs[0][input_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
    history.append({"role": "assistant", "content": response})
    return history, ""
# 6. Build the Gradio UI: title, chat display, input box, and a clear button.
with gr.Blocks() as demo:
    gr.Markdown("## 🌐 Indus 3.0 Hindi LLM Demo")
    chatbot = gr.Chatbot(type="messages")
    user_box = gr.Textbox(placeholder="Type here…")
    clear_btn = gr.Button("Clear")

    # Submitting the textbox runs inference and clears the box.
    user_box.submit(chat_fn, [user_box, chatbot], [chatbot, user_box])

    # Resets both the chat history and the textbox contents.
    def _reset():
        return [], ""

    clear_btn.click(_reset, None, [chatbot, user_box])

# 7. Launch without `share=True` on Spaces
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)