import os
import tarfile

import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# 1. Download the tar.gz archive (hf_hub_download caches it locally).
archive_path = hf_hub_download(
    repo_id="SamOrion/Llama_3.2_3b_Hindi_Pruned",
    filename="llama-3.2-3b-hindi-pruned.tar.gz",
    repo_type="model",
)

# 2. Extract into './model', stripping the top-level folder so config.json,
#    weights, etc. land directly under extract_dir.
extract_dir = "./model"
os.makedirs(extract_dir, exist_ok=True)
with tarfile.open(archive_path, "r:gz") as tar:
    for member in tar.getmembers():
        # Skip the first path component (the archive's root folder name).
        parts = member.name.split("/", 1)
        if len(parts) == 2:
            member.name = parts[1]
        # SECURITY: filter="data" (Python 3.12, backported to 3.9.17+)
        # rejects path-traversal members, absolute paths, and special files.
        tar.extract(member, path=extract_dir, filter="data")

# 3. Fail fast if the archive layout was not what we expected.
config_path = os.path.join(extract_dir, "config.json")
if not os.path.isfile(config_path):
    raise FileNotFoundError(f"config.json not found in {extract_dir}")

# 4. Load tokenizer and model straight from './model'.
tokenizer = AutoTokenizer.from_pretrained(extract_dir)
model = AutoModelForCausalLM.from_pretrained(
    extract_dir,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
)


# 5. Chat function using OpenAI-style {"role", "content"} messages.
def chat_fn(prompt, history):
    """Append the user turn, generate a reply, and return the new state.

    Args:
        prompt: The user's latest message text.
        history: List of {"role", "content"} dicts, or None/[] on first turn.

    Returns:
        (updated history, "") — the empty string clears the input textbox.
    """
    history = history or []
    history.append({"role": "user", "content": prompt})

    # BUG FIX: the original tokenized only `prompt`, so earlier turns were
    # accumulated in `history` but never shown to the model, and the model's
    # chat template (required by Llama-3.2 instruct checkpoints) was never
    # applied. Render the full conversation through the template instead.
    input_ids = tokenizer.apply_chat_template(
        history,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    input_length = input_ids.shape[1]

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(input_ids, max_new_tokens=100)

    # Drop the prompt tokens; decode only the generated continuation.
    generated_ids = outputs[0][input_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    history.append({"role": "assistant", "content": response})
    return history, ""
# 6. Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## 🌐 Indus 3.0 Hindi LLM Demo")
    chatbot = gr.Chatbot(type="messages")
    user_box = gr.Textbox(placeholder="Type here…")
    clear_btn = gr.Button("Clear")

    # Pressing Enter in the textbox runs the chat function; its second
    # output ("") clears the textbox for the next message.
    user_box.submit(chat_fn, [user_box, chatbot], [chatbot, user_box])

    def _reset():
        # Fresh history and an empty textbox.
        return [], ""

    clear_btn.click(_reset, None, [chatbot, user_box])

# 7. Launch without `share=True` — Spaces exposes the app itself.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)