"""Indus 3.0 Demo — Gradio app (Sam-Orion / Indus_3.0, revision 5946fc0)."""
import os
import tarfile
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# 1. Download the tar.gz archive from the Hub (cached locally by hf_hub_download).
archive_path = hf_hub_download(
    repo_id="SamOrion/Llama_3.2_3b_Hindi_Pruned",
    filename="llama-3.2-3b-hindi-pruned.tar.gz",
    repo_type="model",
)

# 2. Extract into './model', stripping the top-level folder so that
#    config.json & friends land directly under extract_dir.
extract_dir = "./model"
os.makedirs(extract_dir, exist_ok=True)
with tarfile.open(archive_path, "r:gz") as tar:
    for member in tar.getmembers():
        # Split off the first path component (the archive's root folder name).
        parts = member.name.split("/", 1)
        if len(parts) != 2:
            # Bare top-level directory entry — nothing to extract once stripped.
            continue
        member.name = parts[1]
        # filter="data" (Python 3.12+, backported to 3.10.12/3.11.4) rejects
        # absolute paths, "..", symlink escapes, and device nodes, preventing
        # path-traversal attacks from a malicious archive.
        tar.extract(member, path=extract_dir, filter="data")

# 3. Fail fast if extraction did not produce the expected layout.
config_path = os.path.join(extract_dir, "config.json")
if not os.path.isfile(config_path):
    raise FileNotFoundError(f"config.json not found in {extract_dir}")

# 4. Load tokenizer and model straight from './model'.
#    device_map="auto" places weights on GPU when available, CPU otherwise.
tokenizer = AutoTokenizer.from_pretrained(extract_dir)
model = AutoModelForCausalLM.from_pretrained(
    extract_dir,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
)
# 5. Chat handler using OpenAI-style message dicts.
def chat_fn(prompt, history):
    """Run one chat turn and return the updated history plus an empty textbox value.

    Args:
        prompt: The user's new message (plain text).
        history: List of {"role", "content"} dicts from the Chatbot component,
            or None on the first turn.

    Returns:
        (history, ""): the history extended with the user turn and the model's
        reply, and an empty string to clear the input textbox.
    """
    history = history or []
    # Ignore blank submissions instead of generating from an empty prompt.
    if not prompt or not prompt.strip():
        return history, ""
    history.append({"role": "user", "content": prompt})
    # Feed the WHOLE conversation through the model's chat template — the
    # previous version tokenized only `prompt`, so multi-turn context was
    # silently dropped. return_dict=True also supplies the attention mask.
    inputs = tokenizer.apply_chat_template(
        history,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    input_length = inputs["input_ids"].shape[1]
    # Generate full-sequence tokens (prompt + continuation).
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Slice off the prompt tokens so only the new reply is decoded.
    generated_ids = outputs[0][input_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
    history.append({"role": "assistant", "content": response})
    return history, ""
# 6. Build the Gradio UI: title, chat display, input box, and a clear button.
with gr.Blocks() as demo:
    gr.Markdown("## 🌐 Indus 3.0 Hindi LLM Demo")
    chatbot = gr.Chatbot(type="messages")
    user_box = gr.Textbox(placeholder="Type here…")
    clear_btn = gr.Button("Clear")

    # Submitting the textbox runs inference and clears the box.
    user_box.submit(chat_fn, [user_box, chatbot], [chatbot, user_box])

    # Resets both the chat history and the textbox contents.
    def _reset():
        return [], ""

    clear_btn.click(_reset, None, [chatbot, user_box])

# 7. Launch without `share=True` on Spaces
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)