| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
| import torch |
|
|
from peft import PeftModel

# Hub repository holding the fine-tuned adapter; the tokenizer is loaded from
# the same repository.
model_name = "lingadevaruhp/thoshan_Flash"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit loading through bitsandbytes, with float16 used for compute.
# NOTE(review): the base checkpoint name suggests it is already stored in
# 4-bit form -- confirm whether passing this config is still required.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model (device placement delegated to
# device_map="auto") and wrap it with the adapter weights in one expression.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        "unsloth/gemma-2-9b-it-bnb-4bit",
        quantization_config=bnb_config,
        device_map="auto",
    ),
    model_name,
)

# The script only generates text, so switch the model to evaluation mode.
model.eval()
|
|
def chat(prompt: str, history) -> str:
    """Generate a single reply to ``prompt`` with the fine-tuned model.

    The prompt is wrapped in the ``### Instruction:`` / ``### Response:``
    template, sampled from with temperature 0.8 for at most 200 new tokens,
    and only the newly generated text is returned.

    Args:
        prompt: The user's latest message.
        history: Earlier conversation turns passed in by the chat UI. It is
            accepted to satisfy the callback signature but is not used, so
            each reply is produced from the latest message alone.

    Returns:
        The generated reply with surrounding whitespace removed.
    """
    input_text = f"<s>### Instruction:\n{prompt}\n### Response:\n"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Remember how many tokens belong to the prompt so they can be dropped
    # from the generated sequence below.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.8,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Decode only the
    # continuation instead of searching the full text for "### Response:":
    # a continuation that happens to contain that marker would otherwise make
    # the function return the wrong segment.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # If the model runs on and starts inventing a follow-up turn, keep only
    # the text that precedes the next instruction marker.
    return response.split("### Instruction:")[0].strip()
|
|
# Build the chat UI around `chat`; the example prompts are offered to the
# user as ready-made messages.
iface = gr.ChatInterface(
    chat,
    examples=[
        "Hey, yeno madtha idiya?",
        "Ninna hesarenu helu",
        "What's your plan tonight?",
    ],
    description="Kannada-English FlirtAI — Chat in Kanglish!",
    title="thoshan_Flash 🔥",
)

# Start serving the interface as soon as the script runs.
iface.launch()