Spaces:

richardcsuwandi
/

javanese-knowledge-assistant

Sleeping

File size: 1,591 Bytes

58b771c
35d9f90
4eef3cc
 
58b771c
21fd9aa
35d9f90
6aefc3e
58b771c
4eef3cc
 
 
35d9f90
 
 
58b771c
 
 
 
 
 
 
 
 
21fd9aa
35d9f90
 
 
 
 
 
 
 
e0b6303
 
35d9f90
 
 
e0b6303
 
35d9f90
 
58b771c
 
 
 
ac35195
e0b6303
58b771c
 
 
 
e0b6303

import gradio as gr
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Hub repository that provides both the fine-tuned adapter and its tokenizer.
model_name = "richardcsuwandi/llama2-javanese"

# Load the PEFT checkpoint on CPU in bfloat16 and immediately merge the
# adapter into the base model, so the rest of the app works with a single
# merged model object.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
    offload_folder="./",
).merge_and_unload()
model.eval()  # evaluation mode: this app only runs inference

# Tokenizer from the same repository; padding uses token id 0 on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left"
tokenizer.pad_token_id = 0

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature=None,
    top_p=None,
):
    """Generate one reply to ``message`` with the merged fine-tuned model.

    Args:
        message: The latest user message.
        history: Earlier (user, assistant) turns handed over by the chat UI.
            It is accepted for interface compatibility but is not included
            in the prompt, so every reply is generated from the current
            message only.
        system_message: System prompt placed inside the ``<<SYS>>`` section.
        max_tokens: Maximum number of tokens to generate for the reply
            (the prompt does not count against this budget).
        temperature: Optional sampling temperature. Sampling is enabled only
            when this is a positive number; otherwise decoding is greedy.
            Optional because the chat UI may supply only ``system_message``
            and ``max_tokens`` as extra inputs.
        top_p: Optional nucleus-sampling threshold; applied only when
            sampling is enabled via ``temperature``.

    Returns:
        The decoded reply text with the prompt and special tokens removed.
    """
    # Llama-2 chat style prompt: system prompt in <<SYS>>, user turn in [INST].
    # NOTE(review): the tokenizer may also prepend its own BOS token, which
    # would duplicate the literal "<s>" here -- confirm against the prompt
    # format used during fine-tuning before changing it.
    input_text = f"<s>[INST] <<SYS>> {system_message} <</SYS>> {message} [/INST]"

    # Tokenize and move the tensors to wherever the model lives.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    generation_kwargs = {
        # Budget applies to the reply only; `max_length` would also count the
        # prompt and could leave no room for an answer on long inputs.
        "max_new_tokens": int(max_tokens),
        "repetition_penalty": 1.2,
        "pad_token_id": tokenizer.pad_token_id,
    }

    # Sampling with a non-positive temperature is invalid, so fall back to
    # greedy decoding unless a usable temperature was provided.
    if temperature is not None and float(temperature) > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
        if top_p is not None:
            generation_kwargs["top_p"] = float(top_p)

    output_sequences = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        **generation_kwargs,
    )

    # The output starts with the prompt tokens; keep only what was generated.
    input_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(
        output_sequences[0][input_length:],
        skip_special_tokens=True,
    )

    return generated_text

# Chat UI wired to `respond`. The extra inputs are handed to the callback
# after the message and history, in the order they are listed here.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="Sampeyan minangka chatbot umum sing tansah mangsuli nganggo basa Jawa.",
        ),
        gr.Slider(
            label="Max tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)