import os

# Hugging Face resolves cache locations at import time, so set these before
# importing transformers. TRANSFORMERS_CACHE is deprecated in recent
# transformers releases; HF_HOME covers both.
os.environ["TRANSFORMERS_CACHE"] = "./hf_cache"
os.environ["HF_HOME"] = "./hf_cache"

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

st.title("🤖 Fine-tuned Qwen3 Chatbot")

# The base repo supplies the tokenizer; the fine-tuned repo supplies the weights.
BASE_MODEL = "unsloth/Qwen3-4B-Instruct-2507"
FINE_TUNED = "phuphan1310/Fine-tuned-model-test"

device = "cuda" if torch.cuda.is_available() else "cpu"

@st.cache_resource(show_spinner=True)
def load_model():
    # st.cache_resource loads the weights once per process instead of on
    # every Streamlit rerun.
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_MODEL,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        FINE_TUNED,
        trust_remote_code=True,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",
    )
    return tokenizer, model

tokenizer, model = load_model()

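# load_model() treats FINE_TUNED as a fully merged checkpoint. If the repo
# actually holds only a LoRA adapter (an assumption -- check its files on the
# Hub), the adapter would instead be attached to the base weights with peft.
# Hypothetical alternative, not called by the app; requires `pip install peft`:
def load_adapter_model():
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        trust_remote_code=True,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",
    )
    return PeftModel.from_pretrained(base, FINE_TUNED)
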
def generate_response(prompt):
    # Wrap the raw prompt in the model's chat template (Qwen3 is a chat model).
    chat = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    # model.device follows wherever device_map="auto" placed the weights.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Decode only the newly generated tokens; decoding outputs[0] in full
    # would echo the prompt back into the reply.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

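# generate_response blocks until the full reply is ready. transformers'
# TextIteratorStreamer can instead yield text chunks as they are produced,
# e.g. for st.write_stream. Sketch under the same sampling settings; the
# helper name is hypothetical and nothing in the app calls it:
def generate_response_streaming(prompt):
    from threading import Thread
    from transformers import TextIteratorStreamer

    chat = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    kwargs = dict(
        inputs, streamer=streamer, max_new_tokens=200,
        temperature=0.7, top_p=0.9, do_sample=True,
    )
    # generate() runs in a background thread while the streamer is consumed.
    Thread(target=model.generate, kwargs=kwargs).start()
    yield from streamer
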
| | if "messages" not in st.session_state: |
| | st.session_state.messages = [] |
| |
|
user_input = st.text_input("Enter your message:")
if user_input:
    st.session_state.messages.append({"role": "user", "content": user_input})
    response = generate_response(user_input)
    st.session_state.messages.append({"role": "assistant", "content": response})

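# Only the newest user message is sent to the model above, so the bot has no
# memory of earlier turns. A history-aware call would push the accumulated
# st.session_state.messages through the chat template instead. Hypothetical
# helper, not wired into the app:
def generate_response_with_history(history):
    # `history` is a list of {"role": ..., "content": ...} dicts in the same
    # shape as st.session_state.messages.
    text = tokenizer.apply_chat_template(
        history, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs, max_new_tokens=200, temperature=0.7, top_p=0.9, do_sample=True
    )
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
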
for msg in st.session_state.messages:
    if msg["role"] == "user":
        st.markdown(f"**You:** {msg['content']}")
    else:
        st.markdown(f"**Bot:** {msg['content']}")
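
# Streamlit also ships dedicated chat widgets (st.chat_input / st.chat_message,
# available since Streamlit 1.24): chat_input clears itself after each submit
# and chat_message renders role-styled bubbles. A drop-in alternative to the
# text_input UI above, defined here as a sketch and not called by the app:
def render_chat_ui():
    if prompt := st.chat_input("Enter your message:"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.session_state.messages.append(
            {"role": "assistant", "content": generate_response(prompt)}
        )
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])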