import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
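
# To launch the app (assuming this file is saved as app.py):
#
#     streamlit run app.py
#
# Rough sizing note: the 32B checkpoint in float16 is about 64 GB of weights
# alone (32B params x 2 bytes), so it needs a multi-GPU node or one very large
# accelerator; device_map='auto' shards it across whatever devices are visible.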


st.set_page_config(
    page_title="Qwen2.5-Coder Chat",
    page_icon="💬",
    layout="wide",
)


st.title("💬 Qwen2.5-Coder Chat Interface")


# Keep the chat history in session state so it survives Streamlit reruns.
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
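
# Each history entry is a {'role': ..., 'content': ...} dict, the same shape
# that tokenizer.apply_chat_template consumes (see the note further down).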


@st.cache_resource
def load_model():
    # Cached with st.cache_resource so the weights are loaded once per server
    # process rather than on every Streamlit rerun.
    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map='auto'
    )
    return tokenizer, model
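
# If the 32B model doesn't fit, a sketch of 4-bit loading (assuming the
# bitsandbytes package is installed) via the standard transformers config:
#
#     from transformers import BitsAndBytesConfig
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#         device_map='auto',
#     )
#
# Smaller sizes of the same family (e.g. Qwen/Qwen2.5-Coder-7B-Instruct) are
# drop-in replacements for model_name.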


with st.spinner("Loading model... This may take a while..."):
    tokenizer, model = load_model()


def generate_response(prompt, max_tokens=2048, temperature=0.7, top_p=0.9):
    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,  # budget for generated tokens only;
                                        # max_length would also count the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=1
        )

    # Decode only the newly generated tokens. Slicing the decoded string by
    # len(prompt) is fragile, since tokenization does not round-trip exactly.
    response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response.strip()
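
# The *-Instruct checkpoints are tuned on a chat format, so wrapping the raw
# prompt with the tokenizer's chat template usually yields better replies.
# A minimal sketch using the standard transformers API:
#
#     messages = [{"role": "user", "content": prompt}]
#     text = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(text, return_tensors='pt').input_ids.to(model.device)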


# Sidebar settings. These run before the chat handler below, so the slider
# values already exist by the time generate_response is called. (The widgets
# go to the sidebar directly; the original column split was never rendered.)
st.sidebar.header("Settings")
max_tokens = st.sidebar.slider(
    "Maximum Tokens",
    min_value=512,
    max_value=4096,
    value=2048,
    step=256,
    help="Set the maximum number of tokens for the model's response."
)

temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Controls the randomness of the model's output."
)

top_p = st.sidebar.slider(
    "Top-p (Nucleus Sampling)",
    min_value=0.1,
    max_value=1.0,
    value=0.9,
    step=0.1,
    help="Controls the diversity of the model's output."
)

if st.sidebar.button("Clear Chat"):
    st.session_state['messages'] = []
    st.rerun()

# Render the conversation so far.
for message in st.session_state['messages']:
    if message['role'] == 'user':
        st.markdown(f"**You:** {message['content']}")
    else:
        st.markdown(f"**Qwen2.5-Coder:** {message['content']}")

# Input form; clear_on_submit empties the text area after each send.
with st.form(key='chat_form', clear_on_submit=True):
    user_input = st.text_area("You:", height=100)
    submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
    st.session_state['messages'].append({'role': 'user', 'content': user_input})

    with st.spinner("Qwen2.5-Coder is typing..."):
        response = generate_response(
            user_input,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
    st.session_state['messages'].append({'role': 'assistant', 'content': response})

    # st.rerun() replaces the deprecated st.experimental_rerun().
    st.rerun()
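
# For token-by-token output instead of waiting on the full reply, one option
# (a sketch) is transformers' TextIteratorStreamer feeding st.write_stream,
# with generation running in a background thread:
#
#     from threading import Thread
#     from transformers import TextIteratorStreamer
#
#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
#                                     skip_special_tokens=True)
#     Thread(target=model.generate,
#            kwargs=dict(inputs=inputs, streamer=streamer,
#                        max_new_tokens=max_tokens)).start()
#     response = st.write_stream(streamer)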