import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import torch
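
# Page setup: browser-tab title and icon.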
st.set_page_config(page_title="HiperAI Ultra Pro", page_icon="🏎️")
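
# Custom CSS: dark rounded chat bubbles and a rounded input box.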
st.markdown("""
<style>
.stChatMessage { background-color: #1e2129 !important; border-radius: 10px; padding: 10px; margin-bottom: 5px; }
.stChatInput { border-radius: 20px; }
</style>
""", unsafe_allow_html=True)

st.title("🏎️ HiperAI Ultra Speed")
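
# st.cache_resource loads the tokenizer and weights once per server process,
# instead of on every Streamlit rerun.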
@st.cache_resource
def load_optimized_model():
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"

    tokenizer = AutoTokenizer.from_pretrained(model_id)
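
    # float32 on CPU is the safe default dtype; low_cpu_mem_usage avoids
    # holding a second full copy of the weights in RAM while loading.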
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        device_map="cpu"
    )
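
    # Optional speedup via Optimum's BetterTransformer (fused attention
    # kernels). It is deprecated in recent transformers releases and does not
    # support every architecture, hence the graceful fallback.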
    try:
        from optimum.bettertransformer import BetterTransformer
        model = BetterTransformer.transform(model)
        st.sidebar.success("🚀 Optimum Speedup: ON")
    except Exception:
        st.sidebar.info("Optimum: Normal Mode")

    return tokenizer, model


with st.spinner("Warming up the model..."):
    tokenizer, model = load_optimized_model()
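
# Chat history lives in session_state so it survives Streamlit reruns.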
| if "messages" not in st.session_state: |
| st.session_state.messages = [] |
|
|
| |

with st.sidebar:
    st.title("⚙️ Settings")
    if st.button("🗑️ Clear history"):
        st.session_state.messages = []
        st.rerun()
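
# Render only the last 10 messages to keep reruns cheap.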
for message in st.session_state.messages[-10:]:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask HiperAI..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
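        # Keep the prompt short: a system message plus the last 5 turns.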
        history = [{"role": "system", "content": "You are HiperAI; you answer quickly and in Russian."}]
        history += st.session_state.messages[-5:]
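
        # return_dict=True also returns the attention mask, so generate() is
        # not left to infer it from the input ids alone.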
        inputs = tokenizer.apply_chat_template(
            history,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True
        ).to(model.device)
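
        # The streamer yields decoded text incrementally as tokens are generated.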
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            use_cache=True
        )
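
        # generate() blocks until completion, so it runs in a background thread
        # while the main thread consumes the streamer and updates the UI.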
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
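
        # Wrap the streamer in a generator so the finished reply can be
        # appended to history once streaming completes.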
        def stream_output():
            full_response = ""
            for new_text in streamer:
                full_response += new_text
                yield new_text
            st.session_state.messages.append({"role": "assistant", "content": full_response})
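
        # st.write_stream accepts a generator-returning callable and renders
        # each chunk as it arrives.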
        st.write_stream(stream_output)
        # The streamer is exhausted at this point, so the join returns immediately.
        thread.join()