import streamlit as st
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

st.set_page_config(page_title="Neuron-14B Chat")
st.title("🧠 Neuron-14B GGUF Tester")

# 1. Model loading (cached so the model is not reloaded on every Streamlit rerun)
@st.cache_resource
def load_model():
    with st.spinner("📦 Downloading model..."):
        model_path = hf_hub_download(
            repo_id="Anandnrnnffn/Neuron-14B-GGUF",
            filename="Neuron-14B-Q6_K.gguf",
        )
    return Llama(model_path=model_path, n_ctx=2048)

llm = load_model()

# 2. Chat UI
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation history on each rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask something..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        # Simple Q/A completion prompt; generation stops at the next "Q:" marker
        response = llm(f"Q: {prompt} \nA:", max_tokens=150, stop=["Q:"])
        # Trim leading/trailing whitespace from the raw completion text
        answer = response["choices"][0]["text"].strip()
        st.markdown(answer)
    st.session_state.messages.append({"role": "assistant", "content": answer})
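
# --- Usage sketch (assumptions: the script is saved as app.py; the
# Anandnrnnffn/Neuron-14B-GGUF repo is publicly reachable; CPU inference
# of a 14B Q6_K GGUF needs on the order of 12 GB of free RAM) ---
#
#   pip install streamlit llama-cpp-python huggingface_hub
#   streamlit run app.py
#
# The first run downloads the ~multi-GB GGUF file into the Hugging Face
# cache; @st.cache_resource then keeps the loaded Llama instance alive
# across reruns, so only the very first request pays the load cost.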