import streamlit as st
from llama_cpp import Llama

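# Fetch the quantized GGUF weights from the Hugging Face Hub (cached locally
# by huggingface_hub after the first download).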
repo_id = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF"


@st.cache_resource
def load_llm():
    # Cache the loaded model across Streamlit reruns so the weights are not
    # reloaded from disk on every user interaction.
    return Llama.from_pretrained(
        repo_id=repo_id,
        filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf",
        verbose=True,
        use_mmap=True,   # memory-map the weights rather than copying them
        use_mlock=True,  # pin the mapped pages in RAM
        n_threads=4,
        n_threads_batch=4,
        n_ctx=8000,      # context window, in tokens
    )


llm = load_llm()
print(f"{repo_id} loaded successfully. ✅")


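# With stream=True, create_chat_completion yields OpenAI-style chunks whose
# "delta" dicts carry each new piece of generated text.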
def response_generator(messages):
    completion = llm.create_chat_completion(
        messages, max_tokens=2048, stream=True, temperature=0.7, top_p=0.95
    )
    for chunk in completion:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            yield delta["content"]


st.title("CSV TO SQL")

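# Keep the chat history in session_state so it survives Streamlit reruns.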
if "messages" not in st.session_state:
    st.session_state.messages = []

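# Replay the stored conversation so the transcript persists across reruns.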
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("What is up?"):
    # Record the user's message, then echo it in the chat.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Build the model input: the system message followed by the full history.
    # The new prompt is already in st.session_state.messages, so appending it
    # again here would send it to the model twice.
    messages = [{"role": "system", "content": "You are a helpful assistant"}]
    messages.extend(st.session_state.messages)

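    # st.write_stream renders the generator's chunks incrementally and
    # returns the concatenated text once streaming finishes.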
    with st.chat_message("assistant"):
        response = st.write_stream(response_generator(messages=messages))
    st.session_state.messages.append({"role": "assistant", "content": response})
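
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py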