File size: 1,974 Bytes
2adf16b
 
 
70b12e1
2adf16b
 
b984098
 
2adf16b
 
 
 
f8079bf
2adf16b
d22949d
58817e7
2adf16b
 
8e47ba7
d22949d
58817e7
 
 
 
 
 
 
 
 
70b12e1
58817e7
 
 
 
 
 
 
3047a3d
58817e7
2adf16b
d22949d
 
58817e7
d22949d
 
 
2adf16b
d22949d
 
 
58817e7
 
 
d22949d
70b12e1
d22949d
58817e7
 
2adf16b
8e47ba7
 
58817e7
8e47ba7
2adf16b
30fd225
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import traceback

# Download the quantized GGUF model from the Hugging Face Hub.
# hf_hub_download caches the file locally, so only the first run pays
# the download cost; it returns the local filesystem path.
model_path = hf_hub_download(
    repo_id="AIencoder/Axon26-Coder-Q8_0-GGUF",
    filename="axon26-coder-q8_0.gguf"
)

# Load the model once at import time so every chat request reuses the
# same in-memory instance.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,  # context window size in tokens
    n_threads=2,  # low CPU thread count — presumably a small/free-tier host; confirm against actual core count
    chat_format="chatml",  # prompt template applied by create_chat_completion
    verbose=False
)

def chat(message, history):
    try:
        messages = [{"role": "system", "content": "You are a helpful assistant good at coding and general knowledge."}]

        for item in history:

            if isinstance(item, dict) and "role" in item and "content" in item:
                messages.append({"role": item["role"], "content": str(item["content"])})

            elif hasattr(item, "role") and hasattr(item, "content"):
                messages.append({"role": item.role, "content": str(item.content)})

            elif isinstance(item, (list, tuple)) and len(item) >= 2:
                user_msg = item[0]
                assistant_msg = item[1]
                if user_msg:
                    messages.append({"role": "user", "content": str(user_msg)})
                if assistant_msg:
                    messages.append({"role": "assistant", "content": str(assistant_msg)})

        messages.append({"role": "user", "content": str(message)})

        response = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            stream=True
        )

        text = ""
        for chunk in response:
            delta = chunk["choices"][0]["delta"]
            content = delta.get("content", "")
            if content:
                text += content
                yield text

    except Exception as e:
        error_msg = traceback.format_exc()
        yield f"**CRASH REPORT:**\n```python\n{error_msg}\n```"

# Build the Gradio chat UI around the streaming `chat` generator.
demo = gr.ChatInterface(
    fn=chat,
    title="T.C.S AI Bot"
)

# Bind to all interfaces on port 7860 — presumably a hosted container
# (e.g. a Space) that routes to this port; confirm for local use.
# ssr_mode=False disables Gradio's server-side rendering path.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)