File size: 1,461 Bytes
b0a826f
 
 
 
0710173
b0a826f
0710173
b0a826f
 
 
0710173
 
134ca8e
0710173
 
 
 
134ca8e
b0a826f
0710173
 
 
b0a826f
0710173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0a826f
 
 
134ca8e
0710173
 
134ca8e
0710173
b0a826f
 
0710173
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 📥 Model download: fetch the GGUF weights file from the Hugging Face Hub.
# The value returned by hf_hub_download is passed to Llama as the model path.
model_path = hf_hub_download(repo_id="RockSky1/Infinity_1.0", filename="Infinity_1.0.gguf")

print("Loading model...")

# Loader settings, collected in one place so they are easy to tune:
# context size (n_ctx), worker threads (n_threads), prompt batch size (n_batch).
_llama_settings = {
    "model_path": model_path,
    "n_ctx": 512,
    "n_threads": 2,
    "n_batch": 128,
}
llm = Llama(**_llama_settings)

print("Model loaded ✅")


def _message_text(content):
    """Return *content* as plain text, or ``None`` when it carries no text."""
    if isinstance(content, str):
        return content or None
    if isinstance(content, list):
        # NOTE(review): some Gradio versions appear to deliver message content
        # as a list of parts such as {"type": "text", "text": "..."} — confirm
        # against the installed version. Non-text parts are ignored.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces) or None
    # None (an empty slot in a pair) or a non-text payload: nothing to send.
    return None


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Two layouts are accepted: ``[user, assistant]`` pairs and
    ``{"role": ..., "content": ...}`` dicts. Entries in any other shape, and
    turns without text, are skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # A pair holds BOTH sides of one exchange; emit them in order.
            turns = [("user", entry[0]), ("assistant", entry[1])]
        else:
            continue

        for role, content in turns:
            text = _message_text(content)
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
    return messages


def chat_function(message, history):
    """Produce the assistant's reply for one chat turn.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation, either as
            ``[user, assistant]`` pairs or as role/content dicts.

    Returns:
        The reply text from the model, or ``"Error: <details>"`` if building
        the prompt or running the model raised an exception.
    """
    try:
        output = llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": "You are Infinity AI 🔥 created by Shivam Kumar (RockSky1) from Bihar, India."
                },
                # Previously each pair contributed only one side, chosen by
                # index parity, which dropped half of the conversation.
                *_history_to_messages(history),
                {"role": "user", "content": message}
            ],
            max_tokens=128,
            temperature=0.7
        )

        reply = output["choices"][0]["message"]["content"]

    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        reply = f"Error: {str(e)}"

    return reply


# Starter prompts handed to the chat interface as its examples.
_example_prompts = [
    "Who is Shivam Kumar?",
    "Tell me about Infinity-LLM.",
    "Write a simple Python script.",
]

# Page layout: a Markdown title followed by the chat interface wired to
# chat_function.
with gr.Blocks() as demo:
    gr.Markdown("# ♾️ Infinity-LLM v1.0 🚀")

    gr.ChatInterface(fn=chat_function, examples=_example_prompts)

# Start the Gradio app.
demo.launch()