Files changed (1) hide show
  1. app.py +122 -4
app.py CHANGED
@@ -26,9 +26,127 @@ zero = torch.Tensor([0]).cuda()
26
  print(zero.device) # <-- 'cpu' πŸ€”
27
 
28
  @spaces.GPU(duration=120)
29
- def greet(n):
30
- print(zero.device) # <-- 'cuda:0' πŸ€—
31
- return f"Hello {zero + n} Tensor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
34
  demo.launch()
 
26
  print(zero.device) # <-- 'cpu' πŸ€”
27
 
28
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat completion for *message* via a llama.cpp-backed agent.

    Yields the accumulated reply text after every streamed chunk so the
    Gradio ChatInterface can render the answer incrementally.
    """
    # The Llama instance is cached in module globals and only (re)loaded
    # when no model is resident or the UI selected a different model file.
    global llm
    global llm_model

    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt="You are Dolphin, an AI assistant that helps humanity, trained to specialize in reasoning and first-principles analysis. When responding, always format your replies using <think>{reasoning}</think>{answer}. Use at least 6 reasoning steps and perform a root cause analysis before answering. However, if the answer is very easy and requires little thought, you may leave the <think></think> block empty. Your responses should be detailed, structured with rich Markdown formatting, and engaging with emojis. Be extensive in your explanations, just as the greatest scientific minds would be. Always reason through the problem first, unless it's trivial, in which case you may answer directly.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    # Sampling configuration mirrors the UI sliders, streamed token-by-token.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the prior (user, assistant) turns into the agent's chat history.
    chat_log = BasicChatHistory()
    for user_text, assistant_text in history:
        chat_log.add_message({'role': Roles.user, 'content': user_text})
        chat_log.add_message({'role': Roles.assistant, 'content': assistant_text})

    token_stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=chat_log,
        returns_streaming_generator=True,
        print_output=False,
    )

    partial = ""
    for chunk in token_stream:
        partial += chunk
        yield partial
88
+
89
# Chat UI wiring: the respond() generator above is exposed through a
# Gradio ChatInterface with model selection and sampling controls.
# (The commented-out greet()/gr.Interface demo that previously sat here
# was dead code and has been removed; it remains in version control.)
demo = gr.ChatInterface(
    respond,
    # Extra widgets appear under the chat box and are passed to respond()
    # positionally, in this order, after (message, history).
    additional_inputs=[
        gr.Dropdown(
            [
                'cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q8_0.gguf',
                'qwen2-Q3_K_M.gguf'
            ],
            value="cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q8_0.gguf",
            label="Model",
        ),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
        gr.Slider(minimum=0.05, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
    ],
    # Dark slate palette layered on the Soft theme.
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
        body_background_fill_dark="#0f172a",
        block_background_fill_dark="#0f172a",
        block_border_width="1px",
        block_title_background_fill_dark="#070d1b",
        input_background_fill_dark="#0c1425",
        button_secondary_background_fill_dark="#070d1b",
        border_color_accent_dark="#21293b",
        border_color_primary_dark="#21293b",
        background_fill_secondary_dark="#0f172a",
        color_accent_soft_dark="transparent"
    ),
    # css and PLACEHOLDER are defined earlier in this file (outside this view).
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Cognitive Computation: Chat Dolphin 🐬",
    chatbot=gr.Chatbot(
        scale=1,
        placeholder=PLACEHOLDER,
        show_copy_button=True
    )
)

demo.launch()