spanofzero committed on
Commit
059a6e6
·
verified ·
1 Parent(s): 685d841

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -27
app.py CHANGED
@@ -1,43 +1,35 @@
1
  import gradio as gr
2
- from openai import OpenAI
3
  import os
4
 
5
- # The "Kernel" Configuration
6
- # Get your API key from Moonshot AI or OpenRouter
7
- API_KEY = os.getenv("KIMI_API_KEY")
8
- BASE_URL = "https://api.moonshot.cn/v1" # Or your custom endpoint
9
 
10
- client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
 
11
 
12
def samaran_kernel_chat(message, history):
    """Stream a chat reply from the Moonshot endpoint for a Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns as (user_msg, ai_msg) pairs.

    Yields:
        The progressively accumulated assistant reply (str).
    """
    # 1. Conversation context: system persona first, then the prior turns
    #    flattened into alternating user/assistant messages, then the new input.
    turns = [
        {
            "role": "system",
            "content": "You are the Samaran Kernel. Use <think> tags for deep reasoning. Be witty, technical, and precise.",
        }
    ]
    for user_msg, ai_msg in history:
        turns.extend(
            (
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": ai_msg},
            )
        )
    turns.append({"role": "user", "content": message})

    # 2. Call the Kimi K2 engine with streaming enabled.
    stream = client.chat.completions.create(
        model="moonshot-v1-32k",  # Replace with k2-thinking if using private endpoint
        messages=turns,
        stream=True,
        temperature=0.6,  # Recommended for Kimi K2 stability
    )

    # 3. Yield the growing reply so the UI renders tokens as they arrive.
    partial_message = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            partial_message += delta
            yield partial_message
33
 
34
- # 3. The T3-Style UI Interface
35
# 3. The T3-style UI: a Gradio chat front-end bound to the streaming handler.
_ui_options = dict(
    fn=samaran_kernel_chat,
    title="Samaran Kernel T3 Chat",
    description="Running on Kimi K2 Engine. Optimized for Deep Reasoning and Agentic Logic.",
    theme="soft",
    examples=[
        "Explain the MoE architecture of Kimi K2.",
        "Draft a technical pitch for a new AI SaaS.",
    ],
)
view = gr.ChatInterface(**_ui_options)
42
 
43
  if __name__ == "__main__":
 
1
import gradio as gr
from huggingface_hub import InferenceClient
import os

# Hugging Face access token, read from the Space secret. NOTE(review): the
# secret is still named KIMI_API_KEY from the previous Moonshot setup, but it
# now holds an HF token — consider renaming the secret for clarity.
HF_TOKEN = os.getenv("KIMI_API_KEY")

# Serverless Inference client pinned to an open-source instruct model hosted
# by Hugging Face; requests are authenticated with the token above.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=HF_TOKEN)
10
 
11
def samaran_kernel_chat(message, history):
    """Stream a reply from the hosted Llama-3 model for a Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns as (user_msg, ai_msg) pairs.

    Yields:
        The progressively accumulated assistant reply (str).
    """
    system_message = "You are the Samaran Kernel. You are a privacy-first AI. Use deep reasoning and be witty."

    # Build a structured message list instead of hand-rolling a prompt string.
    # The previous code concatenated a Zephyr-style template
    # ("<|system|>...</s>"), which is NOT Llama-3-Instruct's chat format
    # ("<|start_header_id|>...<|eot_id|>"); feeding the wrong template degrades
    # output and can leak template tokens. chat_completion lets the server
    # apply the model's own chat template, so the prompt is always well-formed.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, ai_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": ai_msg})
    messages.append({"role": "user", "content": message})

    # Generate the response as a stream of OpenAI-style chunks.
    stream = client.chat_completion(messages=messages, max_tokens=512, stream=True)

    # Yield the growing reply so the UI updates token by token.
    partial_message = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:  # the final chunk may carry no content
            partial_message += delta
            yield partial_message
 
27
 
 
28
# Privacy-first chat UI: Gradio front-end bound to the streaming handler above.
_chat_ui_options = {
    "fn": samaran_kernel_chat,
    "title": "Samaran Kernel (Privacy-First)",
    "description": "Running on Open-Source Llama-3 via Hugging Face. No 3rd-party corporate data sharing.",
    "theme": "soft",
}
view = gr.ChatInterface(**_chat_ui_options)
34
 
35
  if __name__ == "__main__":