THEZYZSTUDIO committed on
Commit
65dc97e
·
verified ·
1 Parent(s): 8daf7ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -4,8 +4,8 @@ from llama_cpp import Llama
4
  import base64
5
  import json
6
 
7
- REPO_ID = "bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF"
8
- FILENAME = "Nanbeige_Nanbeige4-3B-Thinking-2511-bf16.gguf"
9
 
10
  print("🚀 Starting THE Z AI Server...", flush=True)
11
 
@@ -16,7 +16,7 @@ def load_model():
16
  try:
17
  print("📥 Downloading Model...", flush=True)
18
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
19
- llm = Llama(model_path=model_path, n_ctx=80000, n_threads=2, verbose=False)
20
  print("✅ Model Ready!", flush=True)
21
  return True
22
  except Exception as e:
@@ -49,7 +49,7 @@ def chat(message, history_json, system_info, file_content):
49
  else:
50
  messages.append({
51
  "role": "system",
52
- "content": "First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO When you think, you must think in the user's language, and the answer must also be in the user's language. Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer should be thorough, final, complete, and comprehensive."
53
  })
54
 
55
  for h in history:
@@ -64,8 +64,10 @@ def chat(message, history_json, system_info, file_content):
64
 
65
  response_obj = llm.create_chat_completion(
66
  messages=messages,
67
- max_tokens=7000,
68
- temperature=0.7
 
 
69
  )
70
 
71
  response = response_obj['choices'][0]['message']['content'].strip()
 
4
  import base64
5
  import json
6
 
7
+ REPO_ID = "prithivMLmods/OpenRHO-2B-Thinker-GGUF"
8
+ FILENAME = "OpenRHO-2B-Thinker.F32.gguf"
9
 
10
  print("🚀 Starting THE Z AI Server...", flush=True)
11
 
 
16
  try:
17
  print("📥 Downloading Model...", flush=True)
18
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
19
+ llm = Llama(model_path=model_path, n_ctx=150000, n_threads=4, verbose=False)
20
  print("✅ Model Ready!", flush=True)
21
  return True
22
  except Exception as e:
 
49
  else:
50
  messages.append({
51
  "role": "system",
52
+ "content": "First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO When you think, you must think in the user's language, and the answer must also be in the user's language. Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer should be thorough, final, complete, and comprehensive.Do not repeat the same word or phrase more than once. Text should be clear and free of repetition. Try not to make mistakes in your answers, and mentally write the answer before presenting it to the user. Ensure it is not incorrect or contains unnecessary repetition that could damage the sentence or the message you are conveying to the user. If it is Corden, for example, you can repeat something because it is important, but do so correctly and not carelessly. Something very, very important: you are a specialist in thinking, so try to think a lot and understand every word the user said to you."
53
  })
54
 
55
  for h in history:
 
64
 
65
  response_obj = llm.create_chat_completion(
66
  messages=messages,
67
+ max_tokens=15000,
68
+ temperature=0.7,
69
+ top_k=40,
70
+ top_p=0.95
71
  )
72
 
73
  response = response_obj['choices'][0]['message']['content'].strip()