AboyNight committed on
Commit
4dba0f7
·
verified ·
1 Parent(s): 3e21e27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -13
app.py CHANGED
@@ -1,29 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
- # Téléchargement de ton modèle
6
- model_path = hf_hub_download(
7
- repo_id="AboyNight/KryZen",
8
- filename="Dolphin3.0-Llama3.1-8B-Q5_K_M.gguf"
9
- )
10
 
11
- # Chargement du modèle
12
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
 
 
 
 
 
 
 
13
 
14
  def generate(message, history):
15
- # Format spécial pour Dolphin 3.0 (ChatML)
16
  prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
17
 
18
  output = llm(
19
- prompt,
20
- max_tokens=512,
21
- stop=["<|im_end|>", "<|im_start|>"],
22
  echo=False
23
  )
24
  return output["choices"][0]["text"]
25
 
26
- demo = gr.ChatInterface(fn=generate, title="KryZen AI (Dolphin 3.0)")
 
 
 
 
 
27
 
28
  if __name__ == "__main__":
29
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
import os
import subprocess
import sys


def _ensure_llama_cpp():
    """Install the pre-built CPU wheel of llama-cpp-python if it is missing.

    HF Spaces' free tier has no compiler toolchain for building llama.cpp
    from source, so we pull the pre-compiled wheel from the project's
    extra index before anything below tries to import it.
    """
    try:
        import llama_cpp  # noqa: F401 -- probe only; bound at module level below
    except ImportError:
        print("Installation de llama-cpp-python...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "llama-cpp-python",
            "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
        ])


_ensure_llama_cpp()
import llama_cpp

import gradio as gr
from huggingface_hub import hf_hub_download

# Model configuration: which GGUF file to fetch from the Hub.
REPO_ID = "AboyNight/KryZen"
FILENAME = "Dolphin3.0-Llama3.1-8B-Q5_K_M.gguf"

print(f"Téléchargement du modèle {FILENAME}...")
# hf_hub_download caches the file locally and returns its path.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

print("Chargement du modèle en mémoire...")
# 2048-token context window; 4 threads sized for the free HF CPU tier.
llm = llama_cpp.Llama(model_path=model_path, n_ctx=2048, n_threads=4)

def generate(message, history):
    """Generate one assistant reply with the local Dolphin 3.0 model.

    Fix: the original ignored ``history``, so the model saw only the
    latest message and had no memory of the conversation. The prompt is
    now rebuilt from every previous turn in ChatML format.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Previous turns from ``gr.ChatInterface`` — either ``(user, bot)``
        pairs (legacy tuples format) or OpenAI-style
        ``{"role": ..., "content": ...}`` dicts (messages format).

    Returns
    -------
    str
        The assistant's reply text.
    """
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: one dict per role/content entry.
            parts.append(f"<|im_start|>{turn['role']}\n{turn['content']}<|im_end|>\n")
        else:
            # legacy tuples format: (user_message, bot_message) pairs.
            user_msg, bot_msg = turn
            if user_msg:
                parts.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
            if bot_msg:
                parts.append(f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n")
    # Open the assistant turn; the stop tokens below end the generation.
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)

    output = llm(
        prompt,
        max_tokens=512,
        stop=["<|im_end|>", "<|im_start|>"],
        echo=False
    )
    return output["choices"][0]["text"]
 
# Chat UI: gr.ChatInterface wires `generate` to a chatbot widget.
demo = gr.ChatInterface(
    fn=generate,
    title="KryZen AI (Dolphin 3.0)",
    description="Hébergé gratuitement sur HuggingFace Spaces",
)

if __name__ == "__main__":
    # Bind on all interfaces, port 7860 (the port HF Spaces expects).
    demo.launch(server_name="0.0.0.0", server_port=7860)