Fu01978 committed
Commit b53b3ec · verified · 1 Parent(s): c392854

Update app.py

Files changed (1):
  1. app.py +122 -92
app.py CHANGED
@@ -1,77 +1,137 @@
  import os
+ import shutil
+ import time
+ import stat
  import requests
  from huggingface_hub import hf_hub_download, hf_hub_url
  from llama_cpp import Llama
  import gradio as gr
- import tempfile

- # -------------------------
- # Config: change if you want
- # -------------------------
+ # --------- config ----------
  REPO_ID = "mradermacher/EuroLLM-1.7B-Instruct-GGUF"
  FILENAME = "EuroLLM-1.7B-Instruct.Q8_0.gguf"
  SYSTEM_PROMPT = "You are a helpful assistant. Answer concisely and helpfully."
-
- # local path we'll store the model
  MODEL_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "models")
  os.makedirs(MODEL_DIR, exist_ok=True)
- MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
+ DEST_PATH = os.path.join(MODEL_DIR, FILENAME)
+ # ---------------------------

- # -------------------------
- # Helper: robust download
- # -------------------------
- def download_from_hf(repo_id: str, filename: str, dest: str) -> str:
-     """Download using huggingface_hub if possible; fallback to direct url via requests."""
+ def robust_download(repo_id: str, filename: str, dest: str, max_attempts: int = 2) -> str:
+     """
+     Try hf_hub_download first. If it returns a cached path, copy it into `dest`.
+     If hf_hub_download fails, fall back to the Hugging Face direct URL via requests.
+     After placing the file at dest, fsync and chmod to ensure llama-cpp-python can read it.
+     """
+     # quick exit if already present with some reasonable size
      if os.path.exists(dest) and os.path.getsize(dest) > 0:
-         print(f"Model already exists at {dest}")
+         print(f"[robust_download] Already present: {dest} ({os.path.getsize(dest)} bytes)")
          return dest

-     try:
-         print("Trying hf_hub_download...")
-         path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=MODEL_DIR)
-         # hf_hub_download may return a cache path; copy/move to dest if needed
-         if os.path.abspath(path) != os.path.abspath(dest):
-             # move the cached file into our models folder with the expected name
-             os.replace(path, dest)
-             path = dest
-         print("Downloaded via hf_hub_download:", path)
-         return path
-     except Exception as e:
-         print("hf_hub_download failed:", e)
-         # fallback: construct the direct URL and download via requests
+     last_err = None
+     for attempt in range(1, max_attempts + 1):
          try:
-             print("Falling back to direct URL via requests...")
-             url = hf_hub_url(repo_id=repo_id, filename=filename)
-             # url is the Hub URL (signed? but usually works for public repos)
-             # If user provided direct URL with ?download=true, you can paste that directly.
-             print("Downloading from:", url)
-             with requests.get(url, stream=True, timeout=60) as r:
-                 r.raise_for_status()
-                 with open(dest, "wb") as f:
-                     for chunk in r.iter_content(chunk_size=8192):
-                         if chunk:
-                             f.write(chunk)
-             print("Downloaded fallback to:", dest)
+             print(f"[robust_download] Attempt {attempt}: hf_hub_download...")
+             cached_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=MODEL_DIR)
+             print("[robust_download] hf_hub_download returned:", cached_path)
+
+             # If hf_hub_download already saved directly at dest, great
+             if os.path.abspath(cached_path) == os.path.abspath(dest):
+                 print("[robust_download] File is already at dest.")
+             else:
+                 # copy to dest (safer than os.replace on some mount setups)
+                 print(f"[robust_download] Copying cached file -> {dest}")
+                 shutil.copy2(cached_path, dest)
+
+             # ensure it's synced to disk and readable
+             with open(dest, "rb") as f:
+                 f.flush()
+                 try:
+                     os.fsync(f.fileno())
+                 except OSError:
+                     # some environments / filesystems may not support fsync; ignore if so
+                     pass
+
+             # set sane permissions
+             os.chmod(dest, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)  # 0o644
+
+             # verify
+             size = os.path.getsize(dest)
+             if size == 0:
+                 raise RuntimeError("Downloaded file has size 0 after copy")
+             print(f"[robust_download] Success: {dest} ({size} bytes)")
              return dest
-         except Exception as e2:
-             raise RuntimeError(f"Both hf_hub_download and direct download failed: {e2}")
-
- # -------------------------
- # Ensure model is present
- # -------------------------
- model_path = download_from_hf(REPO_ID, FILENAME, MODEL_PATH)
-
- # -------------------------
- # Load the model (llama-cpp-python)
- # -------------------------
- llm = Llama(
-     model_path=model_path,
-     n_ctx=2048,  # lower if you need less memory
-     n_threads=4,
-     n_gpu_layers=0,  # CPU-only. If you have GPU layers available, adjust.
-     # stream is set per-call in create_chat_completion below.
- )
-
- # -------------------------
- # Chat formatting helpers
- # -------------------------
+
+         except Exception as e:
+             print(f"[robust_download] hf_hub_download attempt failed: {e}")
+             last_err = e
+             # fall back to the direct requests download on the next loop or once attempts are exhausted
+             time.sleep(1)
+
+     # fallback: direct URL using hf_hub_url and requests
+     try:
+         print("[robust_download] Falling back to direct download via requests...")
+         url = hf_hub_url(repo_id=repo_id, filename=filename)
+         print("[robust_download] Downloading from URL:", url)
+         with requests.get(url, stream=True, timeout=120) as r:
+             r.raise_for_status()
+             tmp_path = dest + ".part"
+             with open(tmp_path, "wb") as f:
+                 for chunk in r.iter_content(chunk_size=8192):
+                     if chunk:
+                         f.write(chunk)
+                 f.flush()
+         # move into place
+         shutil.move(tmp_path, dest)
+         # fsync + chmod
+         with open(dest, "rb") as f:
+             try:
+                 os.fsync(f.fileno())
+             except OSError:
+                 pass
+         os.chmod(dest, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
+         print("[robust_download] Direct download success:", dest)
+         return dest
+     except Exception as e2:
+         print("[robust_download] Direct download failed:", e2)
+         raise RuntimeError(f"All download attempts failed. last_err={last_err}, last_fallback_err={e2}")
+
+ # ---------- ensure model present ----------
+ print("Trying to ensure model is present at DEST_PATH:", DEST_PATH)
+ model_path = robust_download(REPO_ID, FILENAME, DEST_PATH)
+
+ # debug check: list the models folder and stat
+ print("DEBUG: listing model dir:", MODEL_DIR)
+ for fn in sorted(os.listdir(MODEL_DIR)):
+     p = os.path.join(MODEL_DIR, fn)
+     try:
+         st = os.stat(p)
+         print(f" - {fn}: exists, size={st.st_size}, mode={oct(st.st_mode)}")
+     except FileNotFoundError:
+         print(f" - {fn}: NOT FOUND after copy (weird)")
+
+ # small safety wait for the FS to settle (rarely needed, but prevents a race on some hosted FS)
+ time.sleep(0.2)
+
+ # ---------- initialize llama ----------
+ try:
+     print("Initializing Llama with model_path:", model_path)
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=2048,
+         n_threads=4,
+         n_gpu_layers=0,
+     )
+ except ValueError as ve:
+     # dump extra debug info for logs and re-raise with context
+     print("Llama init ValueError:", ve)
+     print("Model dir listing at failure:")
+     for fn in sorted(os.listdir(MODEL_DIR)):
+         p = os.path.join(MODEL_DIR, fn)
+         try:
+             st = os.stat(p)
+             print(f" * {p}: size={st.st_size}, mode={oct(st.st_mode)}")
+         except Exception as ex:
+             print(" * stat failed for", p, ex)
+     raise
+
+ # ---------- chat utilities ----------
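An aside on the cache-then-copy dance in robust_download above: recent huggingface_hub releases can place the file straight into a target directory, which would make the shutil.copy2, fsync, and chmod fix-ups largely unnecessary. A minimal sketch, assuming a huggingface_hub version whose hf_hub_download accepts local_dir (behavior around symlinks has varied between releases, so verify on your installed version):

    from huggingface_hub import hf_hub_download

    # Sketch only: download directly into ./models instead of the HF cache,
    # so nothing needs to be copied out of the cache afterwards.
    path = hf_hub_download(
        repo_id="mradermacher/EuroLLM-1.7B-Instruct-GGUF",
        filename="EuroLLM-1.7B-Instruct.Q8_0.gguf",
        local_dir="models",
    )
    print(path)  # e.g. models/EuroLLM-1.7B-Instruct.Q8_0.gguf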
@@ -78,34 +138,16 @@
  def build_messages(history, user_message, system_prompt=SYSTEM_PROMPT):
-     """
-     Convert history (list of [user, assistant]) into chat messages format expected by create_chat_completion.
-     Then append the current user_message at the end.
-     """
      messages = []
      if system_prompt:
          messages.append({"role": "system", "content": system_prompt})
-
-     # history is list of [user, assistant] pairs
      for user_msg, assistant_msg in history:
          messages.append({"role": "user", "content": user_msg})
-         if assistant_msg is not None and assistant_msg != "":
+         if assistant_msg:
              messages.append({"role": "assistant", "content": assistant_msg})
-
-     # now add current user message
      messages.append({"role": "user", "content": user_message})
      return messages

- # -------------------------
- # Streaming generator for Gradio
- # -------------------------
  def chat_fn(user_message, history):
-     """
-     Gradio ChatInterface expects either a single return (reply string) or a generator that yields partial strings.
-     We'll stream partial assistant text as it arrives from llama-cpp-python create_chat_completion(..., stream=True).
-     """
-     # history is list of [user, assistant] pairs from Gradio
      messages = build_messages(history or [], user_message)
-
-     # create_chat_completion returns an iterator when stream=True
      stream = llm.create_chat_completion(
          messages=messages,
          max_tokens=512,
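To make the history conversion in build_messages above concrete, here is the message list it produces for a short two-turn history (illustrative values only):

    history = [["Hi", "Hello! How can I help?"], ["What is GGUF?", None]]
    build_messages(history, "And Q8_0?")
    # -> [
    #   {"role": "system", "content": SYSTEM_PROMPT},
    #   {"role": "user", "content": "Hi"},
    #   {"role": "assistant", "content": "Hello! How can I help?"},
    #   {"role": "user", "content": "What is GGUF?"},
    #   {"role": "user", "content": "And Q8_0?"},
    # ]

Note that the new "if assistant_msg:" check skips both None and empty-string assistant slots, matching the longer test it replaces.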
 
@@ -113,11 +155,8 @@ def chat_fn(user_message, history):
          top_p=0.95,
          stream=True
      )
-
-     # accumulate incremental content and yield progressive replies
      partial = ""
      for chunk in stream:
-         # chunk structure: {"id":..., "object":"chat.completion.chunk", "choices":[{"delta":{"content": "..."}}, ...]}
          try:
              if "choices" in chunk and len(chunk["choices"]) > 0:
                  delta = chunk["choices"][0].get("delta", {})
@@ -125,17 +164,8 @@
                      partial += delta["content"]
                      yield partial
          except Exception:
-             # ignore malformed chunk and continue
              continue

- # -------------------------
- # Launch Gradio
- # -------------------------
- demo = gr.ChatInterface(
-     fn=chat_fn,
-     title="EuroLLM 1.7B (GGUF) — streaming chat",
-     description="Model: mradermacher/EuroLLM-1.7B-Instruct (Q8_0). System prompt enabled. Streaming ON.",
- )
-
+ demo = gr.ChatInterface(fn=chat_fn, title="EuroLLM 1.7B (robust loader + streaming)")
  if __name__ == "__main__":
      demo.launch()
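For a quick smoke test of the streaming path without launching the UI, the same create_chat_completion call can be driven directly. A hedged sketch (the prompt is made up; the chunk shape follows the chat.completion.chunk structure documented in the comment removed above, and the first delta may carry only a role rather than content):

    # Sketch: accumulate streamed deltas exactly as chat_fn does.
    text = ""
    for chunk in llm.create_chat_completion(
        messages=[{"role": "user", "content": "Say hello in Estonian."}],
        max_tokens=32,
        stream=True,
    ):
        delta = chunk["choices"][0].get("delta", {})
        text += delta.get("content", "")  # .get() tolerates role-only deltas
    print(text)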