Fu01978 committed on
Commit 50d2614 · verified · 1 Parent(s): b53b3ec

Update app.py

Files changed (1)
  1. app.py +190 -53
app.py CHANGED
@@ -7,22 +7,32 @@ from huggingface_hub import hf_hub_download, hf_hub_url
 from llama_cpp import Llama
 import gradio as gr

-# --------- config ----------
+# ------------------ CONFIG ------------------
 REPO_ID = "mradermacher/EuroLLM-1.7B-Instruct-GGUF"
 FILENAME = "EuroLLM-1.7B-Instruct.Q8_0.gguf"
 SYSTEM_PROMPT = "You are a helpful assistant. Answer concisely and helpfully."
 MODEL_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "models")
 os.makedirs(MODEL_DIR, exist_ok=True)
 DEST_PATH = os.path.join(MODEL_DIR, FILENAME)
-# ---------------------------
+
+# Llama runtime params
+N_CTX = 2048
+MAX_TOKENS = 512
+TEMPERATURE = 0.2
+TOP_P = 0.95
+# Threads: be conservative; Spaces gives limited CPU
+N_THREADS = min(4, max(1, (os.cpu_count() or 1) // 2))
+# --------------------------------------------

 def robust_download(repo_id: str, filename: str, dest: str, max_attempts: int = 2) -> str:
     """
-    Try hf_hub_download first. If it returns a cached path, copy it into `dest`.
-    If hf_hub_download fails, fallback to huggingface direct URL via requests.
-    After placing the file at dest, fsync and chmod to ensure llama-cpp-python can read it.
+    Download model robustly:
+      1) Try hf_hub_download (may return a cached path)
+      2) If cached path differs, copy to dest, fsync, chmod
+      3) If hf_hub_download fails repeat attempts then fallback to direct URL via requests
+    Returns the final dest path (guaranteed to exist and be readable or raises)
     """
-    # quick exit if already present with some reasonable size
+    # quick exit
     if os.path.exists(dest) and os.path.getsize(dest) > 0:
         print(f"[robust_download] Already present: {dest} ({os.path.getsize(dest)} bytes)")
         return dest
@@ -30,31 +40,29 @@ def robust_download(repo_id: str, filename: str, dest: str, max_attempts: int =
     last_err = None
     for attempt in range(1, max_attempts + 1):
         try:
-            print(f"[robust_download] Attempt {attempt}: hf_hub_download...")
+            print(f"[robust_download] Attempt {attempt}: hf_hub_download(repo_id={repo_id}, filename={filename})")
             cached_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=MODEL_DIR)
             print("[robust_download] hf_hub_download returned:", cached_path)

-            # If hf_hub_download already saved directly at dest, great
-            if os.path.abspath(cached_path) == os.path.abspath(dest):
-                print("[robust_download] File is already at dest.")
-            else:
-                # copy to dest (safer than os.replace in some mount setups)
+            # If hf_hub_download already saved at dest, good.
+            if os.path.abspath(cached_path) != os.path.abspath(dest):
                 print(f"[robust_download] Copying cached file -> {dest}")
                 shutil.copy2(cached_path, dest)
+            else:
+                print("[robust_download] Cached path equals dest; no copy needed.")

-            # ensure it's synced to disk and readable
+            # ensure it's synced to disk
             with open(dest, "rb") as f:
-                f.flush()
                 try:
+                    f.flush()
                     os.fsync(f.fileno())
-                except OSError:
-                    # some environments / filesystems may not support fsync; ignore if so
+                except Exception:
+                    # some hosted filesystems don't support fsync; ignore but we tried
                     pass

-            # set sane permissions
+            # set readable permissions
             os.chmod(dest, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)  # 0o644

-            # verify
             size = os.path.getsize(dest)
             if size == 0:
                 raise RuntimeError("Downloaded file has size 0 after copy")
@@ -62,44 +70,43 @@ def robust_download(repo_id: str, filename: str, dest: str, max_attempts: int =
             return dest

         except Exception as e:
-            print(f"[robust_download] hf_hub_download attempt failed: {e}")
+            print(f"[robust_download] hf_hub_download attempt {attempt} failed: {e}")
             last_err = e
-            # fallback to direct requests download next loop or after attempts exhausted
             time.sleep(1)

-    # fallback: direct URL using hf_hub_url and requests
+    # fallback: direct URL using hf_hub_url + requests
     try:
         print("[robust_download] Falling back to direct download via requests...")
         url = hf_hub_url(repo_id=repo_id, filename=filename)
         print("[robust_download] Downloading from URL:", url)
+        tmp_path = dest + ".part"
         with requests.get(url, stream=True, timeout=120) as r:
             r.raise_for_status()
-            tmp_path = dest + ".part"
             with open(tmp_path, "wb") as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
                 f.flush()
-            # move into place
-            shutil.move(tmp_path, dest)
+        shutil.move(tmp_path, dest)
+
         # fsync + chmod
         with open(dest, "rb") as f:
             try:
                 os.fsync(f.fileno())
-            except OSError:
+            except Exception:
                 pass
         os.chmod(dest, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
         print("[robust_download] Direct download success:", dest)
         return dest
     except Exception as e2:
         print("[robust_download] Direct download failed:", e2)
-        raise RuntimeError(f"All download attempts failed. last_err={last_err}, last_fallback_err={e2}")
+        raise RuntimeError(f"All download attempts failed. last_err={last_err}, fallback_err={e2}")

-# ---------- ensure model present ----------
-print("Trying to ensure model is present at DEST_PATH:", DEST_PATH)
+# ------------------ Ensure model exists ------------------
+print("Ensuring model present at:", DEST_PATH)
 model_path = robust_download(REPO_ID, FILENAME, DEST_PATH)

-# debug check: list the models folder and stat
+# Debug listing of model dir (useful in logs)
 print("DEBUG: listing model dir:", MODEL_DIR)
 for fn in sorted(os.listdir(MODEL_DIR)):
     p = os.path.join(MODEL_DIR, fn)
@@ -107,22 +114,22 @@ for fn in sorted(os.listdir(MODEL_DIR)):
         st = os.stat(p)
         print(f" - {fn}: exists, size={st.st_size}, mode={oct(st.st_mode)}")
     except FileNotFoundError:
-        print(f" - {fn}: NOT FOUND after copy (weird)")
+        print(f" - {fn}: NOT FOUND after copy")

-# small safety wait for FS to settle (rarely needed, but prevents race in some hosted FS)
+# tiny delay for some hosted FS race conditions
 time.sleep(0.2)

-# ---------- initialize llama -----------
+# ------------------ Initialize Llama ------------------
 try:
     print("Initializing Llama with model_path:", model_path)
     llm = Llama(
         model_path=model_path,
-        n_ctx=2048,
-        n_threads=4,
+        n_ctx=N_CTX,
+        n_threads=N_THREADS,
         n_gpu_layers=0,
     )
+    print("Llama initialized successfully.")
 except ValueError as ve:
-    # dump extra debug info for logs and re-raise with context
     print("Llama init ValueError:", ve)
     print("Model dir listing at failure:")
     for fn in sorted(os.listdir(MODEL_DIR)):
@@ -134,38 +141,168 @@ except ValueError as ve:
             print(" * stat failed for", p, ex)
     raise

-# ---------- chat utilities ----------
+# ------------------ Helpers ------------------
 def build_messages(history, user_message, system_prompt=SYSTEM_PROMPT):
+    """
+    Convert Gradio history (list of [user, assistant]) into chat messages:
+    [{role: system}, {role: user}, {role: assistant}, ... , {role: user}]
+    """
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
+        if assistant_msg is not None and assistant_msg != "":
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": user_message})
     return messages

+def parse_final_response(resp):
+    """
+    Normalize single-shot responses to a string.
+    Handles common llama-cpp-python shapes.
+    """
+    try:
+        if resp is None:
+            return ""
+        if isinstance(resp, str):
+            return resp
+        if isinstance(resp, dict):
+            choices = resp.get("choices", [])
+            if len(choices) > 0:
+                c = choices[0]
+                # message.content (chat format)
+                if isinstance(c.get("message"), dict):
+                    return c["message"].get("content", "") or ""
+                # text fallback
+                if "text" in c and c["text"]:
+                    return c["text"]
+                # delta fallback
+                if "delta" in c and isinstance(c["delta"], dict):
+                    return c["delta"].get("content", "") or ""
+        return str(resp)
+    except Exception:
+        return str(resp)
+
+# ------------------ Robust streaming chat function ------------------
 def chat_fn(user_message, history):
+    """
+    Generator for Gradio ChatInterface streaming.
+    Yields progressive partial strings. Falls back to non-streaming if needed.
+    Ensures Gradio never sees an empty generator (avoids StopAsyncIteration crash).
+    """
     messages = build_messages(history or [], user_message)
-    stream = llm.create_chat_completion(
-        messages=messages,
-        max_tokens=512,
-        temperature=0.2,
-        top_p=0.95,
-        stream=True
-    )
-    partial = ""
-    for chunk in stream:
+    # Attempt streaming call
+    try:
+        stream = llm.create_chat_completion(
+            messages=messages,
+            max_tokens=MAX_TOKENS,
+            temperature=TEMPERATURE,
+            top_p=TOP_P,
+            stream=True
+        )
+    except Exception as e:
+        # immediate failure: try non-stream fallback
         try:
-            if "choices" in chunk and len(chunk["choices"]) > 0:
-                delta = chunk["choices"][0].get("delta", {})
-                if "content" in delta:
-                    partial += delta["content"]
+            final = llm.create_chat_completion(
+                messages=messages,
+                max_tokens=MAX_TOKENS,
+                temperature=TEMPERATURE,
+                top_p=TOP_P,
+                stream=False
+            )
+            text = parse_final_response(final)
+            yield text
+            return
+        except Exception as e2:
+            yield f"[error] create_chat_completion failed: {e} | fallback error: {e2}"
+            return
+
+    # If the returned object isn't iterable, treat as final
+    if not hasattr(stream, "__iter__"):
+        final_text = parse_final_response(stream)
+        yield final_text
+        return
+
+    partial = ""
+    yielded_any = False
+
+    try:
+        for chunk in stream:
+            # Debug: uncomment to log raw chunks
+            # print("STREAM CHUNK:", repr(chunk))
+
+            if not chunk:
+                continue
+            # expected format: {"choices":[{"delta":{"content":"..."}}], ...}
+            try:
+                choices = chunk.get("choices", [])
+            except Exception:
+                # some weird shapes might be simple strings
+                chunk_str = str(chunk)
+                if chunk_str:
+                    partial += chunk_str
+                    yielded_any = True
                     yield partial
-        except Exception:
-            continue
+                continue
+
+            if len(choices) == 0:
+                continue
+
+            c0 = choices[0]
+            # delta content (usual)
+            delta = c0.get("delta", {})
+            if isinstance(delta, dict) and "content" in delta:
+                partial += delta["content"]
+                yielded_any = True
+                yield partial
+                continue
+
+            # final message object (some runners)
+            msg = c0.get("message") or c0.get("text") or {}
+            if isinstance(msg, dict):
+                content = msg.get("content") or msg.get("content_text") or ""
+                if content:
+                    partial = content
+                    yielded_any = True
+                    yield partial
+                    continue
+            elif isinstance(msg, str) and msg:
+                partial += msg
+                yielded_any = True
+                yield partial
+                continue
+
+    except StopIteration:
+        pass
+    except Exception as e:
+        yield f"[error] stream iteration error: {e}"
+        return
+
+    # If streaming produced nothing, fallback to non-stream
+    if not yielded_any:
+        try:
+            final = llm.create_chat_completion(
+                messages=messages,
+                max_tokens=MAX_TOKENS,
+                temperature=TEMPERATURE,
+                top_p=TOP_P,
+                stream=False
+            )
+            final_text = parse_final_response(final)
+            yield final_text if final_text is not None else ""
+            return
+        except Exception as e:
+            yield f"[error] fallback non-stream failed: {e}"
+            return
+
+# ------------------ Launch Gradio ------------------
+demo = gr.ChatInterface(
+    fn=chat_fn,
+    title="EuroLLM 1.7B (GGUF) — Robust streaming chat",
+    description="System prompt enabled. Streaming ON.",
+)

-demo = gr.ChatInterface(fn=chat_fn, title="EuroLLM 1.7B (robust loader + streaming)")
 if __name__ == "__main__":
+    # If you want a public link during testing, pass share=True in launch()
     demo.launch()
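For reference, the core pattern this commit introduces in chat_fn (try a streaming create_chat_completion, accumulate delta content, and fall back to a single non-streaming call if the stream produces nothing) can be sketched in isolation. The sketch below is illustrative only and is not part of the commit; stream_with_fallback is a hypothetical helper name, it assumes `llm` is any llama-cpp-python Llama instance such as the one app.py builds, and it relies on the same response shapes shown in the diff: streaming chunks carry choices[0]["delta"]["content"], non-streaming responses carry choices[0]["message"]["content"].

def stream_with_fallback(llm, messages, max_tokens=64):
    # Minimal sketch of the chat_fn pattern: stream first, fall back if nothing arrives.
    partial = ""
    yielded_any = False
    try:
        stream = llm.create_chat_completion(messages=messages, max_tokens=max_tokens,
                                            temperature=0.2, top_p=0.95, stream=True)
        for chunk in stream:
            delta = (chunk.get("choices") or [{}])[0].get("delta", {})
            if delta.get("content"):
                partial += delta["content"]
                yielded_any = True
                yield partial  # each yield is a progressively longer partial reply
    except Exception:
        pass  # fall through to the non-streaming call below
    if not yielded_any:
        final = llm.create_chat_completion(messages=messages, max_tokens=max_tokens,
                                           temperature=0.2, top_p=0.95, stream=False)
        yield final["choices"][0]["message"]["content"]

Consumed as `for partial in stream_with_fallback(llm, build_messages([], "Hello")): ...`, the last value yielded is the full reply, which is what gr.ChatInterface ends up displaying; the generator never finishes without yielding at least once, which is the failure mode the commit guards against.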