proti0070 committed on
Commit
b976823
·
verified ·
1 Parent(s): 837a21e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -276
app.py CHANGED
@@ -243,282 +243,6 @@ async def delete_model(request: Request):
243
  return resp.json() if resp.content else {"status": "ok"}
244
 
245
 
246
- # ─────────────────────────────────────────────
247
- # POST /shell — plain text, no SSE, no "event: line" prefix
248
- # curl -N /shell -d '{"cmd":"apt update"}'
249
- # ─────────────────────────────────────────────
250
- def _sanitize_cmd(cmd: str) -> str:
251
- """
252
- Auto-fix apt/apt-get commands so they never hang waiting for input:
253
- - prefix with sudo (non-root user on HF Spaces)
254
- - inject -y (auto yes)
255
- - inject -o flags to suppress dpkg config prompts
256
- - set DEBIAN_FRONTEND=noninteractive inline
257
- """
258
- s = cmd.strip()
259
- is_apt = s.startswith("apt ") or s.startswith("apt-get ") \
260
- or s == "apt" or s == "apt-get"
261
- if is_apt:
262
- # inject -y if not already present
263
- if " -y" not in s and " --yes" not in s:
264
- s = s.replace("apt-get ", "apt-get -y ", 1)
265
- s = s.replace("apt ", "apt -y ", 1)
266
- # inject dpkg options to suppress config file prompts
267
- dpkg_opts = '-o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold"'
268
- s = f'DEBIAN_FRONTEND=noninteractive sudo {s} {dpkg_opts}'
269
- elif not s.startswith("sudo "):
270
- # for other privileged commands let user decide, but keep env clean
271
- pass
272
- return s
273
-
274
-
275
- @app.post("/shell")
276
- async def shell_post(request: Request):
277
- import asyncio
278
- import httpx
279
- import json
280
- import os
281
- import uuid
282
- from datetime import datetime
283
- from fastapi import FastAPI, Request
284
- from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, PlainTextResponse
285
-
286
- app = FastAPI(title="Ollama HF Space Server")
287
-
288
- OLLAMA_PORT = os.environ.get("OLLAMA_PORT", "11434")
289
- OLLAMA_BASE_URL = f"http://localhost:{OLLAMA_PORT}"
290
- DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "hydra-ai")
291
-
292
- sessions: dict = {}
293
- MAX_SESSIONS = 100
294
- MAX_MESSAGES = 50
295
-
296
-
297
- def get_or_create_session(conv_id: str, model: str) -> dict:
298
- if conv_id not in sessions:
299
- if len(sessions) >= MAX_SESSIONS:
300
- oldest = next(iter(sessions))
301
- del sessions[oldest]
302
- sessions[conv_id] = {
303
- "model": model,
304
- "created_at": datetime.utcnow().isoformat(),
305
- "messages": []
306
- }
307
- return sessions[conv_id]
308
-
309
-
310
- @app.get("/", response_class=HTMLResponse)
311
- async def root():
312
- return """<!DOCTYPE html>
313
- <html><head><title>Ollama HF Space</title>
314
- <style>body{font-family:monospace;background:#0d0d0d;color:#00ff88;padding:20px}
315
- .ep{background:#111;border:1px solid #00ff88;padding:10px;margin:8px 0;border-radius:4px}
316
- a{color:#00ffcc}code{background:#1a1a1a;padding:2px 6px}</style></head>
317
- <body>
318
- <h1>🦙 Ollama HF Space</h1>
318
- <div class="ep"><b>GET /web</b> — <a href="/web">Full Web Control Panel</a></div>
319
- <div class="ep"><b>POST /api/chat</b> — Chat, streams plain text content only</div>
320
- <div class="ep"><b>POST /api/chat/new</b> — Create new conversation</div>
321
- <div class="ep"><b>GET /api/chat/history?conv_id=...</b> — Get history</div>
322
- <div class="ep"><b>DEL /api/chat/history?conv_id=...</b> — Clear history</div>
323
- <div class="ep"><b>GET /api/sessions</b> — List sessions</div>
324
- <div class="ep"><b>POST /shell</b> — Plain text shell stream (no event: prefixes)</div>
325
- <div class="ep"><b>GET /api/tags</b> — List models</div>
326
- <div class="ep"><b>POST /api/pull</b> — Pull model</div>
328
- </body></html>"""
329
-
330
-
331
- @app.post("/api/chat/new")
332
- async def new_conversation(request: Request):
333
- body = await request.json()
334
- model = body.get("model", DEFAULT_MODEL)
335
- conv_id = str(uuid.uuid4())[:8]
336
- get_or_create_session(conv_id, model)
337
- return {"conv_id": conv_id, "model": model, "created_at": sessions[conv_id]["created_at"]}
338
-
339
-
340
- @app.get("/api/chat/history")
341
- async def get_history(conv_id: str):
342
- if conv_id not in sessions:
343
- return JSONResponse({"error": "conversation not found"}, status_code=404)
344
- s = sessions[conv_id]
345
- return {"conv_id": conv_id, "model": s["model"], "created_at": s["created_at"],
346
- "message_count": len(s["messages"]), "messages": s["messages"]}
347
-
348
-
349
- @app.delete("/api/chat/history")
350
- async def clear_history(conv_id: str):
351
- if conv_id not in sessions:
352
- return JSONResponse({"error": "conversation not found"}, status_code=404)
353
- sessions[conv_id]["messages"] = []
354
- return {"conv_id": conv_id, "status": "cleared"}
355
-
356
-
357
- @app.get("/api/sessions")
358
- async def list_sessions():
359
- return {"total": len(sessions), "sessions": [
360
- {"conv_id": cid, "model": s["model"], "created_at": s["created_at"],
361
- "message_count": len(s["messages"])}
362
- for cid, s in sessions.items()
363
- ]}
364
-
365
-
366
- # ─────────────────────────────────────────────
367
- # POST /api/chat
368
- # stream=true → streams ONLY the text content, no JSON wrapper
368
- # stream=false → returns full JSON with conv_id
370
- #
371
- # With session: {"conv_id": "a3f9c1b2", "message": "Hello"}
372
- # Without session: {"messages": [...], "model": "...", "stream": true}
373
- # Auto session: {"message": "Hello", "model": "tinyllama"}
374
- # ─────────────────────────────────────────────
375
- @app.post("/api/chat")
376
- async def chat(request: Request):
377
- body = await request.json()
378
-
379
- # ── Session-based mode ──
380
- if "message" in body:
381
- user_msg = body["message"]
382
- model = body.get("model", DEFAULT_MODEL)
383
- conv_id = body.get("conv_id") or str(uuid.uuid4())[:8]
384
- stream = body.get("stream", True)
385
-
386
- session = get_or_create_session(conv_id, model)
387
- session["model"] = model
388
- session["messages"].append({"role": "user", "content": user_msg})
389
- if len(session["messages"]) > MAX_MESSAGES:
390
- session["messages"] = session["messages"][-MAX_MESSAGES:]
391
-
392
- ollama_body = {"model": model, "messages": session["messages"], "stream": True}
393
- full_response = []
394
-
395
- async def stream_content_only():
396
- # Streams ONLY the text tokens — no JSON, no metadata
397
- async with httpx.AsyncClient(timeout=300) as client:
398
- async with client.stream("POST", f"{OLLAMA_BASE_URL}/api/chat", json=ollama_body) as r:
399
- async for line in r.aiter_lines():
400
- if not line.strip():
401
- continue
402
- try:
403
- d = json.loads(line)
404
- token = d.get("message", {}).get("content", "")
405
- if token:
406
- full_response.append(token)
407
- yield token # ← just the raw text
408
- except Exception:
409
- pass
410
- # save to session after stream ends
411
- session["messages"].append({"role": "assistant", "content": "".join(full_response)})
412
-
413
- if stream:
414
- return StreamingResponse(
415
- stream_content_only(),
416
- media_type="text/plain",
417
- headers={
418
- "X-Conv-Id": conv_id,
419
- "Cache-Control": "no-cache",
420
- "X-Accel-Buffering": "no",
421
- },
422
- )
423
- else:
424
- # collect full response then return JSON
425
- async with httpx.AsyncClient(timeout=300) as client:
426
- resp = await client.post(f"{OLLAMA_BASE_URL}/api/chat", json={**ollama_body, "stream": False})
427
- data = resp.json()
428
- assistant_text = data.get("message", {}).get("content", "")
429
- if assistant_text:
430
- session["messages"].append({"role": "assistant", "content": assistant_text})
431
- return {"conv_id": conv_id, "model": model, "content": assistant_text}
432
-
433
- # ── Raw passthrough mode (old style) ──
434
- else:
435
- if "model" not in body:
436
- body["model"] = DEFAULT_MODEL
437
- stream = body.get("stream", True)
438
-
439
- async def stream_content_raw():
440
- # Also streams only content text for raw mode
441
- async with httpx.AsyncClient(timeout=300) as client:
442
- async with client.stream("POST", f"{OLLAMA_BASE_URL}/api/chat", json={**body, "stream": True}) as r:
443
- async for line in r.aiter_lines():
444
- if not line.strip():
445
- continue
446
- try:
447
- d = json.loads(line)
448
- token = d.get("message", {}).get("content", "")
449
- if token:
450
- yield token
451
- except Exception:
452
- pass
453
-
454
- if stream:
455
- return StreamingResponse(
456
- stream_content_raw(),
457
- media_type="text/plain",
458
- headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
459
- )
460
- else:
461
- async with httpx.AsyncClient(timeout=300) as client:
462
- resp = await client.post(f"{OLLAMA_BASE_URL}/api/chat", json={**body, "stream": False})
463
- data = resp.json()
464
- return {"content": data.get("message", {}).get("content", ""), "model": body["model"]}
465
-
466
-
467
- @app.post("/api/generate")
468
- async def generate(request: Request):
469
- body = await request.json()
470
- if "model" not in body:
471
- body["model"] = DEFAULT_MODEL
472
-
473
- async def stream_content():
474
- async with httpx.AsyncClient(timeout=300) as client:
475
- async with client.stream("POST", f"{OLLAMA_BASE_URL}/api/generate", json={**body, "stream": True}) as r:
476
- async for line in r.aiter_lines():
477
- if not line.strip():
478
- continue
479
- try:
480
- d = json.loads(line)
481
- token = d.get("response", "")
482
- if token:
483
- yield token
484
- except Exception:
485
- pass
486
-
487
- return StreamingResponse(
488
- stream_content(),
489
- media_type="text/plain",
490
- headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
491
- )
492
-
493
-
494
- @app.get("/api/tags")
495
- async def list_models():
496
- async with httpx.AsyncClient(timeout=10) as client:
497
- resp = await client.get(f"{OLLAMA_BASE_URL}/api/tags")
498
- return resp.json()
499
-
500
-
501
- @app.post("/api/pull")
502
- async def pull_model(request: Request):
503
- body = await request.json()
504
-
505
- async def stream_pull():
506
- async with httpx.AsyncClient(timeout=600) as client:
507
- async with client.stream("POST", f"{OLLAMA_BASE_URL}/api/pull", json=body) as r:
508
- async for chunk in r.aiter_bytes():
509
- yield chunk
510
-
511
- return StreamingResponse(stream_pull(), media_type="application/x-ndjson")
512
-
513
-
514
- @app.delete("/api/delete")
515
- async def delete_model(request: Request):
516
- body = await request.json()
517
- async with httpx.AsyncClient(timeout=60) as client:
518
- resp = await client.delete(f"{OLLAMA_BASE_URL}/api/delete", json=body)
519
- return resp.json() if resp.content else {"status": "ok"}
520
-
521
-
522
  # ─────────────────────────────────────────────
523
  # POST /shell — plain text, no SSE, no "event: line" prefix
524
  # curl -N /shell -d '{"cmd":"apt update"}'
 
243
  return resp.json() if resp.content else {"status": "ok"}
244
 
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  # ─────────────────────────────────────────────
247
  # POST /shell — plain text, no SSE, no "event: line" prefix
248
  # curl -N /shell -d '{"cmd":"apt update"}'