manishksharma98 committed
Commit 66a7f89 · verified · 1 Parent(s): 0a3bf97

Viona: HF Space — InferenceClient, Dockerfile PORT, README metadata

Files changed (6)
  1. Dockerfile +9 -12
  2. README.md +23 -24
  3. ai/__init__.py +5 -0
  4. ai/llm.py +88 -0
  5. app.py +63 -98
  6. requirements.txt +4 -4
Dockerfile CHANGED
@@ -1,20 +1,17 @@
- FROM python:3.10-slim
-
- # Install system dependencies including zstd
- RUN apt-get update && apt-get install -y curl zstd \
-     && rm -rf /var/lib/apt/lists/*
+ FROM python:3.11-slim
 
  WORKDIR /app
 
- # Install Python dependencies
+ ENV PYTHONUNBUFFERED=1 \
+     PIP_NO_CACHE_DIR=1
+
  COPY requirements.txt .
  RUN pip install --no-cache-dir -r requirements.txt
 
- # Install Ollama
- RUN curl -fsSL https://ollama.com/install.sh | sh
+ COPY app.py .
+ COPY ai ./ai
 
- COPY . .
- RUN chmod +x start.sh
+ EXPOSE 7860
 
- # start.sh: starts ollama serve, pulls tinyllama at runtime, then runs FastAPI
- CMD ["./start.sh"]
+ # Hugging Face Spaces sets PORT; default to 7860 for local Docker tests
+ CMD ["sh", "-c", "exec streamlit run app.py --server.port=${PORT:-7860} --server.address=0.0.0.0 --server.headless=true --browser.gatherUsageStats=false"]
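For the local Docker test mentioned in the CMD comment, a build-and-run along these lines should work from the repo root (a sketch, not part of the commit; the image tag and secret values are placeholders):

```sh
docker build -t viona-space .
# PORT is unset locally, so the CMD falls back to 7860.
docker run --rm -p 7860:7860 \
  -e HF_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" \
  -e HF_API_TOKEN="hf_xxx" \
  viona-space
# Then open http://localhost:7860 in a browser.
```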
README.md CHANGED
@@ -1,39 +1,38 @@
  ---
- title: Personal AI
- emoji: 🦙
- colorFrom: purple
- colorTo: blue
+ title: CyberWatch / Viona
+ emoji: 🛡️
+ colorFrom: blue
+ colorTo: indigo
  sdk: docker
  pinned: false
  license: mit
+ short_description: Viona cyber assistant via Hugging Face Inference
  ---
 
- # 🚀 Personal AI with Ollama on Hugging Face Spaces
+ # CyberWatch / Viona (Hugging Face Space)
 
- This Space runs **Ollama** inside a Docker container on Hugging Face's free tier.
- It uses **FastAPI** to expose endpoints on port 7860, which proxy requests to Ollama's internal API (`127.0.0.1:11434`).
+ Streamlit UI that calls your model through **Hugging Face Inference** using `huggingface_hub.InferenceClient` (the current router, not the legacy `api-inference` URL).
 
- ## Updated Dockerfile
+ ## Secrets / variables (Space Settings)
 
- ```dockerfile
- FROM python:3.10-slim
+ | Name | Required | Notes |
+ |------|----------|-------|
+ | `HF_MODEL_ID` | **Yes** | e.g. `mistralai/Mistral-7B-Instruct-v0.3` — must be enabled for **Inference** on your account |
+ | `HF_API_TOKEN` | Recommended | User token with **Inference** scope; use it if the default Space token is not enough |
+ | `HF_TOKEN` | Auto | Hugging Face injects this in Spaces; used when `HF_API_TOKEN` is unset |
+ | `APP_NAME` | No | Default: `CyberWatch` |
+ | `LOG_LEVEL` | No | Default: `INFO` |
 
- # Install system dependencies including zstd (needed for Ollama)
- RUN apt-get update && apt-get install -y curl zstd \
-     && rm -rf /var/lib/apt/lists/*
+ Add secrets under **Settings → Secrets and variables → New secret**. Do not commit tokens.
 
- WORKDIR /app
+ ## Deploy
 
- # Python dependencies
- COPY requirements.txt .
- RUN pip install --no-cache-dir -r requirements.txt
+ 1. Create a **Docker** Space and push this folder as the repo root (or symlink these files into your Space repository).
+ 2. Set **`HF_MODEL_ID`** (and optionally **`HF_API_TOKEN`**) in the Space settings.
+ 3. Rebuild the Space.
 
- # Install Ollama
- RUN curl -fsSL https://ollama.com/install.sh | sh
+ Local multipage tools (training history, MongoDB) stay in the main Viona repo; this image stays small for fast cold starts.
 
- COPY . .
- RUN chmod +x start.sh
+ ### Streamlit SDK instead of Docker
 
- # start.sh starts ollama serve, pulls tinyllama at runtime, then runs FastAPI on 7860
- CMD ["./start.sh"]
- ```
+ To use the hosted **Streamlit** builder (no `Dockerfile`), set the README frontmatter to `sdk: streamlit` and `app_file: app.py`, keep `requirements.txt` and `app.py` at the repo root, and delete or ignore `Dockerfile`. Secrets behave the same.
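For that Streamlit-SDK alternative, the README frontmatter would look roughly like the sketch below (illustrative only; this commit keeps `sdk: docker`, and every field other than `sdk` and `app_file` simply mirrors the metadata added above):

```yaml
---
title: CyberWatch / Viona
emoji: 🛡️
colorFrom: blue
colorTo: indigo
sdk: streamlit        # hosted Streamlit builder instead of the Dockerfile
app_file: app.py      # entry point picked up by the builder
pinned: false
license: mit
short_description: Viona cyber assistant via Hugging Face Inference
---
```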
 
 
ai/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """Viona LLM integration (Hugging Face Inference API)."""
+ 
+ from ai.llm import viona_complete
+ 
+ __all__ = ["viona_complete"]
ai/llm.py ADDED
@@ -0,0 +1,88 @@
+ """Viona on Hugging Face Spaces — Inference API only (no local/Ollama paths).
+ 
+ Spaces expose HF_TOKEN automatically; optional HF_API_TOKEN overrides.
+ """
+ 
+ from __future__ import annotations
+ 
+ import logging
+ import os
+ 
+ from dotenv import load_dotenv
+ from huggingface_hub import InferenceClient
+ from huggingface_hub.utils import HfHubHTTPError
+ 
+ load_dotenv()
+ logger = logging.getLogger(__name__)
+ 
+ VIONA_SYSTEM = """You are Viona, an expert cybersecurity research assistant.
+ Your name is Viona. If asked who you are, say you are Viona, a cybersecurity research assistant.
+ Do not reveal underlying model or provider names."""
+ 
+ 
+ def _hf_token() -> str:
+     return (
+         os.getenv("HF_API_TOKEN", "").strip()
+         or os.getenv("HF_TOKEN", "").strip()
+         or os.getenv("HUGGING_FACE_HUB_TOKEN", "").strip()
+     )
+ 
+ 
+ def _hf_model_id() -> str:
+     return os.getenv("HF_MODEL_ID", "").strip()
+ 
+ 
+ def inference_backend_label() -> str:
+     return "hf"
+ 
+ 
+ def viona_complete(user_prompt: str, timeout: int = 120) -> str:
+     """Call Hugging Face Inference (router) via InferenceClient."""
+     token = _hf_token()
+     model_id = _hf_model_id()
+     if not token:
+         return (
+             "Viona: No Hugging Face token found. In this Space go to **Settings → Secrets and variables "
+             "→ Secrets** and add `HF_API_TOKEN` (recommended), or rely on the default `HF_TOKEN` "
+             "if your Space has repository access."
+         )
+     if not model_id:
+         return (
+             "Viona: Set **HF_MODEL_ID** in Space **Settings → Secrets** (e.g. "
+             "`mistralai/Mistral-7B-Instruct-v0.3`). The model must be allowed for inference on your account."
+         )
+     try:
+         client = InferenceClient(
+             model=model_id,
+             token=token,
+             timeout=float(timeout),
+         )
+         out = client.chat_completion(
+             messages=[
+                 {"role": "system", "content": VIONA_SYSTEM},
+                 {"role": "user", "content": user_prompt},
+             ],
+             max_tokens=512,
+         )
+         if getattr(out, "choices", None):
+             msg = out.choices[0].message
+             content = getattr(msg, "content", None)
+             if content:
+                 return str(content).strip()
+         logger.warning("Unexpected HF chat_completion response shape")
+         return "Viona: Unexpected response from inference. Try again or pick another HF_MODEL_ID."
+     except HfHubHTTPError as e:
+         code = e.response.status_code if getattr(e, "response", None) else "?"
+         logger.error("HF inference HTTP error: %s", code)
+         return (
+             "Viona: Inference returned an HTTP error. Check **HF_MODEL_ID**, token permissions, "
+             "and whether the model supports chat on the Inference API."
+         )
+     except Exception as e:
+         logger.error("HF inference failed: %s", type(e).__name__)
+         return "Viona: Could not reach Hugging Face Inference from this Space. Retry shortly."
+ 
+ 
+ def viona_complete_openai_compat(user_prompt: str, timeout: int = 120) -> str:
+     """Spaces build uses InferenceClient only; kept for API compatibility."""
+     return viona_complete(user_prompt, timeout=timeout)
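A minimal smoke test of the new module outside the Space, assuming the repo root is importable and a real token is substituted (the model ID is just the example from the README; `smoke_test_llm.py` is a hypothetical helper, not part of the commit):

```python
# smoke_test_llm.py: illustrative check of ai/llm.py (not part of this commit)
import os

# Same variables the Space reads from Settings -> Secrets; values are placeholders.
os.environ.setdefault("HF_MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.3")
os.environ.setdefault("HF_API_TOKEN", "hf_xxx")  # replace with a real token

from ai.llm import inference_backend_label, viona_complete

print("backend:", inference_backend_label())  # prints "hf"
print(viona_complete("Summarize CVE triage in one sentence.", timeout=60))
```

Because `viona_complete` returns user-facing strings instead of raising, a missing token or model ID shows up as a "Viona: ..." message rather than an exception.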
app.py CHANGED
@@ -1,110 +1,75 @@
- """
- Gradio UI + FastAPI /api/generate so the Space serves both the web UI and the API.
- Your local app can call: GET/POST .../api/generate
- """
- import os
- import requests
- import gradio as gr
- from fastapi import FastAPI, Query, Body
- from fastapi.responses import JSONResponse
- from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- 
- OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://127.0.0.1:11434")
- OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "tinyllama")
- API_URL = f"{OLLAMA_HOST}/api/generate"
- 
- 
- def generate_text(prompt):
-     if not (prompt or "").strip():
-         return ""
-     try:
-         r = requests.post(
-             API_URL,
-             json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
-             timeout=120,
-         )
-         r.raise_for_status()
-         return r.json().get("response", "")
-     except requests.RequestException as e:
-         return f"Error: {e}"
- 
- 
- # Gradio app (mounted at /)
- demo = gr.Interface(
-     fn=generate_text,
-     inputs="text",
-     outputs="text",
-     title="Ollama on HF",
-     description="Generate text with Ollama. Call /api/generate from your local app.",
- )
- 
- # FastAPI app: mount Gradio at / and expose /api/generate
- app = FastAPI(title="Ollama on HF")
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
- 
- 
- class GenerateBody(BaseModel):
-     prompt: str = ""
-     model: str = OLLAMA_MODEL
- 
- 
- @app.get("/api/generate")
- def api_generate_get(
-     prompt: str = Query(..., description="Text prompt"),
-     model: str = Query(OLLAMA_MODEL, description="Ollama model"),
- ):
-     """GET /api/generate?prompt=your+prompt"""
-     return _do_generate((prompt or "").strip(), model)
- 
- 
- @app.post("/api/generate")
- def api_generate_post(body: GenerateBody = Body(...)):
-     """POST /api/generate with JSON {"prompt": "...", "model": "tinyllama"}"""
-     return _do_generate((body.prompt or "").strip(), body.model or OLLAMA_MODEL)
- 
- 
- def _do_generate(prompt: str, model: str):
-     """Shared logic for GET and POST /api/generate."""
-     if not prompt:
-         return JSONResponse(
-             status_code=400,
-             content={"error": "prompt is required and cannot be empty"},
-         )
-     payload = {"model": model, "prompt": prompt, "stream": False}
-     try:
-         r = requests.post(API_URL, json=payload, timeout=120)
-         r.raise_for_status()
-         data = r.json()
-         return {
-             "response": data.get("response", ""),
-             "model": data.get("model", model),
-             "done": data.get("done", True),
-         }
-     except requests.ConnectionError:
-         return JSONResponse(
-             status_code=503,
-             content={"error": "Ollama is not ready. Wait for the Space to finish loading."},
-         )
-     except requests.HTTPError as e:
-         return JSONResponse(
-             status_code=e.response.status_code,
-             content={"error": e.response.text or str(e)},
-         )
- 
- 
- @app.get("/api/status")
- def api_status():
-     """Check if Ollama is up and list models."""
-     try:
-         r = requests.get(f"{OLLAMA_HOST}/api/tags", timeout=5)
-         r.raise_for_status()
-         data = r.json()
-         models = [m.get("name", "") for m in data.get("models", [])]
-         return {"status": "ok", "ollama": "up", "models": models}
-     except Exception as e:
-         return {"status": "error", "ollama": "down", "models": [], "detail": str(e)}
- 
- 
- # Mount Gradio at root (must be last so /api/* are matched first)
- app = gr.mount_gradio_app(app, demo, path="/")
+ """CyberWatch Streamlit web UI — Viona chat and security tooling entry."""
+ 
+ from __future__ import annotations
+ 
+ import logging
+ import os
+ 
+ import streamlit as st
+ from dotenv import load_dotenv
+ 
+ from ai.llm import inference_backend_label, viona_complete
+ 
+ load_dotenv()
+ logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
+ 
+ APP_NAME = os.getenv("APP_NAME", "CyberWatch")
+ 
+ THEME_CSS = """
+ @import url('https://fonts.googleapis.com/css2?family=Exo+2:wght@400;600&family=Share+Tech+Mono&display=swap');
+ html, body, [class*="css"] { font-family: 'Exo 2', sans-serif; }
+ h1, h2, h3 { font-family: 'Share Tech Mono', monospace; color: #4fc3f7 !important; }
+ section.main > div { background-color: #0a0e1a; color: #e8eaf0; }
+ .metric-container { background: #121826; border: 1px solid #1e2740; border-radius: 8px; padding: 0.5rem; }
+ """
+ 
+ 
+ def main() -> None:
+     st.set_page_config(page_title=f"{APP_NAME} | Viona", layout="wide")
+     st.markdown(f"<style>{THEME_CSS}</style>", unsafe_allow_html=True)
+ 
+     if "disclaimer_ok" not in st.session_state:
+         st.session_state.disclaimer_ok = False
+ 
+     if not st.session_state.disclaimer_ok:
+         st.title(APP_NAME)
+         st.error(
+             "**Authorized use only.** CyberWatch and Viona are for research and "
+             "defensive security on systems and networks you own or have **explicit written "
+             "permission** to test. Misuse may be illegal."
+         )
+         if st.button("I understand, continue"):
+             st.session_state.disclaimer_ok = True
+             st.rerun()
+         st.stop()
+ 
+     st.title(f"{APP_NAME} / Viona")
+     st.caption("Cybersecurity research assistant")
+ 
+     c1, c2, c3 = st.columns(3)
+     with c1:
+         st.metric("Assistant", "Viona")
+     with c2:
+         st.metric("Inference", inference_backend_label().upper())
+     with c3:
+         st.metric("Log level", os.getenv("LOG_LEVEL", "INFO"))
+ 
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+ 
+     for m in st.session_state.messages:
+         with st.chat_message(m["role"], avatar="🛡️" if m["role"] == "assistant" else None):
+             label = "Viona" if m["role"] == "assistant" else "You"
+             st.caption(label)
+             st.markdown(m["content"])
+ 
+     prompt = st.chat_input("Message Viona…")
+     if prompt:
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         reply = viona_complete(prompt)
+         st.session_state.messages.append({"role": "assistant", "content": reply})
+         st.rerun()
+ 
+ 
+ if __name__ == "__main__":
+     main()
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
- fastapi>=0.104.0
- uvicorn[standard]>=0.24.0
- gradio>=4.0.0
- requests>=2.28.0
+ streamlit>=1.28.0
+ python-dotenv>=1.0.0
+ requests>=2.31.0
+ huggingface_hub>=0.26.0