Spaces:

mishrabp
/

ollama

Sleeping

App Files Files Community

mishrabp commited on Jan 30

Commit

1c1c554

verified ·

1 Parent(s): aa73888

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.gitignore +208 -0
Dockerfile +37 -0
LICENSE +21 -0
README.md +146 -4
deploy.yml +43 -0
deploy_microk8s.sh +31 -0
testapp/README.md +11 -0
testapp/client.py +61 -0
testapp/requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,208 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+*.tar
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/

Dockerfile ADDED Viewed

	@@ -0,0 +1,37 @@

+# Base image tag is overridable at build time. The default keeps the previous
+# behaviour (latest); pass --build-arg OLLAMA_VERSION=<tag> to pin a release
+# for reproducible builds.
+ARG OLLAMA_VERSION=latest
+FROM ollama/ollama:${OLLAMA_VERSION}
+# Set environment variables for Hugging Face Spaces
+# Spaces expect the service to listen on port 7860
+ENV OLLAMA_HOST=0.0.0.0:7860
+# NOTE: "*" accepts requests from any browser origin; tighten if the API
+# should not be callable from arbitrary web pages.
+ENV OLLAMA_ORIGINS="*"
+# Hugging Face Spaces typically run as user 1000.
+# We create a user and set up the home directory and model path.
+ENV HOME=/home/user
+ENV OLLAMA_MODELS=$HOME/.ollama/models
+# The base image may already have a user with UID 1000 (often named 'ollama').
+# We setup the home directory expected by HF Spaces and ensure permissions.
+RUN mkdir -p $HOME/.ollama/models && \
+    chown -R 1000:1000 $HOME
+# Switch to UID 1000 (whether it's 'user' or 'ollama')
+USER 1000
+# Pre-pull the model during the build process
+# Llama 3.2 3B is small enough to fit in the image layer for deployment stability
+# A temporary server is started in the background; instead of a fixed sleep we
+# poll until it answers (max ~30s) so slow builders do not race the pull, and
+# we wait for the server to exit so the model files are fully written.
+RUN set -eu; \
+    /bin/ollama serve & \
+    pid=$!; \
+    tries=0; \
+    until /bin/ollama list >/dev/null 2>&1; do \
+        tries=$((tries + 1)); \
+        if [ "$tries" -ge 30 ]; then \
+            echo "Ollama server did not become ready in time" >&2; \
+            kill "$pid" 2>/dev/null || true; \
+            exit 1; \
+        fi; \
+        sleep 1; \
+    done; \
+    echo "Pulling llama3.2:3b model..."; \
+    /bin/ollama pull llama3.2:3b; \
+    kill "$pid"; \
+    wait "$pid" || true
+# Expose the port expected by HF Spaces
+EXPOSE 7860
+# Start Ollama
+ENTRYPOINT ["/bin/ollama"]
+CMD ["serve"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2026 bm80177
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,10 +1,152 @@
 ---
 title: Ollama
-emoji: 🔥
-colorFrom: indigo
-colorTo: indigo
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Ollama
+emoji: ⚡
+colorFrom: pink
+colorTo: blue
 sdk: docker
 pinned: false
+license: mit
+short_description: Ollama (llama3.2:3b) on Hugging Face Spaces (Docker)
 ---
+# 🦙 Ollama (llama3.2:3b) on Hugging Face Spaces (Docker)
+This Space runs **[Ollama](https://ollama.com/)** with the **`llama3.2:3b`** model using a **Docker-based Hugging Face Space**.
+The model is pulled into the image at build time; the container then starts Ollama and exposes the Ollama HTTP API on port 7860 so you can interact with the model programmatically.
+---
+## 🚀 Hugging Face Space Configuration
+When creating the Space:
+- **SDK**: `Docker`
+- **Template**: `Blank`
+- **Hardware**: `Free CPU` (sufficient for 3B models)
+- **Visibility**: Public or Private (your choice)
+No additional configuration files are required beyond this repository.
+---
+## 🧱 How It Works
+- Ollama runs inside a Docker container
+- The model `llama3.2:3b` is pulled during the Docker build and baked into the image
+- Ollama listens on port **7860** (set via `OLLAMA_HOST` in the Dockerfile)
+- Hugging Face automatically maps the port and exposes the Space URL
+---
+## 📡 API Usage
+Once the Space is running, you can interact with Ollama via HTTP.
+### Check version
+```bash
+curl https://<your-space>.hf.space/api/version
+```
+### Generate text
+```bash
+curl https://<your-space>.hf.space/api/generate \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "llama3.2:3b",
+    "prompt": "Explain Kubernetes like I am five"
+  }'
+```
+---
+## 🔄 Automatic Deployment (GitHub → Hugging Face)
+You can deploy automatically using **GitHub Actions**.
+### Required Secrets (GitHub Repository)
+Go to **Settings → Secrets and variables → Actions**, then add:
+| Secret Name | Description |
+|------------|------------|
+| `HF_USERNAME` | Your Hugging Face username |
+| `HF_TOKEN` | Hugging Face **Write** access token |
+| `SPACE_NAME` | Name of the Hugging Face Space |
+The workflow logs in to:
+```
+registry.hf.space
+```
+and pushes the Docker image, triggering a redeploy of the Space.
+---
+## 🧪 Local Development (Optional)
+You can run the same container locally for testing.
+### Build the image
+```bash
+docker build -t ollama-local:latest .
+```
+If you hit DNS/network issues:
+```bash
+docker build --network=host -t ollama-local:latest .
+```
+### Run locally
+```bash
+docker run -p 7860:7860 ollama-local:latest
+```
+Test:
+```bash
+curl http://localhost:7860/api/version
+```
+---
+## ☸️ Local Kubernetes Deployment (Optional)
+This project can also be deployed to a local Kubernetes cluster (Docker Desktop, Minikube, MicroK8s).
+### Build the image
+```bash
+docker build -t ollama-local:latest .
+```
+### Deploy
+```bash
+kubectl apply -f deploy.yml
+```
+### Access the service
+- **Docker Desktop**:
+  `http://localhost:30786`
+- **Minikube**:
+  ```bash
+  echo "http://$(minikube ip):30786"
+  ```
+Verify:
+```bash
+curl http://localhost:30786/api/version
+```
+---
+## ⚠️ Notes & Limitations
+- Free CPU Spaces are **slow** for inference (expected for LLMs)
+- The model is downloaded during the image build (not at container startup), so builds take longer
+- Hugging Face Spaces may restart containers periodically
+---
+## 📚 References
+- Ollama: https://ollama.com/
+- Hugging Face Spaces (Docker): https://huggingface.co/docs/hub/spaces-sdks-docker

deploy.yml ADDED Viewed

	@@ -0,0 +1,43 @@

+# NodePort Service: forwards node port 30786 to port 7860 on pods labelled app=ollama.
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+spec:
+  type: NodePort
+  selector:
+    app: ollama
+  ports:
+    # Service port and container port are both 7860; 30786 is the externally reachable node port.
+    - nodePort: 30786
+      port: 7860
+      targetPort: 7860
+      protocol: TCP
+---
+# Single-replica Deployment running the locally built Ollama image.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-deployment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: docker.io/library/ollama-local:latest
+        # Never forces K8s to use the local image or fail immediately (no network pull)
+        imagePullPolicy: Never
+        ports:
+        - containerPort: 7860
+        # Only route Service traffic to the pod once the Ollama HTTP API answers.
+        readinessProbe:
+          httpGet:
+            path: /api/version
+            port: 7860
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        resources:
+          requests:
+            memory: "4Gi"
+            cpu: "2"
+          limits:
+            memory: "8Gi"
+            cpu: "4"

deploy_microk8s.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/bin/bash
+set -e
+# Define image name
+IMG_NAME="docker.io/library/ollama-local:latest"
+echo "1. Building image: $IMG_NAME ..."
+docker build --network=host -t $IMG_NAME .
+echo "2. Saving image to ollama-local.tar ..."
+docker save $IMG_NAME > ollama-local.tar
+echo "3. Importing into Microk8s (k8s.io namespace)..."
+# Using 'images import' explicitly
+microk8s ctr --namespace k8s.io images import ollama-local.tar
+echo "---------------------------------------------------"
+echo "VERIFICATION: Checking if image exists in Microk8s:"
+microk8s ctr --namespace k8s.io images list | grep "ollama-local" || echo "❌ Image NOT found in Microk8s registry!"
+echo "---------------------------------------------------"
+echo "4. Redeploying..."
+kubectl apply -f deploy.yml
+# Retrieve the pod name to delete it
+POD_NAME=$(kubectl get pod -l app=ollama -o jsonpath="{.items[0].metadata.name}")
+if [ -n "$POD_NAME" ]; then
+    echo "Deleting old pod: $POD_NAME"
+    kubectl delete pod $POD_NAME
+fi
+echo "Done! Watch the pods with: kubectl get pods -w"

testapp/README.md ADDED Viewed

	@@ -0,0 +1,11 @@

+# Setup
+# Best practice on Ubuntu/WSL: use a virtual environment
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+# Run with default prompt
+python3 client.py
+# Run with custom prompt (Quote your strings in Zsh!)
+python3 client.py "Write a haiku about Kubernetes"

testapp/client.py ADDED Viewed

	@@ -0,0 +1,61 @@

+#!/usr/bin/env python3
+import requests
+import json
+import sys
+import time
+# Configuration
+# If running outside k8s (e.g. from your WSL terminal), use localhost and the NodePort
+# If running inside k8s, you would use http://ollama-service:7860
+# Full URL that query_ollama() POSTs the generation request to.
+OLLAMA_URL = "http://localhost:30786/api/generate"
+# Model tag sent in the "model" field of the request payload.
+MODEL = "llama3.2:3b"
+def query_ollama(prompt):
+    print(f"🔵 Querying {MODEL} at {OLLAMA_URL}...")
+    print(f"📝 Prompt: {prompt}")
+    print("-" * 50)
+    payload = {
+        "model": MODEL,
+        "prompt": prompt,
+        "stream": True  # Enable streaming for real-time output
+    }
+    try:
+        response = requests.post(OLLAMA_URL, json=payload, stream=True)
+        response.raise_for_status()
+        full_response = ""
+        start_time = time.time()
+        for line in response.iter_lines():
+            if line:
+                decoded_line = line.decode('utf-8')
+                data = json.loads(decoded_line)
+                if "response" in data:
+                    chunk = data["response"]
+                    print(chunk, end='', flush=True)
+                    full_response += chunk
+                if data.get("done", False):
+                    print() # Newline at end
+                    total_duration = data.get("total_duration", 0) / 1e9 # Convert ns to s
+                    print("-" * 50)
+                    print(f"✅ Done in {total_duration:.2f}s")
+    except requests.exceptions.ConnectionError:
+        print(f"\n❌ Could not connect to Ollama at {OLLAMA_URL}")
+        print("   Ensure the Kubernetes service is running and port 30786 is accessible.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        sys.exit(1)
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        prompt = " ".join(sys.argv[1:])
+    else:
+        prompt = "Why is the sky blue? Keep it brief."
+    query_ollama(prompt)

testapp/requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ requests