Fresh deploy with binaries and models via LFS
Browse files- Dockerfile +45 -0
- README.md +63 -0
- binaries/bin/llama-cli +3 -0
- binaries/bin/llama-embedding +3 -0
- binaries/lib/libggml-base.so +1 -0
- binaries/lib/libggml-base.so.0 +1 -0
- binaries/lib/libggml-base.so.0.9.4 +3 -0
- binaries/lib/libggml-cpu.so +1 -0
- binaries/lib/libggml-cpu.so.0 +1 -0
- binaries/lib/libggml-cpu.so.0.9.4 +3 -0
- binaries/lib/libggml.so +1 -0
- binaries/lib/libggml.so.0 +1 -0
- binaries/lib/libggml.so.0.9.4 +3 -0
- binaries/lib/libllama.so +1 -0
- binaries/lib/libllama.so.0 +1 -0
- binaries/lib/libllama.so.0.0.7584 +3 -0
- binaries/lib/libmtmd.so +1 -0
- binaries/lib/libmtmd.so.0 +1 -0
- binaries/lib/libmtmd.so.0.0.7584 +3 -0
- models/qwen3-0.6b-q4_k_m.gguf +3 -0
- models/qwen3-embedding-0.6b-q4_k_m.gguf +3 -0
Dockerfile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
FROM php:8.2-cli

# 1. Install system dependencies and PHP extensions.
# libgomp1 is required for OpenMP (llama.cpp multithreading).
# --no-install-recommends keeps the image lean (hadolint DL3015);
# the apt list cleanup happens in the same layer so it actually shrinks the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        libcurl4 \
        libgomp1 \
        libzip-dev \
        unzip \
    && docker-php-ext-install zip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# 2. Install Composer from the official image.
# NOTE(review): composer:latest is unpinned — consider pinning a major tag
# (e.g. composer:2) for reproducible builds.
COPY --from=composer:latest /usr/bin/composer /usr/bin/composer

# 3. Copy pre-compiled llama.cpp binaries and shared libraries
# (tracked via Git LFS in this repository).
COPY binaries/bin/* /usr/local/bin/
COPY binaries/lib/* /usr/local/lib/
# Refresh the dynamic linker cache so the libs in /usr/local/lib are found.
RUN ldconfig

# 4. Create a non-root user (required by HF Spaces; UID 1000 is the Spaces convention).
RUN useradd -m -u 1000 user

# 5. Prepare the application directory while still root, then hand it to the user.
WORKDIR /app
RUN chown user:user /app

# Drop privileges for everything below this point.
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# 6. Install the llama-php application.
# --depth 1 avoids downloading full history into the image.
# NOTE(review): the clone is unpinned (follows the default branch) — pin a
# tag or commit for deterministic builds.
RUN git clone --depth 1 https://github.com/enacimie/llama-php . \
    && composer install --no-dev --optimize-autoloader

# 7. Copy the local GGUF models, owned by the runtime user so the app can read them.
COPY --chown=user:user models/ /app/models/

# 8. Expose the HF Spaces port (7860 > 1024, so binding works as non-root)
# and serve the web/ directory with PHP's built-in server.
EXPOSE 7860
CMD ["php", "-S", "0.0.0.0:7860", "-t", "web/"]
|
README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Llama Php Demo
|
| 3 |
+
emoji: 🏆
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
short_description: Llama-php Demo
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# 🏆 Llama PHP Demo
|
| 12 |
+
|
| 13 |
+

|
| 14 |
+

|
| 15 |
+
|
| 16 |
+
This Hugging Face Space demonstrates **[llama.php](https://github.com/enacimie/llama-php)**, a robust PHP wrapper for executing local Large Language Models using `llama.cpp` as the inference engine.
|
| 17 |
+
|
| 18 |
+
## 🌟 About llama.php
|
| 19 |
+
|
| 20 |
+
llama.php is a modular, productive PHP wrapper that lets you run Large Language Models completely offline. With a clean API similar to OpenAI or Hugging Face but 100% self-contained, it brings the power of LLMs to PHP applications without external dependencies.
|
| 21 |
+
|
| 22 |
+
## ✨ Features Demonstrated
|
| 23 |
+
|
| 24 |
+
- **Local Inference**: Runs completely offline using CPU
|
| 25 |
+
- **GGUF Support**: Works with quantized models (Q4_K_M, Q5_K_S, etc.)
|
| 26 |
+
- **Chat Templates**: Includes templates for Qwen, Llama 3, Mistral, and more
|
| 27 |
+
- **Text Generation**: Generate responses to prompts
|
| 28 |
+
- **Embeddings**: Create vector embeddings from text
|
| 29 |
+
- **JSON Output**: Force structured JSON output with schema validation
|
| 30 |
+
- **Secure Execution**: Proper shell argument escaping to prevent injection
|
| 31 |
+
|
| 32 |
+
## 🚀 How to Use This Demo
|
| 33 |
+
|
| 34 |
+
1. **Text Generation**: Enter a prompt in the text box and click "Generate"
|
| 35 |
+
2. **Chat Mode**: Start a conversation with the model in chat interface
|
| 36 |
+
3. **Embedding Demo**: Convert text to vector embeddings
|
| 37 |
+
4. **JSON Mode**: Generate structured JSON output based on a schema
|
| 38 |
+
|
| 39 |
+
Adjust parameters like temperature, max tokens, and top-p to control the generation behavior.
|
| 40 |
+
|
| 41 |
+
## ⚙️ Technical Details
|
| 42 |
+
|
| 43 |
+
- **PHP Version**: 8.2
|
| 44 |
+
- **Inference Engine**: llama.cpp
|
| 45 |
+
- **Model**: Qwen3-0.6B-Q4_K_M (quantized for efficient CPU inference)
|
| 46 |
+
- **Embedding Model**: Qwen3-Embedding-0.6B-Q4_K_M
|
| 47 |
+
- **Docker Base**: Custom image with PHP 8.2 and llama.cpp binaries
|
| 48 |
+
|
| 49 |
+
## 🤝 Credits
|
| 50 |
+
|
| 51 |
+
This demo is powered by **[llama.php](https://github.com/enacimie/llama-php)** created by Eduardo Nacimiento-García.
|
| 52 |
+
|
| 53 |
+
- **GitHub Repository**: https://github.com/enacimie/llama-php
|
| 54 |
+
- **Original Models**: Qwen3 series from Alibaba
|
| 55 |
+
- **Inference Backend**: https://github.com/ggerganov/llama.cpp
|
| 56 |
+
|
| 57 |
+
## 📜 License
|
| 58 |
+
|
| 59 |
+
This demo and the underlying llama.php library are released under the MIT License.
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
*Note: Due to resource limitations on Hugging Face Spaces, generation might be slower than on dedicated hardware. The model runs entirely on CPU with limited context window size.*
|
binaries/bin/llama-cli
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41ffbbe9d3e860c632ab58eb69d4b9c5ea7060724bc682a74227ef02129f2916
|
| 3 |
+
size 4352904
|
binaries/bin/llama-embedding
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a99c70b3d62004c2997e33386d8e956e622cc1bb150132017682ab0717b4fca
|
| 3 |
+
size 3291224
|
binaries/lib/libggml-base.so
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml-base.so.0
|
binaries/lib/libggml-base.so.0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml-base.so.0.9.4
|
binaries/lib/libggml-base.so.0.9.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:065a3616ad3be7ebaa4156454163cbf12fa6e6e98c5c04a3a5a479253e7ca4a5
|
| 3 |
+
size 694632
|
binaries/lib/libggml-cpu.so
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml-cpu.so.0
|
binaries/lib/libggml-cpu.so.0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml-cpu.so.0.9.4
|
binaries/lib/libggml-cpu.so.0.9.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99c8e44f06dfd329bba146a0bbbda8ec77fcbe95191f6c5ad7a203d75e7984de
|
| 3 |
+
size 963592
|
binaries/lib/libggml.so
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml.so.0
|
binaries/lib/libggml.so.0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libggml.so.0.9.4
|
binaries/lib/libggml.so.0.9.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e5ebea3cd1243ab6a494c39285c52e3dafe1712aef9d2677d26ab440d084e16
|
| 3 |
+
size 55712
|
binaries/lib/libllama.so
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libllama.so.0
|
binaries/lib/libllama.so.0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libllama.so.0.0.7584
|
binaries/lib/libllama.so.0.0.7584
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efda5218094af8b4680aa20b6555b284755f414fd2e75e98731a0e25632317f0
|
| 3 |
+
size 2872232
|
binaries/lib/libmtmd.so
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libmtmd.so.0
|
binaries/lib/libmtmd.so.0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libmtmd.so.0.0.7584
|
binaries/lib/libmtmd.so.0.0.7584
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5c4eaece54058b8c5bb1f8371d51e97017c5c0042aec57b1a5dece72022603b
|
| 3 |
+
size 877448
|
models/qwen3-0.6b-q4_k_m.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dc0ee44eb39790624623cf5e2a8cc21973c4839a67fed406dd3f9b2e6b7f800
|
| 3 |
+
size 484220160
|
models/qwen3-embedding-0.6b-q4_k_m.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17c3e3f2eaabc6e321702b4a13680d042e72afc5d602f359f27a670c3e54718c
|
| 3 |
+
size 396474560
|