chmielvu committed
Commit 2285363 · verified · 1 parent: 980ab55

Upload folder using huggingface_hub

Files changed (4)
  1. Dockerfile +20 -0
  2. README.md +42 -4
  3. app.py +252 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     libopenblas-dev \
+     libgomp1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN pip install --no-cache-dir \
+     https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY app.py .
+
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
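
For local verification of the image, a minimal smoke test might look like the sketch below. It assumes the image has been built and started with `docker build -t prompt-generator . && docker run -p 7860:7860 prompt-generator`, and that `requests` is installed on the host; the tag name and the client dependency are illustrative, not part of the Space itself.

```python
# Hypothetical smoke test against a locally running container (port 7860).
# Assumes `pip install requests` on the host; not a Space dependency.
import requests

# /health reports whether the GGUF model has been loaded yet
print(requests.get("http://localhost:7860/health").json())

# /v1/completions mirrors the OpenAI completions shape (see app.py below)
resp = requests.post(
    "http://localhost:7860/v1/completions",
    json={"prompt": "a mysterious castle on", "max_tokens": 64},
    timeout=120,
)
print(resp.json()["choices"][0]["text"])
```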
README.md CHANGED
@@ -1,10 +1,48 @@
 ---
 title: Prompt Generator
- emoji: 💻
- colorFrom: indigo
- colorTo: pink
+ emoji:
+ colorFrom: yellow
+ colorTo: green
 sdk: docker
 pinned: false
+ license: other
+ preload_from_hub:
+ - mradermacher/Promt-generator-GGUF
 ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Prompt Generator (Q4_K_M)
+
+ A 600M-parameter Bloom-based model trained for creative prompt generation. Give it a short concept and it will generate detailed, creative prompts for image generation or creative writing.
+
+ ## Features
+
+ - **Creative Prompt Generation**: Expand short ideas into detailed prompts
+ - **Image Prompt Creator**: Generate prompts for AI image generators
+ - **Completion Model**: Continues your text rather than responding to it
+ - **Lightweight**: Only 600M parameters, runs on CPU
+
+ ## Model Details
+
+ - **Base**: UnfilteredAI/Promt-generator
+ - **Architecture**: Bloom
+ - **GGUF by**: mradermacher/Promt-generator-GGUF
+ - **Quantization**: Q4_K_M (561 MB)
+ - **Type**: Base completion model (not instruct-tuned)
+
+ ## API Endpoint
+
+ - `POST /v1/completions` - Text completions (OpenAI-style, supports streaming)
+
+ ## Usage
+
+ ```bash
+ curl -X POST "https://YOUR_SPACE.hf.space/v1/completions" \
+   -H "Content-Type: application/json" \
+   -d '{"prompt": "a mysterious castle on", "max_tokens": 100}'
+ ```
+
+ ## Tech Stack
+
+ - llama.cpp via the JamePeng fork (Luigi wheel v0.3.22)
+ - Model: Promt-generator (Q4_K_M)
+ - Completion API (not chat - this is a base model)
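
Since the README advertises streaming, a Python counterpart to the curl example may be useful. This is a minimal sketch assuming the `requests` package and a real Space URL in place of the `YOUR_SPACE` placeholder; it parses the server-sent events that `app.py` emits.

```python
# Hypothetical streaming client for /v1/completions (SSE).
# "YOUR_SPACE" is a placeholder, exactly as in the curl example.
import json
import requests

resp = requests.post(
    "https://YOUR_SPACE.hf.space/v1/completions",
    json={"prompt": "a mysterious castle on", "max_tokens": 100, "stream": True},
    stream=True,
)
for line in resp.iter_lines():
    # SSE frames look like `data: {...}`; blank keep-alive lines are skipped
    if not line or not line.startswith(b"data: "):
        continue
    data = line[len(b"data: "):]
    if data == b"[DONE]":  # end-of-stream sentinel emitted by the server
        break
    chunk = json.loads(data)
    print(chunk["choices"][0]["text"], end="", flush=True)
```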
app.py ADDED
@@ -0,0 +1,252 @@
+ import json
+ import threading
+ import time
+ import uuid
+ from functools import lru_cache
+ from typing import Any, Dict, Iterable
+
+ import gradio as gr
+ from fastapi import FastAPI, Request
+ from fastapi.responses import JSONResponse, StreamingResponse
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ # Model configuration - hardcoded
+ MODEL_REPO_ID = "mradermacher/Promt-generator-GGUF"
+ MODEL_FILE = "Promt-generator.Q4_K_M.gguf"
+ # No chat format - this is a base completion model, not instruct-tuned
+
+ # llama.cpp settings optimized for HF Spaces free tier
+ N_CTX = 2048
+ N_THREADS = 2
+ N_BATCH = 512
+ USE_MMAP = True
+
+ # A single lock serializes all calls into llama.cpp, which is not thread-safe
+ LOCK = threading.Lock()
+ api = FastAPI()
+
+
+ def _now() -> int:
+     return int(time.time())
+
+
+ def _openai_id(prefix: str) -> str:
+     return f"{prefix}-{uuid.uuid4().hex[:24]}"
+
+
+ def _sse(obj: Any) -> str:
+     return f"data: {json.dumps(obj, ensure_ascii=True)}\n\n"
+
+
+ def _sse_done() -> str:
+     return "data: [DONE]\n\n"
+
+
+ @lru_cache(maxsize=1)
+ def _get_llm_and_path() -> Dict[str, Any]:
+     # lru_cache(maxsize=1) makes this a lazy singleton: the GGUF file is
+     # downloaded and the model loaded once, on first use
+     model_path = hf_hub_download(
+         repo_id=MODEL_REPO_ID, filename=MODEL_FILE, repo_type="model"
+     )
+
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=N_CTX,
+         n_threads=N_THREADS,
+         n_batch=N_BATCH,
+         n_gpu_layers=0,
+         verbose=False,
+         use_mmap=USE_MMAP,
+     )
+     return {"llm": llm, "model_path": model_path}
+
+
+ @api.get("/health")
+ def health() -> Dict[str, Any]:
+     loaded = _get_llm_and_path.cache_info().currsize > 0
+     return {
+         "status": "ok",
+         "backend": "llama.cpp",
+         "loaded": loaded,
+         "model_repo_id": MODEL_REPO_ID,
+         "model_file": MODEL_FILE,
+         "chat_format": None,
+     }
+
+
+ @api.get("/ready")
+ def ready() -> Dict[str, Any]:
+     # Forces the model to load and runs a one-token warmup generation
+     m = _get_llm_and_path()
+     llm: Llama = m["llm"]
+     with LOCK:
+         llm("OK", max_tokens=1, temperature=0.0)
+     return {"status": "ok", "loaded": True}
+
+
+ @api.get("/v1/models")
+ def v1_models() -> Dict[str, Any]:
+     model_name = f"{MODEL_REPO_ID}/{MODEL_FILE}"
+     return {"object": "list", "data": [{"id": model_name, "object": "model"}]}
+
+
+ @api.post("/v1/completions")
+ async def completions(req: Request):
+     """OpenAI-style completions endpoint for this base model."""
+     payload = await req.json()
+     prompt = payload.get("prompt") or ""
+     stream = bool(payload.get("stream") or False)
+     max_tokens = int(payload.get("max_tokens") or 128)
+     temperature = float(payload.get("temperature") or 0.7)
+     top_p = float(payload.get("top_p") or 0.95)
+
+     if not prompt:
+         return JSONResponse(
+             status_code=400,
+             content={"error": {"message": "prompt must be non-empty"}},
+         )
+
+     m = _get_llm_and_path()
+     llm: Llama = m["llm"]
+     created = _now()
+     resp_id = _openai_id("cmpl")
+     model_name = f"{MODEL_REPO_ID}/{MODEL_FILE}"
+
+     if not stream:
+         with LOCK:
+             out = llm(
+                 prompt=prompt,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=False,
+             )
+         return {
+             "id": resp_id,
+             "object": "text_completion",
+             "created": created,
+             "model": model_name,
+             "choices": [
+                 {
+                     "text": out["choices"][0]["text"],
+                     "index": 0,
+                     "finish_reason": out["choices"][0].get("finish_reason", "stop"),
+                 }
+             ],
+         }
+
+     def gen() -> Iterable[str]:
+         # Hold the lock for the whole stream: generation happens lazily
+         # while iterating, so concurrent requests must not interleave
+         with LOCK:
+             it = llm(
+                 prompt=prompt,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=True,
+             )
+             for chunk in it:
+                 yield _sse({
+                     "id": resp_id,
+                     "object": "text_completion",
+                     "created": created,
+                     "model": model_name,
+                     "choices": [
+                         {
+                             "text": chunk["choices"][0].get("text", ""),
+                             "index": 0,
+                             "finish_reason": chunk["choices"][0].get("finish_reason"),
+                         }
+                     ],
+                 })
+         yield _sse_done()
+
+     return StreamingResponse(gen(), media_type="text/event-stream")
+
+
+ def _ui_generate(
+     prompt: str,
+     max_tokens: int,
+     temperature: float,
+     top_p: float,
+ ) -> str:
+     """Generate a text completion for the UI."""
+     m = _get_llm_and_path()
+     llm: Llama = m["llm"]
+     with LOCK:
+         out = llm(
+             prompt=prompt,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             stream=False,
+         )
+     return out["choices"][0]["text"]
+
+
+ DESCRIPTION = """
+ ### Prompt Generator (Q4_K_M, CPU)
+
+ A 600M-parameter Bloom-based model trained for creative prompt generation. Give it a short concept or idea, and it will generate detailed, creative prompts for image generation or other creative tasks.
+
+ **Note:** This is a **completion model** (not chat), so it continues your text rather than responding to it.
+
+ **API Endpoint:**
+ - `POST /v1/completions` - Text completions (supports streaming)
+
+ **Best for:** Generating creative prompts, expanding ideas, image prompt creation
+ """
+
+ with gr.Blocks(title="Prompt Generator", theme=gr.themes.Soft()) as demo:
+     gr.Markdown(DESCRIPTION)
+
+     with gr.Row():
+         with gr.Column():
+             input_text = gr.Textbox(
+                 label="Start your prompt",
+                 placeholder="a beautiful sunset over...",
+                 lines=3,
+                 info="Enter a short concept or the beginning of a prompt",
+             )
+             with gr.Row():
+                 max_tokens = gr.Slider(
+                     minimum=32, maximum=512, value=128, step=32, label="Max tokens"
+                 )
+                 temperature = gr.Slider(
+                     minimum=0.1, maximum=2.0, value=0.9, step=0.1, label="Temperature"
+                 )
+                 top_p = gr.Slider(
+                     minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
+                 )
+             generate_btn = gr.Button("Generate", variant="primary")
+
+         with gr.Column():
+             output_text = gr.Textbox(
+                 label="Generated prompt",
+                 lines=8,
+                 interactive=False,
+             )
+
+     examples = gr.Examples(
+         examples=[
+             ["a mysterious forest with"],
+             ["a futuristic city at night"],
+             ["an enchanted garden filled with"],
+             ["a steampunk airship flying over"],
+             ["a cozy coffee shop on a rainy day"],
+         ],
+         inputs=input_text,
+         label="Examples (click to try)",
+     )
+
+     generate_btn.click(
+         fn=_ui_generate,
+         inputs=[input_text, max_tokens, temperature, top_p],
+         outputs=output_text,
+     )
+
+
+ # Mount the Gradio UI onto the FastAPI app so both share port 7860
+ app = gr.mount_gradio_app(api, demo, path="/")
+
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="0.0.0.0", port=7860)
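
Because `completions` mirrors the OpenAI response shape, the official `openai` Python SDK can likely be pointed straight at the Space. A hedged sketch (the SDK is not a dependency of this Space, `YOUR_SPACE` is a placeholder, and the server never reads the `model` field):

```python
# Hypothetical OpenAI-SDK client; assumes `pip install openai`.
# The api_key is arbitrary because the server performs no auth,
# and app.py ignores the `model` field of the payload.
from openai import OpenAI

client = OpenAI(base_url="https://YOUR_SPACE.hf.space/v1", api_key="unused")
out = client.completions.create(
    model="mradermacher/Promt-generator-GGUF/Promt-generator.Q4_K_M.gguf",
    prompt="a mysterious castle on",
    max_tokens=100,
)
print(out.choices[0].text)
```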
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio>=4.0.0
+ fastapi>=0.115.0
+ uvicorn[standard]>=0.30.0
+ huggingface_hub>=0.26.0
+ numpy