Loomis Green committed on
Commit · e3e877e
Parent(s): 71394ea
Deploy Google Flan T5 FastAPI Docker app
Browse files
- .dockerignore +5 -0
- .gitignore +5 -0
- DS_Store +0 -0
- Dockerfile +8 -37
- README.md +23 -13
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +19 -0
- docker-compose.yml +11 -0
- main.py +0 -78
- requirements.txt +3 -3
.dockerignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__
+.DS_Store
+venv
+.env
+.git
.gitignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__
+.DS_Store
+venv
+.env
+.git
DS_Store
ADDED
Binary file (6.15 kB).
Dockerfile
CHANGED
@@ -1,45 +1,16 @@
-# Use standard Python 3.10 slim image (Lightweight & Compatible)
 FROM python:3.10-slim
 
-# build-essential & cmake: required if fallback to source build occurs
-# libopenblas-dev: for optimized matrix operations
-# libgomp1: for OpenMP
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    cmake \
-    libopenblas-dev \
-    libgomp1 \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
+RUN useradd user
 
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt --default-timeout=1000
+USER user
 
-# INSTALL PRE-COMPILED LLAMA-CPP-PYTHON
-# -----------------------------------------------------------------------------
-# We install from the 'cpu' specific index.
-# We added build-essential and cmake above so that if a wheel isn't found,
-# it can successfully build from source without erroring out.
-RUN pip install llama-cpp-python \
-    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
-    --default-timeout=1000
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
 
-COPY . .
+WORKDIR $HOME/app
 
-RUN mkdir -p /app/model_cache && chmod 777 /app/model_cache
+COPY --chown=user ./ $HOME/app
 
-# Expose port
-EXPOSE 7860
+RUN pip install -r requirements.txt
 
-# Start the application
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD fastapi run --reload --host=0.0.0.0 --port=7860
README.md
CHANGED
@@ -1,21 +1,31 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Google Flan Fastapi
+emoji: 👁
+colorFrom: green
+colorTo: gray
 sdk: docker
 pinned: false
-license:
-short_description: Qwen 2.5 Coder 7B (GGUF/CPU Version)
+license: mit
 ---
 
-#
+# Huggingface Spaces for Docker with FastAPI
 
-* **Speed:** Slower than GPU, but reliable.
+## Overview
+This repository contains a simple example of how to deploy a Huggingface model using Docker and FastAPI. The model used is the `google-flan-t5-base` model from the Huggingface model hub.
 
-##
+## Usage
+To run the FastAPI server, you can use the following command:
+```bash
+docker compose up --build
+```
+Then visit `http://localhost:7860/docs` to see the API documentation.
+
+## Deployment to Hugging Face Spaces
+1. Create a new Space on Hugging Face (SDK: Docker).
+2. Push these files to the Space's repository.
+3. The Dockerfile will automatically build and serve the app on port 7860.
+
+## Useful Links
+- [Google Flan T5 Base Model](https://huggingface.co/google/flan-t5-base)
+- [Files](https://huggingface.co/spaces/sarthaksavvy/google-flan-fastapi/tree/main)
__pycache__/app.cpython-311.pyc
ADDED
Binary file (1.01 kB).
app.py
ADDED
@@ -0,0 +1,19 @@
+from fastapi import FastAPI
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text2text-generation", model="google/flan-t5-base")
+
+app = FastAPI()
+
+
+@app.get('/')
+def home():
+    return {"message": "Loomyloo Gateway API is running"}
+
+
+@app.get('/ask')
+def ask(prompt: str):
+    # This uses the google/flan-t5-base model loaded above
+    result = pipe(prompt)
+    return result[0]
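For reference, once the container defined in docker-compose.yml (below) is running, the two routes added in app.py can be exercised from Python. This is a minimal sketch, assuming the server is reachable on localhost:7860 and that the `requests` package is installed on the client side (it is not part of requirements.txt):

```python
import requests

BASE_URL = "http://localhost:7860"  # assumes the compose setup below is running

# Health check against the root route defined in app.py
print(requests.get(f"{BASE_URL}/").json())

# Query the flan-t5 pipeline via the /ask route; the prompt is a query parameter
resp = requests.get(f"{BASE_URL}/ask", params={"prompt": "Translate English to German: Good morning"})
print(resp.json())  # e.g. {"generated_text": "..."}
```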
docker-compose.yml
ADDED
@@ -0,0 +1,11 @@
+version: '3.8'
+
+services:
+  app:
+    build: .
+    ports:
+      - "7860:7860"
+    volumes:
+      - .:/home/user/app
+    environment:
+      - TRANSFORMERS_CACHE=/home/user/app/cache
main.py
DELETED
@@ -1,78 +0,0 @@
-import os
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-
-app = FastAPI()
-
-# ---------------------------------------------------------
-# CONFIGURATION
-# ---------------------------------------------------------
-# We use Qwen 2.5 Coder 7B (GGUF) - Small & Fast on CPU
-REPO_ID = "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF"
-FILENAME = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
-MODEL_PATH = f"./model_cache/{FILENAME}"
-
-# Global model variable
-llm = None
-
-def load_model():
-    global llm
-    if not os.path.exists(MODEL_PATH):
-        print(f"📥 Downloading {FILENAME} from Hugging Face...")
-        hf_hub_download(
-            repo_id=REPO_ID,
-            filename=FILENAME,
-            local_dir="./model_cache",
-            local_dir_use_symlinks=False
-        )
-        print("✅ Download complete.")
-
-    print("🚀 Loading Model into RAM...")
-    # n_ctx=8192 allows for decent context window
-    # n_threads=2 is optimized for Hugging Face Free Tier (2 vCPUs)
-    llm = Llama(model_path=MODEL_PATH, n_ctx=8192, n_threads=2)
-    print("✅ Model Loaded!")
-
-# Load model on startup
-@app.on_event("startup")
-def startup_event():
-    load_model()
-
-# ---------------------------------------------------------
-# API ENDPOINTS
-# ---------------------------------------------------------
-
-class ChatRequest(BaseModel):
-    prompt: str
-
-@app.get("/")
-def read_root():
-    return {"status": "running", "model": REPO_ID}
-
-@app.post("/chat")
-def chat(request: ChatRequest):
-    global llm
-    if not llm:
-        raise HTTPException(status_code=500, detail="Model not loaded")
-
-    # Format prompt for Qwen (ChatML style is best, but basic instruct works)
-    # Simple Instruct Format:
-    formatted_prompt = f"<|im_start|>user\n{request.prompt}<|im_end|>\n<|im_start|>assistant\n"
-
-    print(f"📩 Generating response for: {request.prompt[:50]}...")
-
-    output = llm(
-        formatted_prompt,
-        max_tokens=1024,
-        stop=["<|im_end|>", "User:"],
-        echo=False
-    )
-
-    response_text = output['choices'][0]['text']
-    return {"response": response_text.strip()}
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
 fastapi
 uvicorn
-
-
-
+transformers
+torch
+torchvision