airsltd committed on
Commit
5392a8d
·
verified ·
1 Parent(s): 108f42d

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitignore +55 -0
  2. Dockerfile +16 -0
  3. app.py +194 -0
  4. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # Virtual environments
29
+ venv/
30
+ ENV/
31
+ env/
32
+
33
+ # Model cache
34
+ my_model_cache/
35
+ *.bin
36
+ *.safetensors
37
+
38
+ # IDE
39
+ .vscode/
40
+ .idea/
41
+ *.swp
42
+ *.swo
43
+ *~
44
+
45
+ # OS
46
+ .DS_Store
47
+ Thumbs.db
48
+
49
+ # Logs
50
+ *.log
51
+ logs/
52
+
53
+ # Temporary files
54
+ *.tmp
55
+ *.temp
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Hugging Face Spaces Docker SDK reference:
# https://huggingface.co/docs/hub/spaces-sdks-docker

FROM python:3.9

# Run as an unprivileged user (HF Spaces convention: uid 1000).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so this layer is cached across code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application code.
# NOTE(review): app.py appears to create its FastAPI `app` inside main(),
# so `uvicorn app:app` may not resolve a module-level `app` — verify.
COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Combined application that automatically downloads the model if needed and starts the FastAPI server.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ # Check if model exists, if not download it
11
def check_and_download_model():
    """Ensure the model is available in the local cache, downloading it if needed.

    Checks the Hugging Face hub cache layout under ``cache_dir`` for an
    existing snapshot of the model; if none is found, downloads the
    tokenizer and model weights into the cache.

    Returns:
        tuple[str, str]: ``(model_name, cache_dir)`` once the model is available.

    Exits the process with status 1 if the download fails.
    """
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from huggingface_hub import login

    # TODO: next, evaluate mlx-community/functiongemma-270m-it-4bit
    # Previously tested with TinyLlama - a fully public model:
    # model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    model_name = "unsloth/functiongemma-270m-it"
    cache_dir = "./my_model_cache"

    # HF hub cache layout: models--{org}--{name}/snapshots/<revision>/
    model_path = Path(cache_dir) / f"models--{model_name.replace('/', '--')}"
    snapshot_path = model_path / "snapshots"

    # A non-empty snapshots directory means at least one revision is cached.
    if snapshot_path.exists() and any(snapshot_path.iterdir()):
        print(f"✓ Model {model_name} already exists in cache")
        return model_name, cache_dir

    print(f"✗ Model {model_name} not found in cache")
    print("Downloading model...")

    # Login to Hugging Face (optional; only needed for gated models).
    token = os.getenv("HUGGINGFACE_TOKEN")
    if token:
        try:
            print("Logging in to Hugging Face...")
            login(token=token)
            print("✓ HuggingFace login successful!")
        except Exception as e:
            print(f"⚠ Login failed: {e}")
            print("Continuing without login (public models only)")
    else:
        print("ℹ No HUGGINGFACE_TOKEN set - using public models only")

    try:
        # from_pretrained populates cache_dir as a side effect; the returned
        # objects are not needed here, so they are deliberately not kept.
        print("Loading tokenizer...")
        AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        print("✓ Tokenizer loaded successfully!")

        print("Loading model...")
        AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
        print("✓ Model loaded successfully!")

        print(f"✓ Model and tokenizer downloaded successfully to {cache_dir}")
        return model_name, cache_dir

    except Exception as e:
        print(f"✗ Error downloading model: {e}")
        print("\nPossible reasons:")
        print("1. Model requires authentication - set HUGGINGFACE_TOKEN in .env")
        print("2. Model is gated and you don't have access")
        print("3. Network connection issues")
        sys.exit(1)
67
+
68
def main():
    """Download the model if needed, build the FastAPI app, and serve it."""
    print("=" * 60)
    print("FunctionGemma FastAPI Server")
    print("=" * 60)

    # Ensure the model is cached locally before the server starts.
    model_name, cache_dir = check_and_download_model()

    print("\nStarting FastAPI server...")

    from fastapi import FastAPI
    from transformers import pipeline

    # NOTE(review): the Dockerfile CMD runs `uvicorn app:app`, which expects a
    # module-level `app`; here `app` only exists inside main() — verify which
    # entry point is actually used in deployment.
    app = FastAPI(title="FunctionGemma API", version="1.0.0")

    # Build the text-generation pipeline once; shared by all endpoints.
    print(f"Initializing pipeline with {model_name}...")
    pipe = pipeline("text-generation", model=model_name)
    print("✓ Pipeline initialized successfully!")

    @app.get("/")
    def greet_json():
        """Welcome message with basic service status."""
        return {
            "message": "FunctionGemma API is running!",
            "model": model_name,
            "status": "ready"
        }

    @app.get("/health")
    def health_check():
        """Liveness probe."""
        return {"status": "healthy", "model": model_name}

    @app.get("/generate")
    def generate_text(prompt: str = "Who are you?"):
        """Generate text for a single user prompt."""
        messages = [{"role": "user", "content": prompt}]
        result = pipe(messages, max_new_tokens=100)
        return {"response": result[0]["generated_text"]}

    @app.post("/chat")
    def chat_completion(messages: list):
        """Chat completion over a raw message list."""
        result = pipe(messages, max_new_tokens=200)
        return {"response": result[0]["generated_text"]}

    @app.post("/v1/chat/completions")
    def openai_chat_completions(request: dict):
        """OpenAI-compatible chat completions endpoint.

        Expected request format:
        {
            "model": "google/gemma-2b-it",
            "messages": [
                {"role": "user", "content": "Hello"}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }
        """
        import time

        messages = request.get("messages", [])
        model = request.get("model", model_name)
        max_tokens = request.get("max_tokens", 100)
        # NOTE(review): temperature is accepted but not yet forwarded to the
        # pipeline (the sampling argument was commented out in the original).
        temperature = request.get("temperature", 0.7)

        # Generate the assistant response.
        result = pipe(
            messages,
            max_new_tokens=max_tokens,
        )

        completion_id = f"chatcmpl-{int(time.time())}"
        created = int(time.time())

        return {
            "id": completion_id,
            "object": "chat.completion",
            "created": created,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": result[0]["generated_text"]
                    },
                    "finish_reason": "stop"
                }
            ],
            "usage": {
                "prompt_tokens": 0,  # would need the tokenizer to count
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }

    # Run the server. Port must match the Dockerfile / HF Spaces port (7860).
    import uvicorn
    print("\n" + "=" * 60)
    print("Server starting at http://localhost:7860")
    print("Available endpoints:")
    print("  GET  /              - Welcome message")
    print("  GET  /health        - Health check")
    print("  GET  /generate?prompt=... - Generate text with prompt")
    print("  POST /chat          - Chat completion")
    print("  POST /v1/chat/completions - OpenAI-compatible endpoint")
    print("=" * 60 + "\n")

    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ transformers
4
+ huggingface_hub
5
+ torch
6
+ accelerate