airsltd committed on
Commit
383b979
·
verified ·
1 Parent(s): 5392a8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -117
app.py CHANGED
@@ -1,19 +1,25 @@
1
  #!/usr/bin/env python3
2
  """
3
- Combined application that automatically downloads the model if needed and starts the FastAPI server.
 
4
  """
5
 
6
  import os
7
  import sys
8
  from pathlib import Path
 
 
 
 
 
 
 
 
9
 
10
- # Check if model exists, if not download it
11
  def check_and_download_model():
12
  """Check if model exists in cache, if not download it"""
13
- from transformers import AutoTokenizer, AutoModelForCausalLM
14
- from huggingface_hub import login
15
 
16
- # 下一步测试 mlx-community/functiongemma-270m-it-4bit
17
  # Use TinyLlama - a fully public model
18
  # model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
19
  model_name = "unsloth/functiongemma-270m-it"
@@ -65,121 +71,128 @@ def check_and_download_model():
65
  print("3. Network connection issues")
66
  sys.exit(1)
67
 
68
- def main():
69
- """Main function to start the application"""
70
- print("=" * 60)
71
- print("FunctionGemma FastAPI Server")
72
- print("=" * 60)
73
-
74
- # Check and download model if needed
75
- model_name, cache_dir = check_and_download_model()
76
-
77
- # Now import and start the FastAPI app
78
- print("\nStarting FastAPI server...")
79
-
80
- from fastapi import FastAPI
81
- from transformers import pipeline
82
 
83
- app = FastAPI(title="FunctionGemma API", version="1.0.0")
 
84
 
85
- # Initialize pipeline
86
  print(f"Initializing pipeline with {model_name}...")
87
  pipe = pipeline("text-generation", model=model_name)
88
  print("✓ Pipeline initialized successfully!")
89
-
90
- @app.get("/")
91
- def greet_json():
92
- return {
93
- "message": "FunctionGemma API is running!",
94
- "model": model_name,
95
- "status": "ready"
96
- }
97
-
98
- @app.get("/health")
99
- def health_check():
100
- return {"status": "healthy", "model": model_name}
101
-
102
- @app.get("/generate")
103
- def generate_text(prompt: str = "Who are you?"):
104
- """Generate text using the model"""
105
- messages = [{"role": "user", "content": prompt}]
106
- result = pipe(messages, max_new_tokens=100)
107
- return {"response": result[0]["generated_text"]}
108
-
109
- @app.post("/chat")
110
- def chat_completion(messages: list):
111
- """Chat completion endpoint"""
112
- result = pipe(messages, max_new_tokens=200)
113
- return {"response": result[0]["generated_text"]}
114
-
115
- @app.post("/v1/chat/completions")
116
- def openai_chat_completions(request: dict):
117
- print('\n\n request')
118
- print(request)
119
- """
120
- OpenAI-compatible chat completions endpoint
121
- Expected request format:
122
- {
123
- "model": "google/gemma-2b-it",
124
- "messages": [
125
- {"role": "user", "content": "Hello"}
126
- ],
127
- "max_tokens": 100,
128
- "temperature": 0.7
129
- }
130
- """
131
- import time
132
-
133
- messages = request.get("messages", [])
134
- model = request.get("model", model_name)
135
- max_tokens = request.get("max_tokens", 100)
136
- temperature = request.get("temperature", 0.7)
137
- print('\n\n messages')
138
- print(messages)
139
- print('\n\n model')
140
- print(model)
141
- print('\n\n max_tokens')
142
- print(max_tokens)
143
- print('\n\n temperature')
144
- print(temperature)
145
-
146
- # Generate response
147
- result = pipe(
148
- messages,
149
- max_new_tokens=max_tokens,
150
- # temperature=temperature
151
- )
152
- print('asdfasdfasdfasdf')
153
-
154
- completion_id = f"chatcmpl-{int(time.time())}"
155
- created = int(time.time())
156
-
157
- return {
158
- "id": completion_id,
159
- "object": "chat.completion",
160
- "created": created,
161
- "model": model,
162
- "choices": [
163
- {
164
- "index": 0,
165
- "message": {
166
- "role": "assistant",
167
- "content": result[0]["generated_text"]
168
- },
169
- "finish_reason": "stop"
170
- }
171
- ],
172
- "usage": {
173
- "prompt_tokens": 0, # Would need tokenizer to calculate
174
- "completion_tokens": 0,
175
- "total_tokens": 0
 
 
 
 
 
176
  }
 
 
 
 
 
177
  }
178
-
179
- # Run the server
180
- import uvicorn
 
 
 
 
 
 
 
 
181
  print("\n" + "=" * 60)
182
- print("Server starting at http://localhost:8000")
183
  print("Available endpoints:")
184
  print(" GET / - Welcome message")
185
  print(" GET /health - Health check")
@@ -187,8 +200,3 @@ def main():
187
  print(" POST /chat - Chat completion")
188
  print(" POST /v1/chat/completions - OpenAI-compatible endpoint")
189
  print("=" * 60 + "\n")
190
-
191
- uvicorn.run(app, host="0.0.0.0", port=7860)
192
-
193
- if __name__ == "__main__":
194
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ FastAPI application for FunctionGemma with HuggingFace login support.
4
+ This file is designed to be run with: uvicorn app:app --host 0.0.0.0 --port 7860
5
  """
6
 
7
  import os
8
  import sys
9
  from pathlib import Path
10
+ from fastapi import FastAPI
11
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
12
+ from huggingface_hub import login
13
+
14
+ # Global variables
15
+ model_name = None
16
+ pipe = None
17
+ app = FastAPI(title="FunctionGemma API", version="1.0.0")
18
 
 
19
  def check_and_download_model():
20
  """Check if model exists in cache, if not download it"""
21
+ global model_name
 
22
 
 
23
  # Use TinyLlama - a fully public model
24
  # model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
25
  model_name = "unsloth/functiongemma-270m-it"
 
71
  print("3. Network connection issues")
72
  sys.exit(1)
73
 
74
def initialize_pipeline():
    """Build the global text-generation pipeline, resolving the model first if needed.

    Side effects: sets the module-level globals ``model_name`` (if not already
    resolved via check_and_download_model) and ``pipe``. Prints progress to stdout.
    """
    global pipe, model_name

    # Resolve the model identifier lazily; check_and_download_model also
    # returns a cache dir, which this function does not need.
    if model_name is None:
        resolved_name, _cache = check_and_download_model()
        model_name = resolved_name

    print(f"Initializing pipeline with {model_name}...")
    pipe = pipeline("text-generation", model=model_name)
    print("✓ Pipeline initialized successfully!")
84
+
85
# API Endpoints
@app.get("/")
def greet_json():
    """Welcome endpoint: reports that the API is up and which model is configured."""
    payload = {
        "message": "FunctionGemma API is running!",
        "model": model_name,
        "status": "ready",
    }
    return payload
93
+
94
@app.get("/health")
def health_check():
    """Lightweight liveness probe reporting the configured model."""
    return dict(status="healthy", model=model_name)
97
+
98
@app.get("/generate")
def generate_text(prompt: str = "Who are you?"):
    """Run the model on a single user *prompt* and return its generated text."""
    # Lazily build the pipeline on first use (startup normally does this already).
    if pipe is None:
        initialize_pipeline()

    chat = [{"role": "user", "content": prompt}]
    outputs = pipe(chat, max_new_tokens=100)
    first = outputs[0]
    return {"response": first["generated_text"]}
107
+
108
@app.post("/chat")
def chat_completion(messages: list):
    """Chat completion over a full message list (role/content dicts)."""
    # Lazily build the pipeline on first use (startup normally does this already).
    if pipe is None:
        initialize_pipeline()

    generated = pipe(messages, max_new_tokens=200)
    return {"response": generated[0]["generated_text"]}
116
+
117
@app.post("/v1/chat/completions")
def openai_chat_completions(request: dict):
    """
    OpenAI-compatible chat completions endpoint.

    Expected request format:
    {
        "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "messages": [
            {"role": "user", "content": "Hello"}
        ],
        "max_tokens": 100,
        "temperature": 0.7
    }

    Returns an OpenAI-style ``chat.completion`` payload. Token usage is
    reported as zeros: computing real counts would require running the
    tokenizer over prompt and completion.
    """
    import time

    # Lazily build the pipeline on first use (startup normally does this already).
    if pipe is None:
        initialize_pipeline()

    messages = request.get("messages", [])
    model = request.get("model", model_name)
    max_tokens = request.get("max_tokens", 100)
    # Accepted for API compatibility but not forwarded to the pipeline:
    # greedy decoding is used, and honoring temperature would also require
    # enabling sampling (do_sample=True).
    temperature = request.get("temperature", 0.7)  # noqa: F841

    # Generate the assistant reply.
    result = pipe(messages, max_new_tokens=max_tokens)

    # Use a single timestamp so the completion id and "created" field agree.
    created = int(time.time())
    completion_id = f"chatcmpl-{created}"

    return {
        "id": completion_id,
        "object": "chat.completion",
        "created": created,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": result[0]["generated_text"],
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": 0,  # Would need tokenizer to calculate
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
184
+
185
+ # Initialize model on startup
186
+ @app.on_event("startup")
187
+ async def startup_event():
188
+ """Initialize the model when the app starts"""
189
+ print("=" * 60)
190
+ print("FunctionGemma FastAPI Server")
191
+ print("=" * 60)
192
+ print("Initializing model...")
193
+ initialize_pipeline()
194
  print("\n" + "=" * 60)
195
+ print("Server ready at http://0.0.0.0:7860")
196
  print("Available endpoints:")
197
  print(" GET / - Welcome message")
198
  print(" GET /health - Health check")
 
200
  print(" POST /chat - Chat completion")
201
  print(" POST /v1/chat/completions - OpenAI-compatible endpoint")
202
  print("=" * 60 + "\n")