airsltd committed on
Commit
05e9938
·
verified ·
1 Parent(s): 8eae1e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -2,6 +2,7 @@
2
  """
3
  FastAPI application for FunctionGemma with HuggingFace login support.
4
  This file is designed to be run with: uvicorn app:app --host 0.0.0.0 --port 7860
 
5
  """
6
 
7
  import os
@@ -14,11 +15,12 @@ from huggingface_hub import login
14
  # Global variables
15
  model_name = None
16
  pipe = None
 
17
  app = FastAPI(title="FunctionGemma API", version="1.0.0")
18
 
19
  def check_and_download_model():
20
  """Check if model exists in cache, if not download it"""
21
- global model_name
22
 
23
  # Use TinyLlama - a fully public model
24
  # model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
@@ -32,6 +34,7 @@ def check_and_download_model():
32
 
33
  if snapshot_path.exists() and any(snapshot_path.iterdir()):
34
  print(f"✓ Model {model_name} already exists in cache")
 
35
  return model_name, cache_dir
36
 
37
  print(f"✗ Model {model_name} not found in cache")
@@ -74,13 +77,16 @@ def check_and_download_model():
74
 
75
  def initialize_pipeline():
76
  """Initialize the pipeline with the model"""
77
- global pipe, model_name
78
 
79
  if model_name is None:
80
  model_name, _ = check_and_download_model()
81
 
 
 
 
82
  print(f"Initializing pipeline with {model_name}...")
83
- pipe = pipeline("text-generation", model=model_name)
84
  print("✓ Pipeline initialized successfully!")
85
 
86
  # API Endpoints
@@ -103,7 +109,7 @@ def generate_text(prompt: str = "Who are you?"):
103
  initialize_pipeline()
104
 
105
  messages = [{"role": "user", "content": prompt}]
106
- result = pipe(messages, max_new_tokens=100)
107
  return {"response": result[0]["generated_text"]}
108
 
109
  @app.post("/chat")
@@ -136,7 +142,7 @@ def openai_chat_completions(request: dict):
136
 
137
  messages = request.get("messages", [])
138
  model = request.get("model", model_name)
139
- max_tokens = request.get("max_tokens", 1000)
140
  temperature = request.get("temperature", 0.7)
141
 
142
  print('\n\n request')
@@ -188,11 +194,28 @@ def openai_chat_completions(request: dict):
188
  }
189
  ],
190
  "usage": {
191
- "prompt_tokens": 0, # Would need tokenizer to calculate
192
  "completion_tokens": 0,
193
  "total_tokens": 0
194
  }
195
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  print('\n\n return_json')
197
  print(return_json)
198
  print('return over! \n\n')
 
2
  """
3
  FastAPI application for FunctionGemma with HuggingFace login support.
4
  This file is designed to be run with: uvicorn app:app --host 0.0.0.0 --port 7860
5
+ 修复:增加token计算 (Fix: add token counting)
6
  """
7
 
8
  import os
 
15
  # Global variables
16
  model_name = None
17
  pipe = None
18
+ tokenizer = None # Add global tokenizer
19
  app = FastAPI(title="FunctionGemma API", version="1.0.0")
20
 
21
  def check_and_download_model():
22
  """Check if model exists in cache, if not download it"""
23
+ global model_name, tokenizer # Include tokenizer in global
24
 
25
  # Use TinyLlama - a fully public model
26
  # model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
34
 
35
  if snapshot_path.exists() and any(snapshot_path.iterdir()):
36
  print(f"✓ Model {model_name} already exists in cache")
37
+ tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) # Load tokenizer if model exists
38
  return model_name, cache_dir
39
 
40
  print(f"✗ Model {model_name} not found in cache")
 
77
 
78
  def initialize_pipeline():
79
  """Initialize the pipeline with the model"""
80
+ global pipe, model_name, tokenizer # Include tokenizer in global
81
 
82
  if model_name is None:
83
  model_name, _ = check_and_download_model()
84
 
85
+ if tokenizer is None: # Ensure tokenizer is loaded
86
+ tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./my_model_cache")
87
+
88
  print(f"Initializing pipeline with {model_name}...")
89
+ pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer) # Pass tokenizer to pipeline
90
  print("✓ Pipeline initialized successfully!")
91
 
92
  # API Endpoints
 
109
  initialize_pipeline()
110
 
111
  messages = [{"role": "user", "content": prompt}]
112
+ result = pipe(messages, max_new_tokens=1000)
113
  return {"response": result[0]["generated_text"]}
114
 
115
  @app.post("/chat")
 
142
 
143
  messages = request.get("messages", [])
144
  model = request.get("model", model_name)
145
+ max_tokens = request.get("max_tokens", 100)
146
  temperature = request.get("temperature", 0.7)
147
 
148
  print('\n\n request')
 
194
  }
195
  ],
196
  "usage": {
197
+ "prompt_tokens": 0,
198
  "completion_tokens": 0,
199
  "total_tokens": 0
200
  }
201
  }
202
+
203
+ # Calculate prompt tokens
204
+ if tokenizer:
205
+ prompt_text = ""
206
+ for message in messages:
207
+ prompt_text += message.get("content", "") + " "
208
+ prompt_tokens = len(tokenizer.encode(prompt_text.strip()))
209
+ return_json["usage"]["prompt_tokens"] = prompt_tokens
210
+
211
+ # Calculate completion tokens
212
+ if tokenizer and result["generations"]:
213
+ completion_text = result["generations"][0][0]["text"]
214
+ completion_tokens = len(tokenizer.encode(completion_text))
215
+ return_json["usage"]["completion_tokens"] = completion_tokens
216
+
217
+ return_json["usage"]["total_tokens"] = return_json["usage"]["prompt_tokens"] + return_json["usage"]["completion_tokens"]
218
+
219
  print('\n\n return_json')
220
  print(return_json)
221
  print('return over! \n\n')