premalt committed on
Commit
97bf850
·
1 Parent(s): 5867919

max new tokens now positive

Browse files
Files changed (1) hide show
  1. main.py +13 -3
main.py CHANGED
@@ -13,6 +13,7 @@ client = InferenceClient("openai-community/gpt2")
13
  SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
14
 
15
  MAX_TOTAL_TOKENS = 1024
 
16
 
17
  class Item(BaseModel):
18
  prompt: str
@@ -37,9 +38,18 @@ def generate(item: Item):
37
 
38
  formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}, {item.prompt}", item.history)
39
 
40
- # Use the text_generation method to get the number of input tokens
41
- input_tokens = client.text_generation(formatted_prompt, max_new_tokens=0).details.input_tokens
42
- max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - input_tokens)
 
 
 
 
 
 
 
 
 
43
 
44
  stream = client.text_generation(
45
  formatted_prompt,
 
13
  SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
14
 
15
  MAX_TOTAL_TOKENS = 1024
16
+ TOKEN_COUNTING_TOKENS = 1 # Use a small number of tokens for counting
17
 
18
  class Item(BaseModel):
19
  prompt: str
 
38
 
39
  formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}, {item.prompt}", item.history)
40
 
41
+ # Count input tokens by generating a small number of tokens
42
+ token_count_response = client.text_generation(
43
+ formatted_prompt,
44
+ max_new_tokens=TOKEN_COUNTING_TOKENS,
45
+ details=True,
46
+ return_full_text=False
47
+ )
48
+ input_tokens = token_count_response.details.input_tokens
49
+
50
+ # Calculate available tokens for generation
51
+ available_tokens = MAX_TOTAL_TOKENS - input_tokens - TOKEN_COUNTING_TOKENS
52
+ max_new_tokens = min(item.max_new_tokens, available_tokens)
53
 
54
  stream = client.text_generation(
55
  formatted_prompt,