gen

Sleeping

App Files Community

premalt committed on Sep 29, 2024

Commit

4bba0ce

1 Parent(s): 97bf850

fix input tokens

Browse files

Files changed (2) hide show

main.py +15 -30
requirements.txt +2 -1

main.py CHANGED Viewed

@@ -5,56 +5,43 @@ from fastapi import FastAPI
 from pydantic import BaseModel, Field
 from huggingface_hub import InferenceClient
 from typing import List
 app = FastAPI()
-client = InferenceClient("openai-community/gpt2")
 SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
 MAX_TOTAL_TOKENS = 1024
-TOKEN_COUNTING_TOKENS = 1  # Use a small number of tokens for counting
 class Item(BaseModel):
     prompt: str
     history: List[str] = []
-    temperature: float = Field(default=0.0, ge=0.0, le=1.0)
     max_new_tokens: int = Field(default=512, ge=1, le=MAX_TOTAL_TOKENS)
-    top_p: float = Field(default=0.15, ge=0.0, le=1.0)
     repetition_penalty: float = Field(default=1.0, ge=0.0)
 def format_prompt(message, history):
-    prompt = "<s>"
     for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
     return prompt
 def generate(item: Item):
     temperature = max(float(item.temperature), 1e-2)
-    formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}, {item.prompt}", item.history)
-    # Count input tokens by generating a small number of tokens
-    token_count_response = client.text_generation(
-        formatted_prompt,
-        max_new_tokens=TOKEN_COUNTING_TOKENS,
-        details=True,
-        return_full_text=False
-    )
-    input_tokens = token_count_response.details.input_tokens
-    # Calculate available tokens for generation
-    available_tokens = MAX_TOTAL_TOKENS - input_tokens - TOKEN_COUNTING_TOKENS
-    max_new_tokens = min(item.max_new_tokens, available_tokens)
     stream = client.text_generation(
         formatted_prompt,
-        temperature=temperature,
         max_new_tokens=max_new_tokens,
         top_p=float(item.top_p),
         repetition_penalty=item.repetition_penalty,
         do_sample=True,
@@ -63,20 +50,18 @@ def generate(item: Item):
         details=True,
         return_full_text=False,
     )
-    output = "".join(response.token.text for response in stream)
-    output = re.sub(r"<[^>]+>", "", output)
     output = re.sub(r"\s+", " ", output).strip()
     return output
 @app.get("/generate/")
 async def generate_text(
     prompt: str,
     history: List[str] = [],
-    temperature: float = 0.0,
     max_new_tokens: int = 512,
-    top_p: float = 0.15,
     repetition_penalty: float = 1.0,
 ):
     item = Item(

 from pydantic import BaseModel, Field
 from huggingface_hub import InferenceClient
 from typing import List
+from transformers import GPT2TokenizerFast
 app = FastAPI()
+client = InferenceClient("gpt2")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
 MAX_TOTAL_TOKENS = 1024
 class Item(BaseModel):
     prompt: str
     history: List[str] = []
+    temperature: float = Field(default=0.7, ge=0.0, le=1.0)
     max_new_tokens: int = Field(default=512, ge=1, le=MAX_TOTAL_TOKENS)
+    top_p: float = Field(default=0.9, ge=0.0, le=1.0)
     repetition_penalty: float = Field(default=1.0, ge=0.0)
 def format_prompt(message, history):
+    prompt = ""
     for user_prompt, bot_response in history:
+        prompt += f"Human: {user_prompt}\nAI: {bot_response}\n"
+    prompt += f"Human: {message}\nAI:"
     return prompt
 def generate(item: Item):
     temperature = max(float(item.temperature), 1e-2)
+    formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}\n{item.prompt}", item.history)
+    input_tokens = len(tokenizer.encode(formatted_prompt))
+    max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - input_tokens)
     stream = client.text_generation(
         formatted_prompt,
         max_new_tokens=max_new_tokens,
+        temperature=temperature,
         top_p=float(item.top_p),
         repetition_penalty=item.repetition_penalty,
         do_sample=True,
         details=True,
         return_full_text=False,
     )
+    output = "".join(chunk.token.text for chunk in stream)
     output = re.sub(r"\s+", " ", output).strip()
     return output
 @app.get("/generate/")
 async def generate_text(
     prompt: str,
     history: List[str] = [],
+    temperature: float = 0.7,
     max_new_tokens: int = 512,
+    top_p: float = 0.9,
     repetition_penalty: float = 1.0,
 ):
     item = Item(

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 fastapi
 uvicorn
 huggingface_hub
-pydantic

 fastapi
 uvicorn
 huggingface_hub
+pydantic
+transformers