premalt committed on
Commit
72d1491
·
1 Parent(s): 4bba0ce

fix cache directory

Browse files
Files changed (2) hide show
  1. main.py +11 -11
  2. requirements.txt +1 -2
main.py CHANGED
@@ -1,15 +1,16 @@
1
- import uvicorn
2
  import re
3
  import asyncio
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel, Field
6
  from huggingface_hub import InferenceClient
7
  from typing import List
8
- from transformers import GPT2TokenizerFast
 
 
9
 
10
  app = FastAPI()
11
  client = InferenceClient("gpt2")
12
- tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
13
 
14
  SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
15
 
@@ -35,10 +36,11 @@ def generate(item: Item):
35
 
36
  formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}\n{item.prompt}", item.history)
37
 
38
- input_tokens = len(tokenizer.encode(formatted_prompt))
39
- max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - input_tokens)
 
40
 
41
- stream = client.text_generation(
42
  formatted_prompt,
43
  max_new_tokens=max_new_tokens,
44
  temperature=temperature,
@@ -46,12 +48,10 @@ def generate(item: Item):
46
  repetition_penalty=item.repetition_penalty,
47
  do_sample=True,
48
  seed=42,
49
- stream=True,
50
- details=True,
51
- return_full_text=False,
52
  )
53
- output = "".join(chunk.token.text for chunk in stream)
54
- output = re.sub(r"\s+", " ", output).strip()
 
55
 
56
  return output
57
 
 
1
+ import os
2
  import re
3
  import asyncio
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel, Field
6
  from huggingface_hub import InferenceClient
7
  from typing import List
8
+
9
+ # Set the cache directory to a writable location
10
+ os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
11
 
12
  app = FastAPI()
13
  client = InferenceClient("gpt2")
 
14
 
15
  SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
16
 
 
36
 
37
  formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}\n{item.prompt}", item.history)
38
 
39
+ # A simple approximation for token count
40
+ estimated_input_tokens = len(formatted_prompt.split())
41
+ max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - estimated_input_tokens)
42
 
43
+ response = client.text_generation(
44
  formatted_prompt,
45
  max_new_tokens=max_new_tokens,
46
  temperature=temperature,
 
48
  repetition_penalty=item.repetition_penalty,
49
  do_sample=True,
50
  seed=42,
 
 
 
51
  )
52
+
53
+ output = response.strip()
54
+ output = re.sub(r"\s+", " ", output)
55
 
56
  return output
57
 
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  fastapi
2
  uvicorn
3
  huggingface_hub
4
- pydantic
5
- transformers
 
1
  fastapi
2
  uvicorn
3
  huggingface_hub
4
+ pydantic