TheUntraceable committed
Commit 13c78d8 · 1 Parent(s): 013032b

Format with Ruff

Files changed (1)
  1. main.py +9 -5
main.py CHANGED
@@ -7,14 +7,15 @@ from huggingface_hub import InferenceClient
 from typing import List
 
 # Set the cache directory to a writable location
-os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
 
 app = FastAPI()
 client = InferenceClient("facebook/opt-1.3b")
 
 SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
 
-MAX_TOTAL_TOKENS = 2048
+MAX_TOTAL_TOKENS = 2048
+
 
 class Item(BaseModel):
     prompt: str
@@ -24,6 +25,7 @@ class Item(BaseModel):
     top_p: float = Field(default=0.9, ge=0.0, le=1.0)
     repetition_penalty: float = Field(default=1.1, ge=0.0)
 
+
 def format_prompt(message, history):
     prompt = "".join(
         f"Human: {user_prompt}\nAI: {bot_response}\n"
@@ -32,11 +34,12 @@ def format_prompt(message, history):
     prompt += f"Human: {message}\nAI:"
     return prompt
 
+
 def generate(item: Item):
     temperature = max(float(item.temperature), 1e-2)
 
     formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}\n{item.prompt}", item.history)
-
+
     # A simple approximation for token count
     estimated_input_tokens = len(formatted_prompt.split())
     max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - estimated_input_tokens)
@@ -50,12 +53,13 @@ def generate(item: Item):
         do_sample=True,
         seed=42,
     )
-
+
     output = response.strip()
     output = re.sub(r"\s+", " ", output)
 
     return output
 
+
 @app.get("/generate/")
 async def generate_text(
     prompt: str,
@@ -76,4 +80,4 @@ async def generate_text(
 
     response = await asyncio.to_thread(generate, item)
 
-    return {"response": response}
+    return {"response": response}
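A note on the token budget visible in this diff: len(formatted_prompt.split()) counts whitespace-separated words, which generally undercounts the subword tokens the model actually sees, so max_new_tokens can still overshoot MAX_TOTAL_TOKENS. A minimal sketch of a more precise count, assuming the transformers library is installed and the facebook/opt-1.3b tokenizer can be downloaded; none of this is part of the commit:

# Sketch only: count tokens with the model's own tokenizer instead of
# the whitespace approximation used in main.py.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

def count_tokens(text: str) -> int:
    # encode() returns the token ids the model would actually consume
    return len(tokenizer.encode(text))

sample = "Human: Tell me a story\nAI:"
print(len(sample.split()), count_tokens(sample))  # word count vs. token count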
 
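For reference, a minimal sketch of exercising the endpoint after this change, assuming the app is served locally (for example with uvicorn main:app) on port 8000. Only the prompt query parameter is shown, since the endpoint's remaining parameters are truncated in the diff above:

# Sketch only: call the GET /generate/ endpoint defined in main.py.
import requests

resp = requests.get(
    "http://127.0.0.1:8000/generate/",
    params={"prompt": "Tell me a story about a lighthouse keeper."},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["response"])  # the handler returns {"response": ...}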