pr0methium committed
Commit 72c4724 · verified · 1 Parent(s): 13c78d8

Update main.py

Files changed (1)
main.py +8 -13
main.py CHANGED
@@ -7,16 +7,15 @@ from huggingface_hub import InferenceClient
 from typing import List
 
 # Set the cache directory to a writable location
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
 
 app = FastAPI()
-client = InferenceClient("facebook/opt-1.3b")
+client = InferenceClient("EleutherAI/gpt-neo-125M")
 
 SYSTEM_PROMPT = "You are a very powerful AI to generate interesting stories for short-form content consumption. Make sure to hook the readers attention in the first few seconds. Make sure to be engaging and creative in your responses."
 
 MAX_TOTAL_TOKENS = 2048
 
-
 class Item(BaseModel):
     prompt: str
     history: List[str] = []
@@ -25,21 +24,18 @@ class Item(BaseModel):
     top_p: float = Field(default=0.9, ge=0.0, le=1.0)
     repetition_penalty: float = Field(default=1.1, ge=0.0)
 
-
 def format_prompt(message, history):
-    prompt = "".join(
-        f"Human: {user_prompt}\nAI: {bot_response}\n"
-        for user_prompt, bot_response in history
-    )
+    prompt = ""
+    for user_prompt, bot_response in history:
+        prompt += f"Human: {user_prompt}\nAI: {bot_response}\n"
     prompt += f"Human: {message}\nAI:"
     return prompt
 
-
 def generate(item: Item):
     temperature = max(float(item.temperature), 1e-2)
 
     formatted_prompt = format_prompt(f"{SYSTEM_PROMPT}\n{item.prompt}", item.history)
-
+
     # A simple approximation for token count
     estimated_input_tokens = len(formatted_prompt.split())
     max_new_tokens = min(item.max_new_tokens, MAX_TOTAL_TOKENS - estimated_input_tokens)
@@ -53,13 +49,12 @@ def generate(item: Item):
         do_sample=True,
         seed=42,
     )
-
+
     output = response.strip()
     output = re.sub(r"\s+", " ", output)
 
     return output
 
-
 @app.get("/generate/")
 async def generate_text(
     prompt: str,
@@ -80,4 +75,4 @@ async def generate_text(
 
     response = await asyncio.to_thread(generate, item)
 
-    return {"response": response}
+    return {"response": response}
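
For a quick sanity check, the rewritten format_prompt can be exercised on its own. A minimal standalone sketch of the new loop, with a made-up two-turn history (the pairs are illustrative, not from the repo):

```python
# Standalone copy of the new format_prompt from this commit.
def format_prompt(message, history):
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"Human: {user_prompt}\nAI: {bot_response}\n"
    prompt += f"Human: {message}\nAI:"
    return prompt

# Illustrative history; the real app builds this from the request.
history = [
    ("Give me a story hook.", "A lighthouse keeper finds a door in the sea."),
    ("Make it darker.", "The door only opens for the already lost."),
]
print(format_prompt("Continue the story.", history))
# Human: Give me a story hook.
# AI: A lighthouse keeper finds a door in the sea.
# Human: Make it darker.
# AI: The door only opens for the already lost.
# Human: Continue the story.
# AI:
```

One wrinkle worth flagging: Item declares history: List[str], yet format_prompt unpacks each entry into a (user_prompt, bot_response) pair, so callers presumably send pair-like entries rather than flat strings.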
 
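The token-budget arithmetic is untouched by this diff: input size is approximated with a whitespace split, and the generation budget is clipped against MAX_TOTAL_TOKENS. A minimal sketch of the same arithmetic (the helper name and sample numbers are mine, not from the repo):

```python
MAX_TOTAL_TOKENS = 2048  # same cap as main.py

def budget_new_tokens(formatted_prompt: str, requested: int) -> int:
    # Whitespace word count as a cheap stand-in for a real token count;
    # tokenizers usually emit more tokens than words, so this tends to
    # overestimate the room left for generation.
    estimated_input_tokens = len(formatted_prompt.split())
    return min(requested, MAX_TOTAL_TOKENS - estimated_input_tokens)

print(budget_new_tokens("Human: tell me a story\nAI:", 512))  # -> 512
```

Note that the clip can go negative for a prompt longer than the cap, which the downstream text_generation call would likely reject; guarding with max(0, ...) would be a natural hardening.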
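
Since /generate/ is a GET route, the updated Space can be smoke-tested with a plain HTTP request. A hedged sketch: the base URL is a placeholder, and only the prompt query parameter is visible in this diff (the rest of the signature is truncated above):

```python
import requests

# Placeholder base URL; point it at wherever the FastAPI app is served.
BASE_URL = "http://localhost:8000"

resp = requests.get(
    f"{BASE_URL}/generate/",
    params={"prompt": "A clockmaker discovers she can repair memories."},
)
resp.raise_for_status()
print(resp.json()["response"])  # the route returns {"response": ...}
```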