HFHAB committed · verified
Commit 2b8372f · 1 Parent(s): d5d515c

initial version from iiced

Files changed (1):
  1. main.py (+11 -6)
main.py CHANGED
@@ -12,8 +12,10 @@ class Item(BaseModel):
     prompt: str
     history: list
     system_prompt: str
-    temperature: float = 0.3
-    max_new_tokens: int = 4000
+    temperature: float = 0.0
+    max_new_tokens: int = 1048
+    top_p: float = 0.15
+    repetition_penalty: float = 1.0
 
 def format_prompt(message, history):
     prompt = "<s>"
@@ -27,15 +29,19 @@ def generate(item: Item):
     temperature = float(item.temperature)
     if temperature < 1e-2:
         temperature = 1e-2
+    top_p = float(item.top_p)
 
     generate_kwargs = dict(
         temperature=temperature,
         max_new_tokens=item.max_new_tokens,
-        do_sample=True
+        top_p=top_p,
+        repetition_penalty=item.repetition_penalty,
+        do_sample=True,
+        seed=42,
     )
 
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True)
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
 
     for response in stream:
@@ -44,5 +50,4 @@ def generate(item: Item):
 
 @app.post("/generate/")
 async def generate_text(item: Item):
-    return {"response": generate(item)}
-
+    return {"response": generate(item)}