MightyOctopus committed on
Commit
19ecd82
·
verified ·
1 Parent(s): ecdc97d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -44
app.py CHANGED
@@ -1,13 +1,13 @@
1
  ##########====================================================================################
2
  ##########====================PRODUCTION VERSION -- vLLM, GRADIO=====================###########
3
  ##########====================================================================################
4
- import os
5
  import requests
6
  from typing import List, Dict, Tuple
7
  from datetime import datetime
8
  from anthropic import Anthropic
9
  from openai import OpenAI
10
- import time, gradio as gr
11
  from tqdm import tqdm
12
 
13
  ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
@@ -22,6 +22,18 @@ open_source_client = OpenAI(api_key="EMPTY", base_url=VLLM_API)
22
  claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def invoke_messages(
26
  rows_num: int,
27
  business_category: str,
@@ -76,39 +88,24 @@ def pass_claude_msg(file_format: str, content: str) -> Tuple[str, str]:
76
 
77
 
78
  def generate_output(messages):
79
- enable_model()
80
-
81
- inputs = tokenizer.apply_chat_template(
82
- messages,
83
- return_tensors="pt",
84
- return_dict=True, ### IMPORTANT: to get a mapping
85
- tokenize=True,
86
- add_generation_prompt=True,
87
- padding=True,
88
- return_attention_mask=True
89
- ).to(model.device)
90
-
91
- # print(inputs)
92
-
93
- outputs = model.generate(
94
- **inputs,
95
- max_new_tokens=400,
96
- temperature=0.2
97
  )
98
 
99
- ### Get the length(num of tokens) of the input prompt
100
- prompt_len = inputs["input_ids"].shape[1]
101
-
102
- ### Slice the generated sequence to skip the prompt length
103
- gen_tokens = outputs[0][prompt_len:]
104
-
105
- # print(tokenizer.decode(gen_tokens, skip_special_tokens=True))
106
-
107
- return gen_tokens
108
 
109
 
110
  def launch_claude_api(sys_msg, user_msg):
111
- response = claude.messages.create(
 
 
 
 
112
  model=CLAUDE_MODEL,
113
  system=sys_msg,
114
  max_tokens=400,
@@ -124,6 +121,7 @@ def launch_claude_api(sys_msg, user_msg):
124
 
125
  def generate_mockup_data(category, num_data_rows, columns, a_instruction,
126
  progress=gr.Progress()):
 
127
  progress(0.2, desc="Generating...")
128
  msg = invoke_messages(
129
  rows_num=int(num_data_rows or 10),
@@ -132,16 +130,10 @@ def generate_mockup_data(category, num_data_rows, columns, a_instruction,
132
  instruction=a_instruction
133
  )
134
 
135
- resp = open_source_client.chat.completions.create(
136
- model=QWEN_MODEL,
137
- messages=msg,
138
- max_tokens=400,
139
- temperature=0.2,
140
- stream=False
141
- )
142
  progress(1.0, desc="Done")
143
 
144
- return resp.choices[0].message.content
145
 
146
 
147
  def show_hidden_row():
@@ -158,6 +150,9 @@ def make_file(btn_sort: str, category: str, content: str):
158
  if not content or not content.strip():
159
  raise gr.Error("The result content is empty. Cannot create a file.")
160
 
 
 
 
161
  try:
162
  sys_msg, user_msg = pass_claude_msg(btn_sort, content)
163
  claude_output = launch_claude_api(sys_msg, user_msg)
@@ -242,12 +237,6 @@ def render_interface():
242
  outputs=btn_json
243
  )
244
 
245
- ### Pre-warming the model right upon the page load
246
- ### in order to save the model load time when user submitting the form.
247
- demo.load(lambda: enable_model(), queue=False)
248
-
249
- return demo
250
-
251
 
252
  if __name__ == "__main__":
253
  app = render_interface()
 
1
  ##########====================================================================################
2
  ##########====================PRODUCTION VERSION -- vLLM, GRADIO=====================###########
3
  ##########====================================================================################
4
+ import os, subprocess, threading, time
5
  import requests
6
  from typing import List, Dict, Tuple
7
  from datetime import datetime
8
  from anthropic import Anthropic
9
  from openai import OpenAI
10
+ import gradio as gr
11
  from tqdm import tqdm
12
 
13
  ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
 
22
  claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)
23
 
24
 
25
def wait_for_vllm_ready(timeout=120,
                        health_url="http://localhost:8000/health",
                        poll_interval=2):
    """Block until the local vLLM server answers its health endpoint.

    Polls ``health_url`` until it returns HTTP 200, then returns True.

    Args:
        timeout: Maximum number of seconds to wait before giving up.
        health_url: Health-check endpoint of the vLLM server.
        poll_interval: Seconds to sleep between polls.

    Returns:
        True once the server reports healthy.

    Raises:
        RuntimeError: If the server is not healthy within ``timeout`` seconds.
    """
    # time.monotonic() is immune to wall-clock adjustments (NTP, DST),
    # so the deadline cannot jump forward or backward mid-wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            response = requests.get(health_url, timeout=3)
        except requests.RequestException:
            # Server not accepting connections yet -- keep polling.
            pass
        else:
            if response.status_code == 200:
                return True
        time.sleep(poll_interval)
    raise RuntimeError("vLLM did not start within timeout")
36
+
37
  def invoke_messages(
38
  rows_num: int,
39
  business_category: str,
 
88
 
89
 
90
def generate_output(messages):
    """Run a chat completion against the vLLM OpenAI-compatible endpoint.

    ``messages`` is an OpenAI-style list of role/content dicts; the
    assistant reply is returned as a plain string.
    """
    request_kwargs = {
        "model": QWEN_MODEL,
        "messages": messages,
        "max_tokens": 400,
        "temperature": 0.2,
        "stream": False,
    }
    completion = open_source_client.chat.completions.create(**request_kwargs)
    return completion.choices[0].message.content
 
 
 
 
 
 
 
 
101
 
102
 
103
  def launch_claude_api(sys_msg, user_msg):
104
+
105
+ if not claude_client:
106
+ return None
107
+
108
+ response = claude_client.messages.create(
109
  model=CLAUDE_MODEL,
110
  system=sys_msg,
111
  max_tokens=400,
 
121
 
122
  def generate_mockup_data(category, num_data_rows, columns, a_instruction,
123
  progress=gr.Progress()):
124
+ wait_for_vllm_ready()
125
  progress(0.2, desc="Generating...")
126
  msg = invoke_messages(
127
  rows_num=int(num_data_rows or 10),
 
130
  instruction=a_instruction
131
  )
132
 
133
+ result = generate_output(msg)
 
 
 
 
 
 
134
  progress(1.0, desc="Done")
135
 
136
+ return result
137
 
138
 
139
  def show_hidden_row():
 
150
  if not content or not content.strip():
151
  raise gr.Error("The result content is empty. Cannot create a file.")
152
 
153
+ if not claude_client:
154
+ raise gr.Error("File formatting requires ANTHROPIC_API_KEY.")
155
+
156
  try:
157
  sys_msg, user_msg = pass_claude_msg(btn_sort, content)
158
  claude_output = launch_claude_api(sys_msg, user_msg)
 
237
  outputs=btn_json
238
  )
239
 
 
 
 
 
 
 
240
 
241
  if __name__ == "__main__":
242
  app = render_interface()