GiantPandas committed on
Commit
2502d01
·
verified ·
1 Parent(s): ac1f276

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -24
app.py CHANGED
@@ -54,6 +54,27 @@ preset_prompts = [
54
  "Reformat this document as Markdown with clear sections and lists.",
55
  ]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
59
  url = f"{openai_api_base}{route}"
@@ -66,9 +87,6 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
66
  response = requests.post(url, files=files, headers=headers)
67
  return response
68
 
69
-
70
-
71
-
72
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
73
  """使用aiohttp异步发送PDF"""
74
  url = f"{server_ip}{route}"
@@ -95,32 +113,16 @@ def extract_makrdown(text):
95
  return m.group(1).strip()
96
  else:
97
  return text
 
98
 
99
- openai_api_key = "EMPTY"
100
-
101
- openai_api_base = os.environ.get("openai_api_base")
102
-
103
- IP = os.environ.get("IP")
104
-
105
- PORT = os.environ.get("PORT")
106
-
107
- Authorization = os.environ.get("Authorization")
108
-
109
- client = AsyncOpenAI(
110
- api_key=openai_api_key,
111
- base_url=openai_api_base + "/v1",
112
- http_client=httpx.AsyncClient(verify=False)
113
- )
114
-
115
-
116
- async def request(messages):
117
 
118
  chat_completion_from_base64 = await client.chat.completions.create(
119
  messages=messages,
120
  extra_headers={
121
  "Authorization": f"Bearer {Authorization}"
122
  },
123
- model="Qwen2_5VL",
124
  max_completion_tokens=4096,
125
  stream=True,
126
  temperature=0.0,
@@ -204,7 +206,10 @@ def download_markdown_file(md_text):
204
  return str(filepath)
205
 
206
 
207
- async def doc_parser(doc_path, prompt):
 
 
 
208
 
209
  doc_path = Path(doc_path)
210
  if not doc_path.is_file():
@@ -231,7 +236,7 @@ async def doc_parser(doc_path, prompt):
231
  all_pages_raw = []
232
  for query in queries:
233
  pages = ""
234
- async for chunk in request(query):
235
  pages += chunk
236
  yield extract_makrdown(pages), pages
237
  all_pages.append(extract_makrdown(pages))
@@ -417,6 +422,15 @@ if __name__ == '__main__':
417
  ]
418
 
419
  with gr.Column(variant='panel', scale=5):
 
 
 
 
 
 
 
 
 
420
  with gr.Accordion("Examples", open=True):
421
  example_root = "examples"
422
  file_path = [
@@ -520,6 +534,10 @@ if __name__ == '__main__':
520
  fn=doc_parser,
521
  inputs=[file, prompts],
522
  outputs=[md, md_text]
 
 
 
 
523
  )
524
 
525
  clear_bu.add([file, md, md_text])
 
54
  "Reformat this document as Markdown with clear sections and lists.",
55
  ]
56
 
57
+ openai_api_key = "EMPTY"
58
+
59
+ AVAILABLE_MODELS = {
60
+ "infinity_parser1": {
61
+ "name": os.environ.get("infinity_parser1_name"),
62
+ "client": AsyncOpenAI(
63
+ api_key=openai_api_key,
64
+ base_url=os.environ.get("infinity_parser1_api") + "/v1",
65
+ ),
66
+ "Authorization": os.environ.get("infinity_parser1_Authorization")
67
+
68
+ },
69
+ "infinity_parser2": {
70
+ "name": os.environ.get("infinity_parser2_name"),
71
+ "client": AsyncOpenAI(
72
+ api_key=openai_api_key,
73
+ base_url=os.environ.get("infinity_parser2_api") + "/v1",
74
+ ),
75
+ "Authorization": os.environ.get("infinity_parser2_Authorization")
76
+ }
77
+ }
78
 
79
  def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
80
  url = f"{openai_api_base}{route}"
 
87
  response = requests.post(url, files=files, headers=headers)
88
  return response
89
 
 
 
 
90
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
91
  """使用aiohttp异步发送PDF"""
92
  url = f"{server_ip}{route}"
 
113
  return m.group(1).strip()
114
  else:
115
  return text
116
+
117
 
118
+ async def request(messages, model_name, client, Authorization):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  chat_completion_from_base64 = await client.chat.completions.create(
121
  messages=messages,
122
  extra_headers={
123
  "Authorization": f"Bearer {Authorization}"
124
  },
125
+ model=model_name,
126
  max_completion_tokens=4096,
127
  stream=True,
128
  temperature=0.0,
 
206
  return str(filepath)
207
 
208
 
209
+ async def doc_parser(doc_path, prompt, model_id):
210
+ model_name = AVAILABLE_MODELS[model_id]["name"]
211
+ client = AVAILABLE_MODELS[model_id]["client"]
212
+ Authorization = AVAILABLE_MODELS[model_id]["Authorization"]
213
 
214
  doc_path = Path(doc_path)
215
  if not doc_path.is_file():
 
236
  all_pages_raw = []
237
  for query in queries:
238
  pages = ""
239
+ async for chunk in request(query, model_name, client, Authorization):
240
  pages += chunk
241
  yield extract_makrdown(pages), pages
242
  all_pages.append(extract_makrdown(pages))
 
422
  ]
423
 
424
  with gr.Column(variant='panel', scale=5):
425
+
426
+ model_selector = gr.Dropdown(
427
+ choices=[(v["name"], k) for k, v in AVAILABLE_MODELS.items()],
428
+ value=list(AVAILABLE_MODELS.keys())[0], # 默认选择第一个模型
429
+ label="Model Selection",
430
+ info="Select the model to use for parsing",
431
+ interactive=True,
432
+ )
433
+
434
  with gr.Accordion("Examples", open=True):
435
  example_root = "examples"
436
  file_path = [
 
534
  fn=doc_parser,
535
  inputs=[file, prompts],
536
  outputs=[md, md_text]
537
+ ).then(
538
+ fn=doc_parser,
539
+ inputs=[file, prompts, model_selector],
540
+ outputs=[md, md_text]
541
  )
542
 
543
  clear_bu.add([file, md, md_text])