Spaces:
Runtime error
Runtime error
้ณๆธ
committed on
Commit
·
1a06ab4
1
Parent(s):
9a3f6f1
Update
Browse files
- app.py +7 -4
- requirements.txt +8 -1
app.py
CHANGED
|
@@ -12,11 +12,12 @@ import gradio as gr
|
|
| 12 |
from datetime import datetime
|
| 13 |
from modelscope.pipelines import pipeline
|
| 14 |
from modelscope import snapshot_download
|
|
|
|
| 15 |
from PIL import Image, ImageDraw, ImageFont
|
| 16 |
|
| 17 |
from PCAgent.api import inference_chat
|
| 18 |
from PCAgent.icon_localization import det
|
| 19 |
-
from PCAgent.
|
| 20 |
from PCAgent.prompt_qwen import get_subtask_prompt as get_subtask_prompt
|
| 21 |
from PCAgent.chat import init_action_chat, init_memory_chat, add_response
|
| 22 |
from PCAgent.prompt_qwen import get_action_prompt, get_process_prompt, get_memory_prompt
|
|
@@ -26,8 +27,10 @@ vl_model_version = os.environ.get('vl_model_version')
|
|
| 26 |
llm_model_version = os.environ.get('llm_model_version')
|
| 27 |
API_url = os.environ.get('API_url')
|
| 28 |
token = os.environ.get('token')
|
| 29 |
-
os.environ["OCR_ACCESS_KEY_ID"] = os.environ.get('OCR_ACCESS_KEY_ID')
|
| 30 |
-
os.environ["OCR_ACCESS_KEY_SECRET"] = os.environ.get('OCR_ACCESS_KEY_SECRET')
|
|
|
|
|
|
|
| 31 |
tff_file = os.environ.get('tff_file')
|
| 32 |
radius = 100
|
| 33 |
|
|
@@ -127,7 +130,7 @@ def get_perception_infos(screenshot_file, screenshot_som_file, font_path):
|
|
| 127 |
|
| 128 |
for i, img in enumerate(img_list):
|
| 129 |
width, height = Image.open(img).size
|
| 130 |
-
sub_text, sub_coordinates = ocr(img) # for api
|
| 131 |
for coordinate in sub_coordinates:
|
| 132 |
coordinate[0] = int(max(0, img_x_list[i] + coordinate[0] - padding))
|
| 133 |
coordinate[2] = int(min(total_width, img_x_list[i] + coordinate[2] + padding))
|
|
|
|
| 12 |
from datetime import datetime
|
| 13 |
from modelscope.pipelines import pipeline
|
| 14 |
from modelscope import snapshot_download
|
| 15 |
+
from modelscope.utils.constant import Tasks
|
| 16 |
from PIL import Image, ImageDraw, ImageFont
|
| 17 |
|
| 18 |
from PCAgent.api import inference_chat
|
| 19 |
from PCAgent.icon_localization import det
|
| 20 |
+
from PCAgent.text_localization_old import ocr
|
| 21 |
from PCAgent.prompt_qwen import get_subtask_prompt as get_subtask_prompt
|
| 22 |
from PCAgent.chat import init_action_chat, init_memory_chat, add_response
|
| 23 |
from PCAgent.prompt_qwen import get_action_prompt, get_process_prompt, get_memory_prompt
|
|
|
|
| 27 |
llm_model_version = os.environ.get('llm_model_version')
|
| 28 |
API_url = os.environ.get('API_url')
|
| 29 |
token = os.environ.get('token')
|
| 30 |
+
# os.environ["OCR_ACCESS_KEY_ID"] = os.environ.get('OCR_ACCESS_KEY_ID')
|
| 31 |
+
# os.environ["OCR_ACCESS_KEY_SECRET"] = os.environ.get('OCR_ACCESS_KEY_SECRET')
|
| 32 |
+
ocr_detection = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
|
| 33 |
+
ocr_recognition = pipeline(Tasks.ocr_recognition, model='damo/cv_convnextTiny_ocr-recognition-document_damo')
|
| 34 |
tff_file = os.environ.get('tff_file')
|
| 35 |
radius = 100
|
| 36 |
|
|
|
|
| 130 |
|
| 131 |
for i, img in enumerate(img_list):
|
| 132 |
width, height = Image.open(img).size
|
| 133 |
+
sub_text, sub_coordinates = ocr(img, ocr_detection, ocr_recognition) # for api
|
| 134 |
for coordinate in sub_coordinates:
|
| 135 |
coordinate[0] = int(max(0, img_x_list[i] + coordinate[0] - padding))
|
| 136 |
coordinate[2] = int(min(total_width, img_x_list[i] + coordinate[2] + padding))
|
requirements.txt
CHANGED
|
@@ -11,4 +11,11 @@ transformers
|
|
| 11 |
torchvision
|
| 12 |
pycocotools
|
| 13 |
timm
|
| 14 |
-
termcolor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
torchvision
|
| 12 |
pycocotools
|
| 13 |
timm
|
| 14 |
+
termcolor
|
| 15 |
+
TensorFlow==2.9.1
|
| 16 |
+
keras==2.9.0
|
| 17 |
+
SentencePiece
|
| 18 |
+
tf_slim
|
| 19 |
+
tf_keras==2.15.0
|
| 20 |
+
pyclipper
|
| 21 |
+
numpy==1.26.4
|