meccatronis committed on
Commit
b2d242d
·
verified ·
1 Parent(s): 34f3963

Upload taobao_auto.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. taobao_auto.py +216 -0
taobao_auto.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Taobao Auto Scraper - Sistema Completo com Visão
4
+ Combina RPA (mouse/teclado) + Visão Computacional para extrair produtos do Taobao
5
+ """
6
+
7
+ import sys
8
+ import json
9
+ import time
10
+ import subprocess
11
+ import re
12
+ from pathlib import Path
13
+ from ctypes import *
14
+ from urllib.parse import quote
15
+ from datetime import datetime
16
+
17
# ==================== CONFIGURATION ====================
# Size of the region grabbed by screenshot(); search() also uses the width
# to aim the pointer at the horizontal centre of the screen.
SCREEN_WIDTH = 1360
SCREEN_HEIGHT = 768
# Directory that receives every screenshot and the final manifest.json.
OUTPUT_DIR = Path("/tmp/taobao_results")
OUTPUT_DIR.mkdir(exist_ok=True)

# ==================== X11 MOUSE & KEYBOARD ====================
# Xlib and the XTest extension are driven directly through ctypes.
X11 = CDLL("libX11.so.6")
Xtst = CDLL("libXtst.so.6")

# Display *XOpenDisplay(const char *display_name)
XOpenDisplay = X11.XOpenDisplay
XOpenDisplay.argtypes = [c_char_p]
XOpenDisplay.restype = c_void_p

# int XFlush(Display *display)
XFlush = X11.XFlush
XFlush.argtypes = [c_void_p]

# int XTestFakeKeyEvent(Display *, unsigned int keycode, Bool is_press,
#                       unsigned long delay)
XTestFakeKeyEvent = Xtst.XTestFakeKeyEvent
XTestFakeKeyEvent.argtypes = [c_void_p, c_uint, c_int, c_ulong]

# int XTestFakeMotionEvent(Display *, int screen_number, int x, int y,
#                          unsigned long delay)
# Five parameters: the screen number comes before the coordinates, so the
# prototype needs three ints, otherwise `y` is marshalled as the delay type.
XTestFakeMotionEvent = Xtst.XTestFakeMotionEvent
XTestFakeMotionEvent.argtypes = [c_void_p, c_int, c_int, c_int, c_ulong]

# int XTestFakeButtonEvent(Display *, unsigned int button, Bool is_press,
#                          unsigned long delay)
XTestFakeButtonEvent = Xtst.XTestFakeButtonEvent
XTestFakeButtonEvent.argtypes = [c_void_p, c_uint, c_int, c_ulong]

# KeyCode XKeysymToKeycode(Display *, KeySym keysym)
# KeySym is an unsigned long and KeyCode an unsigned char in Xlib.
XKeysymToKeycode = X11.XKeysymToKeycode
XKeysymToKeycode.argtypes = [c_void_p, c_ulong]
XKeysymToKeycode.restype = c_ubyte

# int XSync(Display *display, Bool discard)
XSync = X11.XSync
XSync.argtypes = [c_void_p, c_int]

# Passing NULL makes Xlib use the DISPLAY environment variable.
display = XOpenDisplay(None)
if not display:
    # A NULL Display* would crash inside Xlib on first use; fail with a
    # readable error instead.
    raise RuntimeError(
        "XOpenDisplay failed: no X server reachable (is DISPLAY set?)"
    )

# Keysyms (not keycodes): translate with keycode() before sending.
XK_Return = 0xFF0D
XK_Tab = 0xFF09
XK_Escape = 0xFF1B
XK_Down = 0xFF54
XK_Up = 0xFF52
XK_Left = 0xFF51
XK_Right = 0xFF53
XK_Home = 0xFF50
XK_End = 0xFF57
XK_BackSpace = 0xFF08
XK_Control_L = 0xFFE3
XK_Shift_L = 0xFFE1
63
+
64
+ # ==================== FUNÇÕES X11 ====================
65
+
66
def keycode(keysym):
    """Return the keycode the open display maps to *keysym*.

    Thin wrapper around ``XKeysymToKeycode`` bound to the module-level
    ``display``.
    """
    code = XKeysymToKeycode(display, keysym)
    return code
68
+
69
def press(keycode, delay=0.05):
    """Tap a key: fake a key-down, wait, then fake the key-up.

    Args:
        keycode: X11 keycode to send (note: shadows the module-level
            ``keycode()`` helper inside this function).
        delay: Seconds to hold the key down; half of it is waited again
            after the release.
    """
    # (is_press flag, pause after the event) for the down and up phases.
    phases = ((1, delay), (0, delay / 2))
    for is_press, pause in phases:
        XTestFakeKeyEvent(display, keycode, is_press, 0)
        XFlush(display)
        time.sleep(pause)
76
+
77
def move(x, y):
    """Warp the pointer to (*x*, *y*) on screen 0 and pause 0.15 s."""
    screen_number = 0
    event_delay = 0
    XTestFakeMotionEvent(display, screen_number, x, y, event_delay)
    XFlush(display)
    time.sleep(0.15)
81
+
82
def click(button=1):
    """Click a mouse button at the current pointer position.

    Args:
        button: X11 button number to press and release (defaults to 1).
    """
    for is_press in (1, 0):
        XTestFakeButtonEvent(display, button, is_press, 0)
        XFlush(display)
        if is_press:
            # Hold the button briefly before releasing it.
            time.sleep(0.15)
88
+
89
def scroll(times=3):
    """Scroll the focused window by tapping the Down arrow *times* times."""
    # NOTE(review): the pause is assumed to sit inside the loop; the
    # original indentation was lost in the diff rendering - confirm.
    for _step in range(times):
        down_kc = keycode(XK_Down)
        press(down_kc, 0.03)
        time.sleep(0.05)
93
+
94
def type_text(text, delay=0.03):
    """Type *text* into the focused window one character at a time.

    Newlines are sent as Return. Every other character is converted to a
    keysym and then to a keycode; characters the keyboard mapping has no
    keycode for are skipped silently. Uppercase letters are sent with
    Shift actually held down around the key tap.

    NOTE(review): punctuation that needs Shift (e.g. ``!``) is still sent
    without Shift; handling that would require inspecting the keymap.

    Args:
        text: String to type.
        delay: Seconds each character key is held down.
    """
    shift_kc = keycode(XK_Shift_L)
    for char in text:
        if char == '\n':
            press(keycode(XK_Return))
        else:
            lowered = char.lower()
            # Some uppercase characters lower-case to several code points;
            # those cannot be sent as "Shift + one key", so send them as-is.
            shifted = char.isupper() and len(lowered) == 1
            codepoint = ord(lowered if shifted else char)
            # Latin-1 keysyms equal the code point (so space is keysym
            # 0x20 and must go through keycode() like any other key);
            # other Unicode characters use 0x01000000 + code point.
            if codepoint <= 0xFF:
                keysym = codepoint
            else:
                keysym = 0x01000000 | codepoint
            kc = keycode(keysym)
            if kc:
                if shifted:
                    # Hold Shift for the duration of the tap; a tap of
                    # Shift *before* the letter would have no effect.
                    XTestFakeKeyEvent(display, shift_kc, 1, 0)
                    XFlush(display)
                try:
                    press(kc, delay)
                finally:
                    if shifted:
                        XTestFakeKeyEvent(display, shift_kc, 0, 0)
                        XFlush(display)
        time.sleep(0.02)
111
+
112
def ctrl_a():
    """Send Ctrl+A to the focused window (select all).

    Control is held down while ``a`` is tapped once and is always released
    afterwards. Tapping Control and ``a`` as separate key presses would
    just type the letter instead of triggering the shortcut.
    """
    ctrl_kc = keycode(XK_Control_L)
    XTestFakeKeyEvent(display, ctrl_kc, 1, 0)
    XFlush(display)
    try:
        # 0x61 is the keysym for lowercase 'a'.
        press(keycode(0x61), 0.05)
    finally:
        # Never leave the modifier stuck down, even if the tap fails.
        XTestFakeKeyEvent(display, ctrl_kc, 0, 0)
        XFlush(display)
117
+
118
+ # ==================== SCREENSHOT ====================
119
+
120
def screenshot(name=None):
    """Grab one frame of the X display into ``OUTPUT_DIR`` using ffmpeg.

    Args:
        name: File name for the image. Defaults to
            ``screen_<HHMMSS>.png`` built from the current local time.

    Returns:
        Path of the image file. If ffmpeg exits with a non-zero status a
        warning is written to stderr and the path is still returned, so the
        file may not exist.

    Raises:
        subprocess.TimeoutExpired: ffmpeg did not finish within 10 seconds.
        FileNotFoundError: the ``ffmpeg`` executable is not installed.
    """
    import os

    if name is None:
        name = f"screen_{datetime.now().strftime('%H%M%S')}.png"
    path = OUTPUT_DIR / name

    # Grab from the same display the fake input events are sent to
    # (XOpenDisplay(None) follows $DISPLAY); fall back to ":0".
    grab_display = os.environ.get("DISPLAY") or ":0"

    result = subprocess.run([
        "ffmpeg", "-f", "x11grab",
        "-video_size", f"{SCREEN_WIDTH}x{SCREEN_HEIGHT}",
        "-i", grab_display,
        "-frames:v", "1",   # a single frame is enough for a screenshot
        "-y", str(path)     # overwrite an existing file without asking
    ], capture_output=True, timeout=10)

    if result.returncode != 0:
        # Stay best-effort, but do not hide the failure from the operator.
        error_lines = result.stderr.decode(errors="replace").strip().splitlines()
        reason = error_lines[-1] if error_lines else "no output"
        print(
            f"ffmpeg failed (exit {result.returncode}) for {path}: {reason}",
            file=sys.stderr,
        )

    return path
134
+
135
+ # ==================== TAOBAO RPA ====================
136
+
137
def open_browser(url="https://taobao.com"):
    """Launch Firefox on *url* in a new window and capture the result.

    Waits a fixed 5 seconds after starting the browser, then returns the
    path of the ``01_opened.png`` screenshot.
    """
    print(f"🌐 Abrindo: {url}")
    command = ['firefox', '--new-window', url]
    subprocess.Popen(command)
    time.sleep(5)
    opened_shot = screenshot("01_opened.png")
    return opened_shot
142
+
143
def search(query):
    """Type *query* into the page's search box and submit it.

    Returns the path of the ``02_search.png`` screenshot taken 6 seconds
    after Return is pressed.
    """
    print(f"🔍 Buscando: {query}")

    # Click where the search bar is expected.
    # NOTE(review): assumes the bar sits at the horizontal centre, y=180,
    # for the configured screen size - confirm against the real layout.
    target_x = SCREEN_WIDTH // 2
    target_y = 180
    move(target_x, target_y)
    time.sleep(0.3)
    click()
    time.sleep(0.5)

    # Replace whatever is in the field with the query.
    ctrl_a()
    time.sleep(0.2)
    type_text(query)
    time.sleep(0.5)

    # Submit and give the results page time to load.
    enter_kc = keycode(XK_Return)
    press(enter_kc)
    time.sleep(6)

    results_shot = screenshot("02_search.png")
    return results_shot
163
+
164
def scrape_page(page_num):
    """Screenshot the current view, then scroll down for the next one.

    Args:
        page_num: Number used in the log line and in the screenshot file
            name ``03_page_<page_num>.png``.

    Returns:
        Path of the screenshot taken before scrolling.
    """
    print(f"📦 Página {page_num}")

    shot_name = f"03_page_{page_num}.png"
    shot_path = screenshot(shot_name)
    print(f" 📸 {shot_path}")

    # Scroll to bring more products into view, then let the page settle.
    scroll(5)
    time.sleep(2)

    return shot_path
175
+
176
+ # ==================== MAIN ====================
177
+
178
def main():
    """Run the full flow: open the browser, search, capture, write manifest.

    The search query is taken from the first command-line argument and
    defaults to ``"DDR4 16GB"``. Three views are captured, and a
    ``manifest.json`` listing every PNG found in ``OUTPUT_DIR`` is written
    next to the screenshots.
    """
    query = sys.argv[1] if len(sys.argv) > 1 else "DDR4 16GB"
    rule = "=" * 60

    print(rule)
    print(" TAOBAO AUTO SCRAPER")
    print(rule)
    print(f"Query: {query}")
    print(f"Output: {OUTPUT_DIR}")
    print(rule)

    # Workflow
    print("\n1️⃣ Abrindo navegador...")
    open_browser()

    print("2️⃣ Buscando produto...")
    search(query)

    print("3️⃣ Extraindo produtos...")
    for page_num in range(1, 4):
        scrape_page(page_num)

    # Save the manifest
    manifest = {
        'query': query,
        'timestamp': datetime.now().isoformat(),
        'screenshots': [str(p) for p in sorted(OUTPUT_DIR.glob("*.png"))],
        'output_dir': str(OUTPUT_DIR)
    }

    manifest_path = OUTPUT_DIR / "manifest.json"
    # ensure_ascii=False writes non-ASCII queries verbatim, so the file
    # encoding must be pinned to UTF-8 rather than left to the locale.
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest, f, indent=2, ensure_ascii=False)

    print("\n✅ Concluído!")
    print(f"📁 Screenshots salvos em: {OUTPUT_DIR}")
    print(f"📋 Manifest: {manifest_path}")
    print("\n💡 Use a ferramenta de visão para analisar as imagens")


if __name__ == "__main__":
    main()