Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
4b45b50
1
Parent(s):
861b56b
ts: add simple REST endpoint
Browse files- deno/index.ts +52 -55
deno/index.ts
CHANGED
|
@@ -1,27 +1,11 @@
|
|
| 1 |
import PQueue from "https://deno.land/x/p_queue@1.0.1/mod.ts"
|
|
|
|
| 2 |
|
| 3 |
import * as CSV from './csv.ts';
|
| 4 |
import Puppet from './puppet.ts';
|
| 5 |
import selectors from './selectors.ts';
|
| 6 |
|
| 7 |
const puppet = new Puppet();
|
| 8 |
-
const queue = new PQueue({
|
| 9 |
-
concurrency: 10,
|
| 10 |
-
timeout: 60000
|
| 11 |
-
})
|
| 12 |
-
let count = 0
|
| 13 |
-
let statInterval
|
| 14 |
-
queue.addEventListener("active", () =>
|
| 15 |
-
console.log(`Working on item #${++count}. Size: ${queue.size} Pending: ${queue.pending}`))
|
| 16 |
-
queue.addEventListener("next", () =>
|
| 17 |
-
console.log(`task finished, Size: ${queue.size} Pending: ${queue.pending}`))
|
| 18 |
-
|
| 19 |
-
queue.addEventListener("idle", async () => {
|
| 20 |
-
clearInterval(statInterval)
|
| 21 |
-
await puppet.close()
|
| 22 |
-
console.log("all done")
|
| 23 |
-
})
|
| 24 |
-
|
| 25 |
async function get_logos(page, selector): {}[] {
|
| 26 |
const logos = await page.$$(selector) || [];
|
| 27 |
for (const i in logos) {
|
|
@@ -36,14 +20,8 @@ async function get_logos(page, selector): {}[] {
|
|
| 36 |
return logos;
|
| 37 |
}
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
const promises: Promise<void>[] = [];
|
| 42 |
-
|
| 43 |
-
return puppet.run(async page => {
|
| 44 |
-
const url = o.url.replace('http:', 'https:');
|
| 45 |
-
promises.push(new Promise<void>((accept, _reject) => {
|
| 46 |
-
page.once('load', async () => {
|
| 47 |
try {
|
| 48 |
const imgs = await get_logos(page, selectors.img_logo);
|
| 49 |
const ids = await get_logos(page, selectors.id_logo);
|
|
@@ -60,49 +38,68 @@ function process(o: { id: int, url: string, bco: string, name: string }): Promis
|
|
| 60 |
|| (bb.height < 10)
|
| 61 |
|| (bb.x + bb.width < 0)
|
| 62 |
|| (bb.y + bb.height < 0)) continue;
|
| 63 |
-
console.log('got bb',
|
| 64 |
|
| 65 |
try {
|
| 66 |
-
await logos[i].screenshot({
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
annotations +=
|
| 68 |
-
`${
|
| 69 |
} catch (e) {
|
| 70 |
console.error(`couldn't screenshot logo: ${e}`);
|
| 71 |
}
|
| 72 |
}
|
| 73 |
if (logos.length) {
|
| 74 |
-
await Deno.writeTextFile(
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
| 78 |
} catch (err) {
|
| 79 |
console.error(`error in screenshot: ${err}`);
|
| 80 |
}
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
}
|
| 91 |
-
|
|
|
|
| 92 |
})
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
text = await Deno.readTextFile("./data/entities.csv")
|
| 99 |
-
} catch (e) {
|
| 100 |
-
console.error(`couldn't read csv: ${e}`)
|
| 101 |
-
}
|
| 102 |
-
if (!text) return setTimeout(run, 1000)
|
| 103 |
-
statInterval = setInterval(() =>
|
| 104 |
-
console.log(`Size: ${queue.size} Pending: ${queue.pending}`), 1000);
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import PQueue from "https://deno.land/x/p_queue@1.0.1/mod.ts"
|
| 2 |
+
import { Application, Router } from "https://deno.land/x/oak@v9.0.0/mod.ts";
|
| 3 |
|
| 4 |
import * as CSV from './csv.ts';
|
| 5 |
import Puppet from './puppet.ts';
|
| 6 |
import selectors from './selectors.ts';
|
| 7 |
|
| 8 |
const puppet = new Puppet();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
async function get_logos(page, selector): {}[] {
|
| 10 |
const logos = await page.$$(selector) || [];
|
| 11 |
for (const i in logos) {
|
|
|
|
| 20 |
return logos;
|
| 21 |
}
|
| 22 |
|
| 23 |
+
async function fetch_logos(page, id, dest) {
|
| 24 |
+
console.error(`getting logos for: ${id}`)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
try {
|
| 26 |
const imgs = await get_logos(page, selectors.img_logo);
|
| 27 |
const ids = await get_logos(page, selectors.id_logo);
|
|
|
|
| 38 |
|| (bb.height < 10)
|
| 39 |
|| (bb.x + bb.width < 0)
|
| 40 |
|| (bb.y + bb.height < 0)) continue;
|
| 41 |
+
console.log('got bb', bb)
|
| 42 |
|
| 43 |
try {
|
| 44 |
+
await logos[i].screenshot({
|
| 45 |
+
path: dest
|
| 46 |
+
.replace('images', 'logos')
|
| 47 |
+
.replace('.png', `.${i}.png`)
|
| 48 |
+
})
|
| 49 |
annotations +=
|
| 50 |
+
`${id} ${bb.x + bb.width / 2} ${bb.y + bb.height / 2} ${bb.width} ${bb.height}\n`
|
| 51 |
} catch (e) {
|
| 52 |
console.error(`couldn't screenshot logo: ${e}`);
|
| 53 |
}
|
| 54 |
}
|
| 55 |
if (logos.length) {
|
| 56 |
+
await Deno.writeTextFile(dest
|
| 57 |
+
.replace('images', 'labels')
|
| 58 |
+
.replace('png', 'txt'),
|
| 59 |
+
annotations);
|
| 60 |
+
}
|
| 61 |
} catch (err) {
|
| 62 |
console.error(`error in screenshot: ${err}`);
|
| 63 |
}
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
// Router that the POST endpoints below are registered on, and the oak
// application that serves it.
const router = new Router();
const app = new Application();

// Request counters, updated and logged by the /screenshot handler.
const stats = { in_flight: 0, done: 0 };
|
| 73 |
+
/**
 * POST /screenshot
 *
 * Reads `{ url, path, id?, logos? }` from the request body, loads `url` in a
 * puppet page, writes a full-page screenshot to `path` and, when `logos` is
 * set, runs `fetch_logos(page, id, logos)` on the same page.
 *
 * Responds with whatever `puppet.run` resolves to (`{response: 'ok'}` from the
 * callback below), or with status 400 when `url` / `path` are missing.
 */
router.post('/screenshot', async (ctx) => {
  const { request, response } = ctx;
  const q = await request.body().value;

  // Reject malformed bodies up front instead of failing inside the browser.
  if (typeof q?.url !== 'string' || typeof q?.path !== 'string') {
    response.status = 400;
    response.body = { error: 'expected a body with string "url" and "path"' };
    return;
  }

  // NOTE(review): `q.url` and `q.path` come straight from the request, so any
  // caller can make the browser visit an arbitrary URL and write a file at an
  // arbitrary path. Confirm this endpoint is only reachable by trusted clients.
  stats.in_flight++;
  try {
    const ret = await puppet.run(async page => {
      console.error('running', q, stats);
      await page.goto(q.url, { waitUntil: 'networkidle2', timeout: 60000 });
      await page.screenshot({ path: q.path, fullPage: true });
      if (q.logos) {
        await fetch_logos(page, q.id, q.logos);
      }
      console.error(`screenshot ok: ${q.path}`);
      return { response: 'ok' };
    });
    // Only successful runs count as done.
    stats.done++;
    response.body = ret;
  } finally {
    // Always release the in-flight slot, even when navigation or the
    // screenshot throws; otherwise the counter drifts upwards forever.
    stats.in_flight--;
  }
})
|
| 92 |
+
/**
 * POST /bco
 *
 * Passes the request body to `process` and returns its result as the
 * response body.
 */
router.post('/bco', async (ctx) => {
  const { request, response } = ctx;
  const q = await request.body().value;
  // NOTE(review): no `process` function is defined in the visible part of this
  // file — confirm it still exists, otherwise this route cannot work.
  const ret = await process(q);

  // Pass the value as its own argument so the console formats it; a template
  // literal would print "[object Object]" for an object result.
  console.error('ret:', ret);
  response.body = ret;
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
// Install the router's routes and its allowed-methods middleware.
app.use(router.routes());
app.use(router.allowedMethods());

// Address passed to app.listen; 0.0.0.0 binds every interface.
const addr = '0.0.0.0:8000';
console.error(`listen on ${addr}`);
app.listen(addr);
|