File size: 2,459 Bytes
2db46c6
786536d
2db46c6
786536d
9c1fde7
2db46c6
76b0304
0e460eb
 
8cbe070
 
 
 
2db46c6
 
 
 
0e460eb
 
 
 
 
 
 
 
 
 
2db46c6
0e460eb
 
2db46c6
0e460eb
 
 
 
 
 
 
 
 
 
 
 
 
 
2db46c6
0e460eb
 
 
e67626f
 
 
47faa40
e67626f
 
 
47faa40
2245c2f
 
 
 
 
2db46c6
 
8cbe070
76b0304
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import express from 'express';
import { chromium } from 'playwright';
import { runLogin } from './login.js';
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';

// ---- runtime configuration ----
// Navigation timeout (milliseconds) handed to Playwright's page.goto().
const TIMEOUT = 15000;
// Listening port; falls back to 7860 when PORT is unset or empty.
const PORT = process.env.PORT || 7860;
// NOTE(review): API_KEY is read here but not referenced anywhere else in the
// visible part of this file — confirm whether request auth is still pending.
const API_KEY = process.env.API_KEY || '';
// Set by the Hugging Face runtime; empty string when running elsewhere.
const SPACE_ID = process.env.SPACE_ID || '';

// Public host name derived from the space id: every "/" and "_" becomes "-".
const HOSTNAME =
  SPACE_ID === ''
    ? '(running outside HF)'
    : `https://${SPACE_ID.replace(/[\/_]/g, '-')}.hf.space`;

// Express application with JSON body parsing enabled for all routes.
const app = express();
app.use(express.json());

/* ---- screenshot endpoint ---- */
/**
 * Open `url` in a fresh headless Chromium instance and reply with a
 * full-page PNG screenshot.
 *
 * Responds with 400 `{ error: 'bad url' }` when `url` is not a string that
 * starts with "http", and with 500 `{ error, detail }` when launching,
 * navigating or capturing fails. The browser is always closed, even on error.
 *
 * @param {unknown} url - Target address taken from the request.
 * @param {object} res - Express response object used to send the reply.
 * @returns {Promise<unknown>} Resolves once a reply has been issued; never rejects.
 */
async function shot(url, res) {
  // typeof guard: a repeated query param arrives as an array, a JSON body may
  // carry a number — neither has a usable startsWith().
  if (typeof url !== 'string' || !url.startsWith('http')) {
    return res.status(400).json({ error: 'bad url' });
  }

  let browser;
  try {
    browser = await chromium.launch({ args: ['--no-sandbox'] });
    const page = await browser.newPage({ viewport: { width: 1366, height: 768 } });
    await page.goto(url, { waitUntil: 'networkidle', timeout: TIMEOUT });
    const buf = await page.screenshot({ type: 'png', fullPage: true });
    res.type('png').send(buf);
  } catch (err) {
    console.error('shot failed:', err);
    // Only answer if nothing has been written yet; otherwise the reply is already out.
    if (!res.headersSent) {
      res.status(500).json({ error: 'screenshot failed', detail: String(err?.message ?? err) });
    }
  } finally {
    // Always release the Chromium process; a failing close must not mask the reply.
    await browser?.close().catch((closeErr) => console.error('browser close failed:', closeErr));
  }
  return undefined;
}

// GET takes the target from the query string, POST from the JSON body.
// `req.body` can be undefined when no body was parsed, hence the optional
// chain. A rejection from shot() is handed to Express via next() so it
// becomes an error response instead of an unhandled rejection.
app.get('/shot', (req, res, next) => shot(req.query.url, res).catch(next));
app.post('/shot', (req, res, next) => shot(req.body?.url, res).catch(next));

/* ---- extract endpoint ---- */
/**
 * Load `url` in headless Chromium, then reply with JSON describing the page:
 * `{ url, title, text, html, links }`, where `text`/`html` come from
 * Readability's parsed article (empty strings when nothing was extracted) and
 * `links` lists the `href` of every `a[href]` element in the rendered page.
 *
 * Responds with 400 `{ error: 'bad url' }` when `url` is not a string that
 * starts with "http", and with 500 `{ error, detail }` when any later step
 * fails. The browser is always closed, even on error.
 *
 * @param {unknown} url - Target address taken from the request.
 * @param {object} res - Express response object used to send the reply.
 * @returns {Promise<unknown>} Resolves once a reply has been issued; never rejects.
 */
async function extract(url, res) {
  // typeof guard: arrays (repeated query param) or numbers have no startsWith().
  if (typeof url !== 'string' || !url.startsWith('http')) {
    return res.status(400).json({ error: 'bad url' });
  }

  let browser;
  try {
    browser = await chromium.launch({ args: ['--no-sandbox'] });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle', timeout: TIMEOUT });
    const html = await page.content();
    const title = await page.title();

    // Passing `url` lets jsdom resolve relative hrefs against the page address.
    const dom = new JSDOM(html, { url });
    const { document } = dom.window;

    // Gather links BEFORE running Readability: parse() rewrites the document
    // in place, so anchors outside the article would otherwise be lost.
    const links = [...document.querySelectorAll('a[href]')].map((a) => a.href);
    const article = new Readability(document).parse() ?? {};

    res.json({
      url,
      title,
      text: article.textContent || '',
      html: article.content || '',
      links,
    });
  } catch (err) {
    console.error('extract failed:', err);
    // Only answer if nothing has been written yet.
    if (!res.headersSent) {
      res.status(500).json({ error: 'extract failed', detail: String(err?.message ?? err) });
    }
  } finally {
    // Always release the Chromium process; a failing close must not mask the reply.
    await browser?.close().catch((closeErr) => console.error('browser close failed:', closeErr));
  }
  return undefined;
}

// GET takes the target from the query string, POST from the JSON body.
// `req.body` can be undefined when no body was parsed, hence the optional
// chain. A rejection from extract() is handed to Express via next() so it
// becomes an error response instead of an unhandled rejection.
app.get('/extract', (req, res, next) => extract(req.query.url, res).catch(next));
app.post('/extract', (req, res, next) => extract(req.body?.url, res).catch(next));

/* POST /login  → { ok, screenshot_b64 } */
// Runs the login flow from ./login.js and reports the outcome:
//   - result with a falsy `ok`  → 500 with that result object as the JSON body
//   - result with a truthy `ok` → 200 `{ ok: true, screenshot_b64 }`, where the
//     screenshot is `out.png` base64-encoded and wrapped in a PNG data URI
//     (presumably `png` is a Buffer — confirm against runLogin)
//   - runLogin() rejecting, or encoding throwing → 500 `{ ok: false, error }`
//     instead of leaving the request without a reply.
app.post('/login', async (_req, res) => {
  try {
    const out = await runLogin();

    if (!out.ok) {
      return res.status(500).json(out);
    }

    const b64 = out.png.toString('base64'); // encode once
    return res.json({
      ok: true,
      screenshot_b64: `data:image/png;base64,${b64}`, // or just b64
    });
  } catch (err) {
    console.error('login failed:', err);
    return res.status(500).json({ ok: false, error: String(err?.message ?? err) });
  }
});

// Print the derived public address, then start listening on every interface.
const announceReady = () => {
  console.log('🚀 API ready on', PORT);
};

console.log('🛰️  Public URL will be:', HOSTNAME);
app.listen(PORT, '0.0.0.0', announceReady);