Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use ayjays132/Phillnet-2 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use ayjays132/Phillnet-2 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/ayjays132/Phillnet-2

SGLang

How to use ayjays132/Phillnet-2 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "ayjays132/Phillnet-2" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
```
docker model run hf.co/ayjays132/Phillnet-2
```

Phillnet-2 / Tools /browser_ocr.js

ayjays132

Upload 478 files

101858b verified 3 days ago

raw

history blame contribute delete

11.4 kB

	'use strict';

	/**
	* ocr.js — Advanced Tesseract OCR pipeline
	*
	* BACKWARDS COMPATIBLE: original shape { text, confidence } is always present.
	* All new extraction passes always run and enrich the output alongside it.
	*
	* Usage:
	* node ocr.js <imagePath> [options]
	*
	* Options (all optional — nothing breaks without them):
	* --lang=<lang> Tesseract language code(s), e.g. eng, eng+fra (default: eng)
	* --psm=<0-13> Page segmentation mode (default: multi-pass best-wins)
	* --oem=<0-3> OCR engine mode (default: 3 = engine default, based on what is available)
	* --no-words Omit per-word detail from output
	* --no-lines Omit per-line detail from output
	* --no-paragraphs Omit per-paragraph detail from output
	* --hocr Include raw hOCR string in output
	* --tsv Include raw TSV string in output
	* --threshold=<0-100> Minimum confidence to include a word (default: 0)
	* --pretty Pretty-print JSON output
	*
	* Output (always includes legacy fields, new fields added alongside):
	* {
	* text: string — full extracted text (LEGACY)
	* confidence: number — overall confidence 0-100 (LEGACY)
	* lang: string — language(s) used
	* psm: number — winning PSM mode used
	* psmLabel: string — human-readable description of the winning PSM mode
	* oem: number — OEM used
	* words: Word[] — per-word detail (unless --no-words)
	* lines: Line[] — per-line detail (unless --no-lines)
	* paragraphs: Para[] — per-paragraph detail (unless --no-paragraphs)
	* blocks: Block[] — per-block detail
	* multiPass: Pass[] — all PSM attempts with their scores
	* hocr: string — raw hOCR markup (if --hocr)
	* tsv: string — raw TSV data (if --tsv)
	* version: string — pipeline version for consumers
	* }
	*/

	const Tesseract = require('tesseract.js');

	// ─── Pipeline version ────────────────────────────────────────────────────────
	const PIPELINE_VERSION = '2.0.0';

	// ─── PSM descriptions (for metadata) ─────────────────────────────────────────
	// Human-readable description for each page segmentation mode (PSM) number.
	// Only used to fill the `psmLabel` metadata fields; lookups for numbers that
	// are not listed yield `undefined`, which callers replace with 'unknown'.
	// Frozen because it is a shared lookup table that is never meant to change.
	const PSM_LABELS = Object.freeze({
	  0: 'Orientation and script detection only',
	  1: 'Automatic page segmentation with OSD',
	  2: 'Automatic page segmentation, no OSD or OCR',
	  3: 'Fully automatic page segmentation, no OSD (default)',
	  4: 'Assume a single column of text of variable sizes',
	  5: 'Assume a single uniform block of vertically aligned text',
	  6: 'Assume a single uniform block of text',
	  7: 'Treat the image as a single text line',
	  8: 'Treat the image as a single word',
	  9: 'Treat the image as a single word in a circle',
	  10: 'Treat the image as a single character',
	  11: 'Sparse text - find as much text as possible',
	  12: 'Sparse text with OSD',
	  13: 'Raw line - treat the image as a single text line, bypassing Tesseract hacks',
	});

	// ─── Argument parser ──────────────────────────────────────────────────────────
	/**
	 * Parse the command line into an options object.
	 *
	 * The first two entries of `argv` are skipped. Any argument that does not
	 * start with `--` is taken as the image path (the last one wins).
	 * Unrecognised `--` flags are ignored.
	 *
	 * Value flags (`--lang=`, `--psm=`, `--oem=`, `--threshold=`) keep their
	 * default when the supplied value is empty or not a number, so a typo such
	 * as `--psm=abc` can no longer inject `NaN` into the pipeline.
	 *
	 * @param {string[]} argv - Full argument vector, e.g. `process.argv`.
	 * @returns {{imagePath: (string|null), lang: string, psm: (number|null),
	 *   oem: number, noWords: boolean, noLines: boolean, noParagraphs: boolean,
	 *   hocr: boolean, tsv: boolean, threshold: number, pretty: boolean}}
	 *   Parsed options; `psm` stays `null` when no valid `--psm=` was given.
	 */
	function parseArgs(argv) {
	  const opts = {
	    imagePath: null,
	    lang: 'eng',
	    psm: null, // null = multi-pass
	    oem: 3,
	    noWords: false,
	    noLines: false,
	    noParagraphs: false,
	    hocr: false,
	    tsv: false,
	    threshold: 0,
	    pretty: false,
	  };

	  // Switches that take no value map straight onto a boolean option.
	  const booleanFlags = new Map([
	    ['--no-words', 'noWords'],
	    ['--no-lines', 'noLines'],
	    ['--no-paragraphs', 'noParagraphs'],
	    ['--hocr', 'hocr'],
	    ['--tsv', 'tsv'],
	    ['--pretty', 'pretty'],
	  ]);

	  for (const arg of argv.slice(2)) {
	    if (!arg.startsWith('--')) {
	      opts.imagePath = arg;
	      continue;
	    }

	    const eq = arg.indexOf('=');
	    if (eq === -1) {
	      const optionName = booleanFlags.get(arg);
	      if (optionName !== undefined) {
	        opts[optionName] = true;
	      }
	      continue;
	    }

	    // Split on the FIRST '=' only, so the value itself may contain '='.
	    const flag = arg.slice(0, eq);
	    const value = arg.slice(eq + 1);

	    switch (flag) {
	      case '--lang':
	        if (value !== '') {
	          opts.lang = value;
	        }
	        break;
	      case '--psm': {
	        const psm = Number.parseInt(value, 10);
	        if (!Number.isNaN(psm)) {
	          opts.psm = psm;
	        }
	        break;
	      }
	      case '--oem': {
	        const oem = Number.parseInt(value, 10);
	        if (!Number.isNaN(oem)) {
	          opts.oem = oem;
	        }
	        break;
	      }
	      case '--threshold': {
	        const threshold = Number.parseFloat(value);
	        if (!Number.isNaN(threshold)) {
	          opts.threshold = threshold;
	        }
	        break;
	      }
	      default:
	        // Unknown flag: ignored.
	        break;
	    }
	  }

	  return opts;
	}

	// ─── Run a single Tesseract pass ──────────────────────────────────────────────
	/**
	 * Perform one recognition attempt on an image.
	 *
	 * Forwards the image, the language string and a parameter object (page
	 * segmentation mode, engine mode, hOCR and TSV switches) to
	 * `Tesseract.recognize` and resolves with whatever that call resolves with.
	 *
	 * NOTE(review): this relies on the third argument of `Tesseract.recognize`
	 * being applied as Tesseract parameters. Confirm against the installed
	 * tesseract.js version; if it treats that argument as worker options, the
	 * PSM/OEM values here would have no effect.
	 *
	 * @param {string} imagePath - Image to recognise.
	 * @param {string} lang - Language code(s) passed through unchanged.
	 * @param {number} psm - Page segmentation mode.
	 * @param {number} oem - OCR engine mode.
	 * @returns {Promise<*>} The value produced by `Tesseract.recognize`.
	 */
	async function runPass(imagePath, lang, psm, oem) {
	  const recognitionParams = {
	    tessedit_pageseg_mode: psm,
	    tessedit_ocr_engine_mode: oem,
	    // Rich output is always requested so words/lines/blocks can be extracted.
	    tessjs_create_hocr: '1',
	    tessjs_create_tsv: '1',
	  };

	  const recognition = await Tesseract.recognize(imagePath, lang, recognitionParams);
	  return recognition;
	}

	// ─── Extract structured data from a Tesseract result ─────────────────────────
	/**
	 * Flatten a recognition result into plain word/line/paragraph/block records.
	 *
	 * Every list defaults to empty when the corresponding field is absent from
	 * `result.data`. Words whose confidence (missing counts as 0) is below
	 * `opts.threshold` are dropped; lines, paragraphs and blocks are never
	 * filtered. A missing or non-finite threshold is treated as 0 so that it
	 * cannot silently discard every word.
	 *
	 * @param {{data?: object}} result - Recognition result; must be non-null
	 *   (reading `result.data` on a missing result throws).
	 * @param {{threshold?: number}} opts - Options; only `threshold` is read.
	 * @returns {{words: object[], lines: object[], paragraphs: object[],
	 *   blocks: object[]}} Structured layers copied from the result.
	 */
	function extractStructured(result, opts) {
	  const data = result.data || {};
	  const minConfidence = Number.isFinite(opts?.threshold) ? opts.threshold : 0;

	  // Copy only the four corner coordinates; anything else on the box is dropped.
	  const copyBBox = (bbox) => (
	    bbox ? { x0: bbox.x0, y0: bbox.y0, x1: bbox.x1, y1: bbox.y1 } : null
	  );

	  // Lines, paragraphs and blocks share one shape: text, confidence, bbox and
	  // a count of their direct children.
	  const summarize = (items, childKey, countKey) => (items || []).map((item) => ({
	    text: item.text,
	    confidence: item.confidence ?? null,
	    bbox: copyBBox(item.bbox),
	    [countKey]: (item[childKey] || []).length,
	  }));

	  const words = (data.words || [])
	    .filter((w) => (w.confidence ?? 0) >= minConfidence)
	    .map((w) => ({
	      text: w.text,
	      confidence: w.confidence ?? null,
	      bbox: copyBBox(w.bbox),
	      fontName: w.font_name ?? null,
	      fontSize: w.font_size ?? null,
	      bold: w.is_bold ?? null,
	      italic: w.is_italic ?? null,
	      underlined: w.is_underlined ?? null,
	      inDict: w.in_dict ?? null,
	    }));

	  return {
	    words,
	    lines: summarize(data.lines, 'words', 'wordCount'),
	    paragraphs: summarize(data.paragraphs, 'lines', 'lineCount'),
	    blocks: summarize(data.blocks, 'paragraphs', 'paragraphCount'),
	  };
	}

	// ─── Multi-pass strategy: try several PSMs, pick best confidence ──────────────
	// PSMs tried in multi-pass mode, ordered by general utility. Frozen because
	// the list is shared and the attempt order is part of the tie-breaking rule.
	const MULTI_PASS_PSMS = Object.freeze([3, 6, 11, 4]);

	/**
	 * Run recognition once per entry in MULTI_PASS_PSMS and choose a winner.
	 *
	 * Passes run one after another. A pass that throws is recorded with an
	 * `error` string instead of aborting the whole run.
	 *
	 * Winner selection: the highest-confidence pass that produced non-empty
	 * trimmed text (on ties the earlier PSM in the list wins); otherwise the
	 * first pass that did not fail; otherwise the first recorded pass, which
	 * then carries only an `error` and no `result`.
	 *
	 * @param {string} imagePath - Image to recognise.
	 * @param {string} lang - Language code(s) passed through to each pass.
	 * @param {number} oem - OCR engine mode passed through to each pass.
	 * @param {object} [opts] - Accepted for call-site compatibility; not read.
	 * @returns {Promise<{passes: object[], winner: object}>} Every attempt plus
	 *   the selected one. Successful passes keep the full `result` so the
	 *   caller can extract structured data from the winner.
	 */
	async function multiPass(imagePath, lang, oem, opts) {
	  const passes = [];

	  for (const psm of MULTI_PASS_PSMS) {
	    try {
	      const result = await runPass(imagePath, lang, psm, oem);
	      const data = result.data || {};
	      passes.push({
	        psm,
	        psmLabel: PSM_LABELS[psm] ?? 'unknown',
	        text: (data.text || '').trim(),
	        confidence: data.confidence ?? 0,
	        result, // keep full result for winner extraction
	      });
	    } catch (err) {
	      passes.push({ psm, psmLabel: PSM_LABELS[psm] ?? 'unknown', error: String(err) });
	    }
	  }

	  // filter() returns a fresh array, so sorting does not reorder `passes`.
	  const ranked = passes
	    .filter((p) => p.text && !p.error)
	    .sort((a, b) => b.confidence - a.confidence);

	  const winner = ranked[0] || passes.find((p) => !p.error) || passes[0];

	  return { passes, winner };
	}

	// ─── Main ─────────────────────────────────────────────────────────────────────
	/**
	 * CLI entry point: parse arguments, run OCR, print one JSON document.
	 *
	 * With an explicit `--psm` a single pass is run; otherwise every multi-pass
	 * strategy is tried and the winner is used. On success the result object
	 * (legacy `text`/`confidence` first, then metadata and structured layers)
	 * is written to stdout. On any failure `{ error: string }` is written to
	 * stderr and the process exits with status 1.
	 *
	 * If no pass yields a result, the failure is reported with the collected
	 * per-pass errors rather than an unrelated TypeError from later code.
	 *
	 * @returns {Promise<void>}
	 */
	async function main() {
	  const opts = parseArgs(process.argv);

	  if (!opts.imagePath) {
	    // LEGACY: exact same error format as original
	    console.error(JSON.stringify({ error: 'missing image path' }));
	    process.exit(1);
	  }

	  try {
	    let winnerResult;
	    let winnerPsm;
	    let allPasses;

	    if (opts.psm !== null) {
	      // Single-pass mode (explicit --psm)
	      const result = await runPass(opts.imagePath, opts.lang, opts.psm, opts.oem);
	      winnerResult = result;
	      winnerPsm = opts.psm;
	      allPasses = [{
	        psm: opts.psm,
	        psmLabel: PSM_LABELS[opts.psm] ?? 'unknown',
	        text: (result?.data?.text || '').trim(),
	        confidence: result?.data?.confidence ?? null,
	      }];
	    } else {
	      // Multi-pass mode: run all strategies, pick winner
	      const { passes, winner } = await multiPass(opts.imagePath, opts.lang, opts.oem, opts);
	      winnerResult = winner?.result;
	      winnerPsm = winner?.psm;
	      allPasses = passes.map((p) => ({
	        psm: p.psm,
	        psmLabel: p.psmLabel,
	        text: p.text ?? null,
	        confidence: p.confidence ?? null,
	        error: p.error ?? undefined, // undefined keys are dropped by JSON.stringify
	      }));
	    }

	    // A failed pass carries no `result`; surface the real cause here.
	    if (!winnerResult) {
	      const details = allPasses
	        .filter((p) => p.error)
	        .map((p) => `psm ${p.psm}: ${p.error}`)
	        .join('; ');
	      throw new Error(details
	        ? `all OCR passes failed (${details})`
	        : 'OCR produced no result');
	    }

	    const data = winnerResult.data || {};

	    // ── Extract all structured layers (always runs) ──────────────────────────
	    const structured = extractStructured(winnerResult, opts);

	    // ── Build output — LEGACY fields first, new fields appended ─────────────
	    const output = {
	      // ── LEGACY (always present, never moved or renamed) ───────────────────
	      text: (data.text || '').trim(),
	      confidence: data.confidence ?? null,

	      // ── New metadata ──────────────────────────────────────────────────────
	      version: PIPELINE_VERSION,
	      lang: opts.lang,
	      psm: winnerPsm,
	      psmLabel: PSM_LABELS[winnerPsm] ?? 'unknown',
	      oem: opts.oem,

	      // ── Multi-pass summary ────────────────────────────────────────────────
	      multiPass: allPasses,

	      // ── Structured layers (conditionally omitted by flags) ────────────────
	      // Spreading `false` adds nothing, so a disabled layer is simply absent.
	      ...(!opts.noWords && { words: structured.words }),
	      ...(!opts.noLines && { lines: structured.lines }),
	      ...(!opts.noParagraphs && { paragraphs: structured.paragraphs }),
	      blocks: structured.blocks,

	      // ── Optional raw formats ──────────────────────────────────────────────
	      ...(opts.hocr && { hocr: data.hocr ?? null }),
	      ...(opts.tsv && { tsv: data.tsv ?? null }),
	    };

	    // LEGACY: same stdout channel, same JSON shape (with additions)
	    console.log(opts.pretty
	      ? JSON.stringify(output, null, 2)
	      : JSON.stringify(output));
	  } catch (err) {
	    // LEGACY: exact same error format as original
	    console.error(JSON.stringify({ error: String(err) }));
	    process.exit(1);
	  }
	}

	// Run the CLI. main() reports recognition failures itself: it writes a JSON
	// error object to stderr and exits with status 1.
	main();