nomagick's picture
fix: remove readerlm automatic retry
bc8dea9 unverified
import { AsyncService } from 'civkit/async-service';
import { singleton } from 'tsyringe';
import { PageSnapshot } from './puppeteer';
import { GlobalLogger } from './logger';
import _ from 'lodash';
import { AssertionFailureError } from 'civkit';
import { LLMManager } from '../shared/services/common-llm';
import { JSDomControl } from './jsdom';
// Markdown code-fence delimiter (three backticks). Interpolated into the
// ReaderLM prompts in this file to fence the HTML payload and the optional JSON schema.
const tripleBackTick = '```';
/**
 * Singleton service that feeds cleaned page HTML (and, for Gemini, a
 * screenshot) to language models through `LLMManager.iterRun` and streams the
 * generated text back as progressively more complete `PageSnapshot` objects.
 *
 * Every public generator yields one snapshot per received text chunk; each
 * yielded snapshot is a shallow copy of the input whose
 * `parsed.textContent` holds all text received so far.
 */
@singleton()
export class LmControl extends AsyncService {

    logger = this.globalLogger.child({ service: this.constructor.name });

    constructor(
        protected globalLogger: GlobalLogger,
        protected commonLLM: LLMManager,
        protected jsdomControl: JSDomControl,
    ) {
        super(...arguments);
    }

    /** Waits for dependencies to become ready, then emits `ready`. */
    override async init() {
        await this.dependencyReady();

        this.emit('ready');
    }

    /**
     * Runs the snapshot HTML through `JSDomControl.cleanHTMLforLMs` with the
     * selector list shared by every model in this service.
     *
     * @param snapshot Snapshot whose `html` is cleaned.
     * @returns Whatever `cleanHTMLforLMs` resolves to; callers interpolate it into prompts.
     */
    protected cleanSnapshotHTML(snapshot: PageSnapshot) {
        return this.jsdomControl.cleanHTMLforLMs(
            snapshot.html,
            'script,link,style,textarea,select>option,svg',
        );
    }

    /**
     * Starts a streaming `readerlm-v2` run for the given prompt.
     *
     * The option objects are created anew on every call so nothing is shared
     * between concurrent runs.
     *
     * @param prompt Complete prompt text sent to the model.
     * @returns The iterator returned by `LLMManager.iterRun`.
     */
    protected runReaderLM(prompt: string) {
        return this.commonLLM.iterRun('readerlm-v2', {
            prompt,
            options: {
                // system: 'You are an AI assistant developed by VENDOR_NAME',
                stream: true,
                modelSpecific: {
                    top_k: 1,
                    temperature: 0,
                    repetition_penalty: 1.13,
                    presence_penalty: 0.25,
                    frequency_penalty: 0.25,
                    max_tokens: 8192,
                }
            },
            // NOTE(review): presumably limits the run to a single attempt
            // (no automatic retry) — confirm against LLMManager.
            maxTry: 1,
        });
    }

    /**
     * Consumes a stream of text chunks and, after each chunk, yields a copy of
     * `snapshot` whose `parsed.textContent` is the concatenation of all chunks
     * received so far. Other `parsed` fields are carried over unchanged.
     *
     * @param snapshot Base snapshot copied into every yielded value; not mutated.
     * @param textStream Async stream of text chunks produced by the model.
     */
    protected async *streamTextIntoSnapshot(snapshot: PageSnapshot, textStream: AsyncIterable<string>) {
        // Keep a running string instead of re-joining every chunk on each iteration.
        let text = '';
        for await (const chunk of textStream) {
            text += chunk;
            const output: PageSnapshot = {
                ...snapshot,
                parsed: {
                    ...snapshot.parsed,
                    textContent: text,
                }
            };
            yield output;
        }
    }

    /**
     * Converts a page to Markdown with `vertex-gemini-1.5-flash-002`, using
     * both the cleaned HTML and a screenshot of the page.
     *
     * @param snapshot Page snapshot; must carry `pageshotUrl` or `pageshot`.
     * @throws AssertionFailureError when no screenshot is available
     *         (including when `snapshot` itself is missing).
     */
    async *geminiFromBrowserSnapshot(snapshot?: PageSnapshot & {
        pageshotUrl?: string,
    }) {
        const pageshot = snapshot?.pageshotUrl || snapshot?.pageshot;
        // A missing snapshot implies a missing pageshot, so both cases raise the same error.
        if (!snapshot || !pageshot) {
            throw new AssertionFailureError('Screenshot of the page is not available');
        }

        const html = await this.cleanSnapshotHTML(snapshot);

        const it = this.commonLLM.iterRun('vertex-gemini-1.5-flash-002', {
            prompt: [
                `HTML: \n${html}\n\nSCREENSHOT: \n`,
                // String screenshots are passed as URL objects; any other value is forwarded as-is.
                typeof pageshot === 'string' ? new URL(pageshot) : pageshot,
                `Convert this webpage into a markdown source file that does not contain HTML tags, retaining the page language and visual structures.`,
            ],
            options: {
                system: 'You are ReaderLM-v7, a model that generates Markdown source files only. No HTML, notes and chit-chats allowed',
                stream: true
            }
        });

        yield* this.streamTextIntoSnapshot(snapshot, it);
    }

    /**
     * Asks `readerlm-v2` to extract the main content of the page HTML and
     * convert it to Markdown.
     *
     * @param snapshot Page snapshot providing the HTML.
     * @throws AssertionFailureError when `snapshot` is missing.
     */
    async *readerLMMarkdownFromSnapshot(snapshot?: PageSnapshot) {
        if (!snapshot) {
            throw new AssertionFailureError('Snapshot of the page is not available');
        }

        const html = await this.cleanSnapshotHTML(snapshot);

        const it = this.runReaderLM(
            `Extract the main content from the given HTML and convert it to Markdown format.\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n`
        );

        yield* this.streamTextIntoSnapshot(snapshot, it);
    }

    /**
     * Asks `readerlm-v2` to follow `instruction` against the page HTML,
     * optionally appending a JSON schema to the prompt.
     *
     * @param schema Optional JSON schema text; when given it is appended to the prompt in a fenced `json` block.
     * @param instruction Instruction placed at the start of the prompt.
     * @param snapshot Page snapshot providing the HTML.
     * @throws AssertionFailureError when `snapshot` is missing.
     */
    async *readerLMFromSnapshot(schema?: string, instruction: string = 'Infer useful information from the HTML and present it in a structured JSON object.', snapshot?: PageSnapshot) {
        if (!snapshot) {
            throw new AssertionFailureError('Snapshot of the page is not available');
        }

        const html = await this.cleanSnapshotHTML(snapshot);

        const schemaSection = schema ? `The JSON schema:\n${tripleBackTick}json\n${schema}\n${tripleBackTick}\n` : '';
        const it = this.runReaderLM(
            `${instruction}\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n${schemaSection}`
        );

        yield* this.streamTextIntoSnapshot(snapshot, it);
    }
}