// picoclaww / GeminiWebAPI.cjs
// Luis Milke
// Deploy hybrid Picoclaw with GeminiWebAPI
// ee68afa
/**
* GeminiWebAPI v2.1.0
*
* A clean, reusable class for interacting with gemini.google.com via browser automation.
* Supports streaming responses, file uploads, model switching, and tool activation.
*
* @author Luis M.
* @version 2.1.0
* @license MIT
*/
const { chromium } = require('playwright');
const TurndownService = require('turndown');
const fs = require('fs');
const path = require('path');
// ============================================================================
// SELECTORS - Update these if Google changes the UI
// ============================================================================
/**
 * Playwright selectors for the Gemini web UI, grouped by feature area.
 * Most entries cover both the German and the English UI labels.
 * Every value is a selector string except `loginIndicators`, which is an
 * array of selectors that are probed one at a time.
 */
const SELECTORS = {
  // Input area
  textbox: 'div[contenteditable="true"][role="textbox"]',
  sendButton: 'button[aria-label*="Senden"], button[aria-label*="Send"], button[aria-label="Nachricht senden"]',
  micButton: 'button[aria-label*="Mikrofon"]',

  // Model switcher
  modelSwitcher: '.input-area-switch',
  modelMenu: 'div[role="menu"], .mat-mdc-menu-content',
  menuItem: 'div[role="menuitem"], button[role="menuitem"], .mat-mdc-menu-item',

  // Tools menu
  toolsButton: 'button[aria-label="Tools"], button:has-text("Tools")',

  // File upload
  plusButton: [
    // The German label wraps "Datei hochladen" in typographic quotes. Putting
    // a quote character inside the attribute value terminates the CSS string
    // early, and because these entries are comma-joined, one malformed entry
    // invalidates the whole selector. Match the two quote-free fragments
    // of the label instead.
    'button[aria-label*="Menü"][aria-label*="Datei hochladen"]',
    'button[aria-label*="Hinzufügen"]',
    'button[aria-label*="Add"]',
    'button:has(mat-icon[data-mat-icon-name="add_2"])',
    'button:has(mat-icon[fonticon="add_2"])'
  ].join(','),
  uploadOption: 'text=/Dateien hochladen|Upload files|Datei hochladen|Upload file/',
  uploadPreview: 'img[src^="blob:"], mat-chip-row, .file-preview',
  progressBar: 'mat-progress-bar, [role="progressbar"]',

  // Navigation
  hamburgerMenu: 'button[data-test-id="side-nav-menu-button"]',
  newChatButton: 'div[role="button"][aria-label="Neuer Chat"], div[role="button"][aria-label="New chat"]',
  tempChatButton: 'button[data-test-id="temp-chat-button"]',

  // Response
  responseContainer: '.markdown',

  // Thinking/Reasoning
  thinkingContainer: '.thinking-content, [data-thinking], .thought-process, .thought-bubble, [class*="thinking"], [class*="reasoning"]',

  // Login indicators: any one of these being visible is treated as "logged in"
  loginIndicators: [
    'button[aria-label*="Google Account"]',
    'img[alt*="User"], img[alt*="Nutzer"]',
    '.gb_d',
    'div[aria-label*="Gemini"]'
  ]
};
// ============================================================================
// DEFAULTS
// ============================================================================
/**
 * Option defaults; the GeminiWebAPI constructor spreads caller-supplied
 * options on top of these.
 */
const DEFAULTS = {
  // Passed straight to the persistent browser context launch.
  headless: true,
  // Model to select right after auth(); a falsy value skips the switch.
  model: null,
  // Milliseconds to wait for the textbox and for a response.
  timeout: 60 * 1000,
  // JSON file the context cookies are read from and written to.
  sessionPath: './user_session.json',
  // Profile directory for the persistent browser context.
  userDataDir: './user_data_server_13'
};
// ============================================================================
// CLASS DEFINITION
// ============================================================================
/**
 * Playwright-driven client for the Gemini web app (gemini.google.com).
 *
 * Typical flow: `await new GeminiWebAPI(opts).auth()`, then `ask()` /
 * `askStream()`, and finally `close()`. Every public method other than
 * `auth()` and `close()` calls `_ensurePage()` first and therefore throws
 * when `auth()` has not been called or the instance has been closed.
 */
class GeminiWebAPI {
  /**
   * @param {object} [options] - Overrides merged on top of DEFAULTS
   *   (headless, model, timeout, sessionPath, userDataDir).
   */
  constructor(options = {}) {
    this.options = { ...DEFAULTS, ...options };
    this.context = null;
    this.page = null;
    this.isAuthenticated = false;
    this._isClosed = false;

    // Turndown converts the response HTML scraped from the page to Markdown.
    this.turndown = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced'
    });
    this._setupTurndownRules();
  }

  /**
   * Registers a Turndown rule that renders <pre> blocks as fenced code
   * (language taken from a `language-xxx` class on the inner <code>) and
   * stand-alone <code> elements as inline code.
   */
  _setupTurndownRules() {
    this.turndown.addRule('code_blocks', {
      filter: (node) => {
        if (node.nodeName === 'PRE') return true;
        // <code> inside <pre> is rendered by the PRE branch, not on its own.
        return node.nodeName === 'CODE' && node.parentNode?.nodeName !== 'PRE';
      },
      replacement: (content, node) => {
        if (node.nodeName !== 'PRE') {
          return `\`${content}\``;
        }
        const codeNode = node.querySelector('code');
        const langMatch = codeNode?.className.match(/language-(\w+)/);
        const lang = langMatch ? langMatch[1] : '';
        return `\n\`\`\`${lang}\n${node.textContent.trim()}\n\`\`\`\n`;
      }
    });
  }

  // ========================================================================
  // INTERNAL HELPERS
  // ========================================================================

  /**
   * Heuristic: does this error message look like it came from a closed
   * page or browser?
   * @param {*} error
   * @returns {boolean}
   */
  _isBrowserClosedError(error) {
    const message = String(error?.message ?? '');
    return message.includes('closed') || message.includes('Target');
  }

  /**
   * Maps a "browser closed" error to the user-facing error (and drops the
   * authenticated flag); any other error is returned unchanged.
   * @param {*} error
   * @returns {*} The error to throw.
   */
  _translateClosedError(error) {
    if (!this._isBrowserClosedError(error)) {
      return error;
    }
    this.isAuthenticated = false;
    return new Error("Browser was closed. Please call auth() again.", { cause: error });
  }

  /** Navigates the current page to the Gemini app and lets it settle. */
  async _gotoApp() {
    await this.page.goto('https://gemini.google.com/app', {
      waitUntil: 'networkidle',
      timeout: 30000
    });
    await this.page.waitForTimeout(2000);
  }

  /**
   * Shared preparation for ask()/askStream(): neutralise Gem links, wait
   * for the textbox, close the sidebar and dismiss overlay dialogs.
   */
  async _prepareForPrompt() {
    await this._disableGemClicks();
    await this.page.waitForSelector(SELECTORS.textbox, { timeout: this.options.timeout });
    await this._closeSidebarIfOpen();
    await this._dismissOverlays();
  }

  /**
   * Reads the last response container as Markdown.
   * @param {number} [initialCount=0] - Number of response containers that
   *   existed before the prompt was sent.
   * @returns {Promise<string|null>} Markdown of the last container, or null
   *   when no container beyond `initialCount` exists yet.
   */
  async _readLatestResponse(initialCount = 0) {
    const responses = await this.page.$$(SELECTORS.responseContainer);
    if (responses.length <= initialCount) {
      return null;
    }
    const html = await responses[responses.length - 1].innerHTML();
    return this.turndown.turndown(html);
  }

  // ========================================================================
  // PAGE VALIDATION
  // ========================================================================

  /**
   * Guarantees `this.page` is an open page on gemini.google.com, re-creating
   * or re-navigating it when necessary.
   * @throws {Error} If the instance is closed, auth() was never called, or
   *   the browser has gone away.
   */
  async _ensurePage() {
    if (this._isClosed) {
      throw new Error("GeminiWebAPI has been closed.");
    }
    if (!this.context) {
      throw new Error("Browser context not initialized. Call auth() first.");
    }

    try {
      if (!this.page || this.page.isClosed()) {
        console.log("Page was closed. Creating new page...");
        this.page = await this.context.newPage();
        await this._gotoApp();
      }
    } catch (e) {
      if (this._isBrowserClosedError(e)) {
        throw new Error("Browser was closed. Please call auth() again.", { cause: e });
      }
      throw e;
    }

    if (!this.page.url().includes('gemini.google.com')) {
      await this._gotoApp();
    }
  }

  // ========================================================================
  // AUTHENTICATION
  // ========================================================================

  /**
   * Launches the persistent browser context, loads Gemini, waits for a
   * manual login if needed, saves the cookies and optionally selects
   * `options.model`.
   * @returns {Promise<GeminiWebAPI>} This instance, for chaining.
   * @throws Rethrows any failure after closing the instance.
   */
  async auth() {
    // A context left over from an earlier auth() still holds the profile
    // directory; release it first instead of failing on the relaunch.
    if (this.context) {
      const staleContext = this.context;
      this.context = null;
      this.page = null;
      try {
        await staleContext.close();
      } catch {
        // Already closed (for example the user closed the window).
      }
    }
    this._isClosed = false;

    console.log("Initializing GeminiWebAPI...");
    try {
      const context = await chromium.launchPersistentContext(
        path.resolve(this.options.userDataDir),
        {
          headless: this.options.headless,
          args: ['--disable-blink-features=AutomationControlled'],
          viewport: { width: 1920, height: 1080 }
        }
      );
      this.context = context;
      context.on('close', () => {
        // Ignore close events from a context this instance no longer owns.
        if (this.context !== context) return;
        this._isClosed = true;
        this.isAuthenticated = false;
        console.log("Browser context was closed.");
      });

      this.page = context.pages()[0] || await context.newPage();
      await this._loadSession();

      console.log("Loading gemini.google.com...");
      try {
        await this.page.goto('https://gemini.google.com/app', {
          waitUntil: 'domcontentloaded',
          timeout: 60000
        });
      } catch (e) {
        // Best effort: the login check below decides whether the page is usable.
        console.warn("Navigation timeout, checking if page loaded anyway...");
      }
      await this.page.waitForTimeout(3000);

      const isLoggedIn = await this._checkLogin();
      if (!isLoggedIn) {
        console.log("\n" + "=".repeat(60));
        console.log("LOGIN REQUIRED");
        console.log("Please log in manually in the browser window.");
        console.log("The script will continue automatically once logged in.");
        console.log("=".repeat(60) + "\n");
        await this._waitForLogin();
      }

      await this._saveSession();
      this.isAuthenticated = true;
      console.log("Authentication successful.");

      if (this.options.model) {
        await this.setModel(this.options.model);
      }
      return this;
    } catch (e) {
      console.error("Auth failed:", e.message);
      await this.close();
      throw e;
    }
  }

  /**
   * Reports whether the page looks logged in: the prompt textbox or any of
   * the login indicator selectors is visible.
   * NOTE(review): Playwright documents the `timeout` option of
   * locator.isVisible() as ignored, so each probe returns immediately; the
   * polling in _waitForLogin() is what actually waits.
   * @returns {Promise<boolean>}
   */
  async _checkLogin() {
    const candidates = [SELECTORS.textbox, ...SELECTORS.loginIndicators];
    for (const selector of candidates) {
      try {
        const el = this.page.locator(selector).first();
        if (await el.isVisible({ timeout: 1000 })) {
          return true;
        }
      } catch {
        // A failing probe just means "not this indicator"; try the next one.
      }
    }
    return false;
  }

  /**
   * Polls _checkLogin() every 2 seconds until it succeeds.
   * @param {number} [maxWait=300000] - Maximum wait in milliseconds.
   * @returns {Promise<boolean>} Resolves to true once logged in.
   * @throws {Error} When `maxWait` elapses first.
   */
  async _waitForLogin(maxWait = 300000) {
    const start = Date.now();
    while (Date.now() - start < maxWait) {
      if (await this._checkLogin()) return true;
      await this.page.waitForTimeout(2000);
    }
    throw new Error("Login timeout - please try again.");
  }

  /**
   * Adds the cookies stored at `options.sessionPath` to the context, if the
   * file exists. Failures are logged and never thrown.
   */
  async _loadSession() {
    if (!fs.existsSync(this.options.sessionPath)) {
      return;
    }
    try {
      const cookies = JSON.parse(fs.readFileSync(this.options.sessionPath, 'utf8'));
      await this.context.addCookies(cookies);
      console.log("Session loaded.");
    } catch (e) {
      // The file exists but is unreadable or invalid; say so instead of
      // claiming there was no session.
      console.warn("Could not load session:", e.message);
    }
  }

  /**
   * Writes the context cookies to `options.sessionPath`. Failures are
   * logged and never thrown.
   */
  async _saveSession() {
    if (!this.context) {
      return; // Nothing to save (for example the launch itself failed).
    }
    try {
      const cookies = await this.context.cookies();
      // The file holds login cookies, so create it owner-readable only.
      fs.writeFileSync(
        this.options.sessionPath,
        JSON.stringify(cookies, null, 2),
        { mode: 0o600 }
      );
    } catch (e) {
      console.warn("Could not save session:", e.message);
    }
  }

  // ========================================================================
  // SIDEBAR MANAGEMENT
  // ========================================================================

  /**
   * Clicks the hamburger button when a navigation element wider than 100px
   * is present. Errors are logged and never thrown.
   */
  async _closeSidebarIfOpen() {
    try {
      const isOpen = await this.page.evaluate(() => {
        const nav = ['nav', 'mat-sidenav', '[data-test-id="sidenav"]']
          .map((sel) => document.querySelector(sel))
          .find(Boolean);
        return Boolean(nav) && nav.getBoundingClientRect().width > 100;
      });
      if (!isOpen) return;

      console.log("Sidebar detected. Closing it...");
      const menuBtn = this.page.locator(SELECTORS.hamburgerMenu).first();
      if (await menuBtn.isVisible({ timeout: 500 })) {
        await menuBtn.click();
        await this.page.waitForTimeout(500);
      }
    } catch (e) {
      console.log("Error checking sidebar:", e.message);
    }
  }

  /**
   * Sets `pointer-events: none` on the Gem/bot list elements in the page.
   * Errors are logged and never thrown.
   */
  async _disableGemClicks() {
    try {
      await this.page.evaluate(() => {
        const selectors = [
          '.bots-list-container',
          '.bot-list-item',
          'bot-list-item',
          '.bot-item',
          '.bot-new-conversation-button',
          '[data-test-id="item"]',
          '.bot-name',
          'bot-actions-menu'
        ];
        for (const sel of selectors) {
          document.querySelectorAll(sel).forEach((el) => {
            el.style.pointerEvents = 'none';
          });
        }
      });
    } catch (e) {
      console.warn('Could not disable Gem clicks:', e.message);
    }
  }

  /** Clicks any visible close/acknowledge button of an overlay dialog. */
  async _dismissOverlays() {
    const overlaySelectors = [
      'button[aria-label*="Close"]',
      'button[aria-label*="Schließen"]',
      'button:has-text("Got it")',
      'button:has-text("OK")',
      'button:has-text("Verstanden")'
    ];
    for (const selector of overlaySelectors) {
      try {
        const btn = this.page.locator(selector).first();
        if (await btn.isVisible({ timeout: 300 })) {
          await btn.click({ force: true });
          await this.page.waitForTimeout(200);
        }
      } catch {
        // Best effort: an overlay that cannot be dismissed is not fatal.
      }
    }
  }

  // ========================================================================
  // MAIN API METHODS
  // ========================================================================

  /**
   * Send a message and get the response.
   * @param {string} text - The message to send
   * @param {string} [filePath] - Optional file path to upload
   * @returns {Promise<string>} The response text in markdown, or
   *   "Error: No response found." when no new response appeared.
   * @throws {Error} If not authenticated or the browser was closed.
   */
  async ask(text, filePath = null) {
    await this._ensurePage();
    if (!this.isAuthenticated) {
      throw new Error("Not authenticated. Call auth() first.");
    }
    try {
      await this._prepareForPrompt();
      // Remember how many responses exist so an answer from an earlier turn
      // is never mistaken for the reply to this prompt.
      const initialResponseCount = await this.page.locator(SELECTORS.responseContainer).count();
      if (filePath) {
        await this._uploadFile(filePath);
      }
      await this._injectText(text);
      await this._clickSendButton();
      return await this._waitForResponse(initialResponseCount);
    } catch (e) {
      throw this._translateClosedError(e);
    }
  }

  /**
   * Send a message and stream the response in real-time.
   * @param {string} text - The message to send
   * @param {Function} onChunk - Callback with (currentText, thinking) on each update
   * @param {string} [filePath] - Optional file path to upload
   * @returns {Promise<{response: string, thinking: string|null}>}
   * @throws {Error} If not authenticated or the browser was closed.
   */
  async askStream(text, onChunk, filePath = null) {
    await this._ensurePage();
    if (!this.isAuthenticated) {
      throw new Error("Not authenticated. Call auth() first.");
    }
    try {
      await this._prepareForPrompt();
      const initialResponseCount = await this.page.locator(SELECTORS.responseContainer).count();
      if (filePath) {
        await this._uploadFile(filePath);
      }
      await this._injectText(text);
      await this.page.mouse.move(0, 0);
      await this._clickSendButton();
      return await this._streamResponse(onChunk, initialResponseCount);
    } catch (e) {
      throw this._translateClosedError(e);
    }
  }

  /**
   * Writes `text` into the prompt textbox and fires an `input` event.
   * @param {string} text
   * @throws {Error} If the textbox is not in the DOM; sending would
   *   otherwise submit an empty prompt.
   */
  async _injectText(text) {
    const injected = await this.page.evaluate(({ selector, text }) => {
      if (document.activeElement) document.activeElement.blur();
      const el = document.querySelector(selector);
      if (!el) return false;
      el.innerText = text;
      el.dispatchEvent(new Event('input', { bubbles: true }));
      el.focus();
      return true;
    }, { selector: SELECTORS.textbox, text });

    if (!injected) {
      throw new Error("Input textbox not found; cannot enter the prompt.");
    }
    await this.page.waitForTimeout(300);
  }

  /**
   * Clicks the first visible send button; falls back to pressing Enter when
   * none of the known selectors is visible.
   */
  async _clickSendButton() {
    const sendSelectors = [
      'button[aria-label*="Senden"]',
      'button[aria-label*="Send"]',
      'button[aria-label="Nachricht senden"]',
      'button[data-test-id="send-button"]',
      'button.send-button',
      '.send-button-container button',
      'button:has(mat-icon[data-mat-icon-name="send"])',
      'button:has(mat-icon[fonticon="send"])'
    ];
    for (const sel of sendSelectors) {
      try {
        const btn = this.page.locator(sel).first();
        if (await btn.isVisible({ timeout: 300 })) {
          // Click from inside the page via the element's own click().
          await btn.evaluate((b) => b.click());
          return;
        }
      } catch {
        // Try the next selector.
      }
    }
    console.warn("Send button not found. Using Enter key as fallback.");
    await this.page.keyboard.press('Enter');
  }

  /**
   * Attaches a file through the upload menu. Best effort: every failure is
   * logged as a warning and the caller continues without the file.
   * @param {string} filePath - Path of the file to attach.
   */
  async _uploadFile(filePath) {
    console.log(`Uploading: ${path.basename(filePath)}...`);
    try {
      const plusBtn = this.page.locator(SELECTORS.plusButton).first();
      if (!(await plusBtn.isVisible({ timeout: 5000 }))) {
        console.warn("File upload failed: upload menu button not found.");
        return;
      }
      await plusBtn.click();
      await this.page.waitForTimeout(1000);

      const uploadOpt = this.page.locator(SELECTORS.uploadOption).first();
      if (!(await uploadOpt.isVisible({ timeout: 3000 }))) {
        console.warn("File upload failed: upload option not found.");
        return;
      }

      // Await the chooser event and the click together: if the click throws,
      // the pending waitForEvent promise is still handled rather than
      // rejecting later with no handler attached.
      const [fileChooser] = await Promise.all([
        this.page.waitForEvent('filechooser'),
        uploadOpt.click()
      ]);
      await fileChooser.setFiles(filePath);

      try {
        await this.page.locator(SELECTORS.uploadPreview).first()
          .waitFor({ state: 'visible', timeout: 30000 });
        const progress = this.page.locator(SELECTORS.progressBar);
        if (await progress.count() > 0 && await progress.first().isVisible()) {
          await progress.first().waitFor({ state: 'hidden', timeout: 30000 });
        }
        await this.page.waitForTimeout(1000);
      } catch {
        console.warn("Upload verification failed, proceeding...");
      }
    } catch (e) {
      console.warn("File upload failed:", e.message);
    }
  }

  /**
   * Waits until the send or mic button is visible again, then reads the
   * newest response (up to three attempts, one second apart).
   * @param {number} [initialCount=0] - Response containers that existed
   *   before sending; only a container beyond this count is accepted.
   * @returns {Promise<string>} Markdown of the new response, or
   *   "Error: No response found.".
   */
  async _waitForResponse(initialCount = 0) {
    console.log("Waiting for response...");
    try {
      await this.page.waitForTimeout(1000);
      const indicator = this.page.locator(`${SELECTORS.sendButton}, ${SELECTORS.micButton}`);
      await indicator.first().waitFor({ state: 'visible', timeout: this.options.timeout });
      await this.page.waitForTimeout(500);
    } catch (e) {
      console.warn(`Response timeout after ${this.options.timeout / 1000}s`);
    }

    const maxAttempts = 3;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const markdown = await this._readLatestResponse(initialCount);
      if (markdown && markdown.trim()) {
        return markdown;
      }
      if (attempt < maxAttempts) await this.page.waitForTimeout(1000);
    }
    return "Error: No response found.";
  }

  /**
   * Polls the page, reporting response/thinking updates through `onChunk`,
   * until a new response exists and the send or mic button is visible
   * again, or `options.timeout` elapses.
   * @param {Function} [onChunk] - Called with (currentText, thinking).
   * @param {number} [initialCount=0] - Response containers that existed
   *   before sending.
   * @returns {Promise<{response: string, thinking: string|null}>}
   */
  async _streamResponse(onChunk, initialCount = 0) {
    let lastText = '';
    let lastThinking = '';
    let isComplete = false;
    const startTime = Date.now();

    while (!isComplete && (Date.now() - startTime) < this.options.timeout) {
      try {
        const currentText = await this._readLatestResponse(initialCount);
        const hasNewResponse = currentText !== null;

        if (hasNewResponse) {
          let thinking = null;
          try {
            // NOTE(review): page.$ resolves to the first match in the
            // document, which may belong to an earlier turn; confirm.
            const thinkingEl = await this.page.$(SELECTORS.thinkingContainer);
            if (thinkingEl) {
              const thinkingHtml = await thinkingEl.innerHTML();
              thinking = this.turndown.turndown(thinkingHtml);
            }
          } catch {
            // Thinking content is optional.
          }
          const textChanged = currentText && currentText !== lastText;
          const thinkingChanged = thinking && thinking !== lastThinking;
          if (textChanged || thinkingChanged) {
            lastText = currentText;
            lastThinking = thinking || lastThinking;
            if (onChunk) onChunk(currentText, thinking);
          }
        }

        const indicator = this.page.locator(`${SELECTORS.sendButton}, ${SELECTORS.micButton}`);
        const inputIdle = await indicator.first().isVisible({ timeout: 100 });
        // The send button can still be visible right after sending, so an
        // idle input only means "done" once a new response has appeared.
        isComplete = hasNewResponse && inputIdle;
        if (!isComplete) {
          await this.page.waitForTimeout(200);
        }
      } catch {
        await this.page.waitForTimeout(200);
      }
    }

    await this.page.waitForTimeout(300);
    // Final read, again ignoring responses that predate this prompt.
    const finalText = (await this._readLatestResponse(initialCount)) ?? lastText;
    return {
      response: finalText || "Error: No response found.",
      thinking: lastThinking || null
    };
  }

  /**
   * Get the thinking/reasoning content shown on the page.
   * NOTE(review): reads the first element matching the thinking selector,
   * which is not necessarily the one of the last response; confirm.
   * @returns {Promise<string|null>} Markdown, or null when absent.
   */
  async getThinking() {
    await this._ensurePage();
    try {
      const el = await this.page.$(SELECTORS.thinkingContainer);
      if (el) {
        const html = await el.innerHTML();
        return this.turndown.turndown(html);
      }
    } catch (e) {
      console.warn("Could not extract thinking:", e.message);
    }
    return null;
  }

  // ========================================================================
  // SESSION CONTROL
  // ========================================================================

  /**
   * Start a new chat session. Falls back to navigating to the app home page
   * when the buttons cannot be used.
   * @param {boolean} [temporary=false] - If true, starts a temporary chat
   */
  async startNewChat(temporary = false) {
    await this._ensurePage();
    console.log(`Starting ${temporary ? 'temporary' : 'new'} chat...`);
    await this._disableGemClicks();

    try {
      const newBtn = this.page.locator(SELECTORS.newChatButton).first();
      const tempBtn = this.page.locator(SELECTORS.tempChatButton).first();

      const isNewBtnVisible = await newBtn.isVisible({ timeout: 1000 }).catch(() => false);
      const isTempBtnVisible = await tempBtn.isVisible({ timeout: 1000 }).catch(() => false);

      // Neither button visible: open the side navigation via the hamburger button.
      if (!isNewBtnVisible && !isTempBtnVisible) {
        const menu = this.page.locator(SELECTORS.hamburgerMenu).first();
        if (await menu.isVisible({ timeout: 3000 })) {
          await menu.click();
          await this.page.waitForTimeout(500);
        }
      }

      if (temporary && await tempBtn.isVisible({ timeout: 3000 })) {
        await tempBtn.click();
        await this.page.waitForTimeout(2000);
        await this.page.waitForSelector(SELECTORS.textbox, { timeout: 10000 });
        return;
      }

      if (await newBtn.isVisible({ timeout: 3000 })) {
        await newBtn.click();
      } else {
        console.warn("New chat button not found, navigating to home.");
        await this.page.goto('https://gemini.google.com/app');
      }
      await this.page.waitForTimeout(1000);
      await this.page.waitForSelector(SELECTORS.textbox, { timeout: 10000 });
    } catch (e) {
      console.warn("New chat failed, navigating to home:", e.message);
      await this.page.goto('https://gemini.google.com/app');
      await this.page.waitForTimeout(2000);
    }
  }

  /**
   * Switch to a different model. A menu entry matches when its trimmed text
   * equals `modelName`, or when one of its words (lower-cased, stripped of
   * non-alphanumerics) equals the lower-cased `modelName`. Failures are
   * logged and never thrown.
   * @param {string} modelName - The model name (e.g., "Flash", "Pro", "Thinking")
   */
  async setModel(modelName) {
    await this._ensurePage();
    console.log(`Switching to model: ${modelName}...`);
    try {
      const switcher = this.page.locator(SELECTORS.modelSwitcher).first();
      await switcher.waitFor({ state: 'visible', timeout: 5000 });
      await switcher.click();
      await this.page.waitForTimeout(500);

      const options = this.page.locator(SELECTORS.menuItem);
      const count = await options.count();
      const wantedWord = modelName.toLowerCase();

      for (let i = 0; i < count; i++) {
        const option = options.nth(i);
        const label = await option.innerText();
        const isExactMatch = label.trim() === modelName;
        const words = label
          .split(/\s+/)
          .map((w) => w.toLowerCase().replace(/[^a-z0-9]/g, ''));

        if (isExactMatch || words.includes(wantedWord)) {
          await option.click({ force: true });
          console.log(`Switched to ${modelName}.`);
          await this.page.waitForTimeout(500);
          return;
        }
      }

      console.warn(`Model '${modelName}' not found.`);
      await this.page.keyboard.press('Escape');
    } catch (e) {
      console.warn("Model switch failed:", e.message);
      try {
        await this.page.keyboard.press('Escape');
      } catch {
        // The page is gone; nothing left to dismiss.
      }
    }
  }

  /**
   * Activate a tool from the Tools menu. Failures are logged and never
   * thrown.
   * @param {string} toolName - The tool name (e.g., "Deep Research", "Canvas")
   */
  async activateTool(toolName) {
    await this._ensurePage();
    console.log(`Activating tool: ${toolName}...`);
    try {
      const toolsBtn = this.page.locator(SELECTORS.toolsButton).first();
      if (!(await toolsBtn.isVisible({ timeout: 5000 }))) {
        console.warn("Tools button not found.");
        return;
      }
      await toolsBtn.click();
      await this.page.waitForTimeout(1000);

      const tool = this.page.locator(`text="${toolName}"`).first();
      if (await tool.isVisible({ timeout: 3000 })) {
        await tool.click({ force: true });
        console.log(`Tool '${toolName}' activated.`);
      } else {
        console.warn(`Tool '${toolName}' not found.`);
        await this.page.keyboard.press('Escape');
      }
    } catch (e) {
      console.warn("Tool activation failed:", e.message);
      try {
        await this.page.keyboard.press('Escape');
      } catch {
        // The page is gone; nothing left to dismiss.
      }
    }
  }

  // ========================================================================
  // CLEANUP
  // ========================================================================

  /**
   * Close the browser and cleanup. Saves the cookies first; does nothing
   * when the instance is already closed.
   */
  async close() {
    if (this._isClosed) return;
    console.log("Closing GeminiWebAPI...");
    this._isClosed = true;
    this.isAuthenticated = false;

    // _saveSession() logs its own failures and never throws.
    await this._saveSession();

    try {
      if (this.context) {
        await this.context.close();
      }
    } catch (e) {
      console.warn("Error closing browser:", e.message);
    }
    this.context = null;
    this.page = null;
  }
}
// CommonJS export: the GeminiWebAPI class itself is the module's export.
module.exports = GeminiWebAPI;