Echo-AI-official's picture
Upload 280 files
0e759d2 verified
import { z } from "zod";
import { logger as _logger } from "../../../lib/logger";
import { robustFetch } from "./fetch";
import fs from "fs/promises";
import { configDotenv } from "dotenv";
configDotenv();
// Define schemas outside the function scope
const tokenUsageDetailSchema = z.object({
input_tokens: z.number().int(),
output_tokens: z.number().int(),
total_cost: z.number().nullable(), // Allows number or null
});
// Schema for an individual scraped page object
const scrapedPageSchema = z.object({
html: z.string(),
reason: z.string(),
page: z.union([z.string(), z.number()]),
});
// Main schema for the structure returned by the smart-scrape endpoint
const smartScrapeResultSchema = z.object({
sessionId: z.string(),
success: z.boolean(),
scrapedPages: z.array(scrapedPageSchema),
tokenUsage: z.number(),
// z.record(
// z.string(), // Key is the model name (string)
// tokenUsageDetailSchema, // Value matches the detail schema
// ),
});
// Infer the TypeScript type from the Zod schema
export type SmartScrapeResult = z.infer<typeof smartScrapeResultSchema>;
/**
* Sends a POST request to the internal /smart-scrape endpoint to extract
* structured data from a URL based on a prompt.
*
* @param url The URL of the page to scrape.
* @param prompt The prompt guiding the data extraction.
* @returns A promise that resolves to an object matching the SmartScrapeResult type.
* @throws Throws an error if the request fails or the response is invalid.
*/
export async function smartScrape({
url,
prompt,
sessionId,
extractId,
scrapeId,
beforeSubmission,
}: {
url: string,
prompt: string,
sessionId?: string,
extractId?: string,
scrapeId?: string,
beforeSubmission?: () => unknown,
}): Promise<SmartScrapeResult> {
let logger = _logger.child({
method: "smartScrape",
module: "smartScrape",
extractId,
url,
prompt,
sessionId,
scrapeId,
});
try {
logger.info("Initiating smart scrape request");
// Pass schema type as generic parameter to robustFeth
const response = await robustFetch<typeof smartScrapeResultSchema>({
url: `${process.env.SMART_SCRAPE_API_URL}/smart-scrape`,
method: "POST",
body: {
url,
prompt,
userProvidedId: sessionId ?? undefined,
extractId,
scrapeId,
models: {
thinkingModel: {
model: "gemini-2.5-pro-preview-03-25",
provider: "vertex",
supportTools: true,
toolChoice: "required",
cost: {
input: 1.3,
output: 5,
},
},
toolModel: {
model: "gemini-2.0-flash",
provider: "google",
},
},
},
schema: smartScrapeResultSchema, // Pass the schema instance for validation
logger,
mock: null, // Keep mock null if not mocking
});
// Check if the response indicates a 500 error
// Use type assertion to handle the error response structure
const errorResponse = response as unknown as {
success: boolean;
error?: string;
details?: string;
};
if (
errorResponse &&
errorResponse.success === false &&
errorResponse.error
) {
if (errorResponse.error === "Cost limit exceeded") {
throw new Error("Cost limit exceeded", {
cause: { tokenUsage: (errorResponse as any).tokenUsage },
});
}
logger.error("Smart scrape returned error response", {
url,
prompt,
error: errorResponse.error,
details: errorResponse.details || "No details provided",
});
throw new Error(
`Smart scrape failed: ${errorResponse.error}${errorResponse.details ? ` - ${errorResponse.details}` : ""}`,
);
}
logger.info("Smart scrape successful", {
sessionId: response.sessionId,
});
logger.info("Smart scrape cost $" + response.tokenUsage);
return response; // The response type now matches SmartScrapeResult
} catch (error) {
// Safely extract error information without circular references
const errorInfo = {
message: error instanceof Error ? error.message : String(error),
name: error instanceof Error ? error.name : "Unknown",
stack: error instanceof Error ? error.stack : undefined,
// Extract cause safely if it exists
cause:
error instanceof Error && error.cause
? error.cause instanceof Error
? {
message: error.cause.message,
name: error.cause.name,
stack: error.cause.stack,
}
: typeof error.cause === "object"
? {
...Object.fromEntries(
Object.entries(error.cause).filter(
([_, v]) => v !== null && typeof v !== "object",
),
),
error:
(error.cause as any)?.error?.message ||
(error.cause as any)?.error,
}
: String(error.cause)
: undefined,
};
logger.error("Smart scrape request failed", {
error: JSON.stringify(errorInfo),
});
// Rethrowing the error to be handled by the caller
throw new Error(`Failed to smart scrape URL: ${url}`, { cause: error });
}
}