File size: 2,363 Bytes
0e759d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import {
  Document,
} from "../../../controllers/v1/types";
import { Meta } from "..";
import { logger } from "../../../lib/logger";
import { parseMarkdown } from "../../../lib/html-to-markdown";
import { smartScrape, SmartScrapeResult } from "../lib/smartScrape";


/**
 * Runs the smart-scrape agent for a document when an agent prompt is set on
 * `meta.options.agent`, and folds the result back into the document.
 *
 * When a prompt is present, `smartScrape` is called with the document URL
 * (`document.url`, falling back to `document.metadata.sourceURL`). The HTML of
 * the last entry in `scrapedPages` then replaces `document.html` and/or
 * `document.markdown`, depending on which of those formats were requested in
 * `meta.options.formats`. `smartscrapeResults.tokenUsage` is added to
 * `document.metadata.costTracking`, which is created if it does not exist yet.
 *
 * The document is mutated in place and also returned.
 *
 * @param meta - Scrape context; `options.agent`, `options.formats` and `id` are read.
 * @param document - Document to enrich with the agent's output.
 * @returns The same `document` instance, updated when the agent ran.
 * @throws Rethrows any error raised by `smartScrape`, except an `Error` whose
 *   message is exactly "Cost limit exceeded", which is turned into a
 *   `document.warning` instead.
 */
export async function performAgent(
  meta: Meta,
  document: Document,
): Promise<Document> {
  const agentOptions = meta.options.agent;
  const prompt = agentOptions?.prompt;

  // Nothing to do unless an agent prompt was supplied.
  if (!prompt) {
    return document;
  }

  const url: string | undefined = document.url || document.metadata.sourceURL;
  if (!url) {
    // Deliberately best-effort: log and hand the document back untouched
    // rather than failing the whole scrape.
    logger.error("document.url or document.metadata.sourceURL is undefined -- this is unexpected");
    return document;
  }

  // A null session id is normalized to undefined before being passed on.
  const sessionId = agentOptions?.sessionId ?? undefined;

  let smartscrapeResults: SmartScrapeResult;
  try {
    smartscrapeResults = await smartScrape({
      url,
      prompt,
      sessionId,
      scrapeId: meta.id,
    });
  } catch (error) {
    if (error instanceof Error && error.message === "Cost limit exceeded") {
      logger.error("Cost limit exceeded", { error });
      // Prepend so the cost-limit notice comes before any earlier warning.
      document.warning = "Smart scrape cost limit exceeded." + (document.warning ? " " + document.warning : "");
      return document;
    }
    throw error;
  }

  // Only the last scraped page is used as the document content. Guard the
  // lookup: an empty result would otherwise throw on `.html` and skip the
  // cost accounting below.
  const pages = smartscrapeResults.scrapedPages;
  const lastPage = pages.length > 0 ? pages[pages.length - 1] : undefined;

  if (lastPage) {
    const html = lastPage.html;

    if (meta.options.formats.includes("markdown")) {
      document.markdown = await parseMarkdown(html);
    }
    if (meta.options.formats.includes("html")) {
      document.html = html;
    }
  } else {
    logger.error("Smart scrape returned no scraped pages -- leaving document content unchanged", { scrapeId: meta.id });
  }

  // Record the usage reported by smartScrape even when no page came back.
  const usage = smartscrapeResults.tokenUsage;
  const costTracking = document.metadata.costTracking;
  if (costTracking) {
    costTracking.smartScrapeCallCount++;
    costTracking.smartScrapeCost += usage;
    costTracking.totalCost += usage;
  } else {
    document.metadata.costTracking = {
      smartScrapeCallCount: 1,
      smartScrapeCost: usage,
      otherCallCount: 0,
      otherCost: 0,
      totalCost: usage,
    };
  }

  return document;
}