nomagick committed on
Commit
8a2b095
·
unverified ·
1 Parent(s): 4f284f5

fix: give expireAt for image cache

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -53,8 +53,6 @@ export class CrawlerHost extends RPCHost {
53
 
54
  turnDownPlugins = [require('turndown-plugin-gfm').gfm];
55
 
56
- imageShortUrlPrefix?: string;
57
-
58
  constructor(
59
  protected globalLogger: Logger,
60
  protected puppeteerControl: PuppeteerControl,
@@ -78,13 +76,13 @@ export class CrawlerHost extends RPCHost {
78
 
79
  let contentText = '';
80
  if (toBeTurnedToMd) {
81
- const urlToAltMap: { [k: string]: { shortDigest: string, alt?: string; }; } = {};
82
  const tasks = (snapshot.imgs || []).map(async (x) => {
83
- const r = await this.altTextService.getAltTextAndShortDigest(x).catch((err)=> {
84
  this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) });
85
  return undefined;
86
  });
87
- if (r) {
88
  urlToAltMap[x.src.trim()] = r;
89
  }
90
  });
@@ -103,7 +101,7 @@ export class CrawlerHost extends RPCHost {
103
  const mapped = urlToAltMap[src];
104
  imgIdx++;
105
  if (mapped) {
106
- return `![Image ${imgIdx}: ${mapped.alt || alt}](${this.imageShortUrlPrefix ? `${this.imageShortUrlPrefix}/${mapped.shortDigest}` : src})`;
107
  }
108
  return `![Image ${imgIdx}: ${alt}](${src})`;
109
  }
@@ -115,7 +113,7 @@ export class CrawlerHost extends RPCHost {
115
  if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
116
  contentText = turnDownService.turndown(snapshot.html);
117
  }
118
- if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
119
  contentText = snapshot.text;
120
  }
121
 
 
53
 
54
  turnDownPlugins = [require('turndown-plugin-gfm').gfm];
55
 
 
 
56
  constructor(
57
  protected globalLogger: Logger,
58
  protected puppeteerControl: PuppeteerControl,
 
76
 
77
  let contentText = '';
78
  if (toBeTurnedToMd) {
79
+ const urlToAltMap: { [k: string]: string | undefined; } = {};
80
  const tasks = (snapshot.imgs || []).map(async (x) => {
81
+ const r = await this.altTextService.getAltText(x).catch((err: any) => {
82
  this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) });
83
  return undefined;
84
  });
85
+ if (r && x.src) {
86
  urlToAltMap[x.src.trim()] = r;
87
  }
88
  });
 
101
  const mapped = urlToAltMap[src];
102
  imgIdx++;
103
  if (mapped) {
104
+ return `![Image ${imgIdx}: ${mapped || alt}](${src})`;
105
  }
106
  return `![Image ${imgIdx}: ${alt}](${src})`;
107
  }
 
113
  if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
114
  contentText = turnDownService.turndown(snapshot.html);
115
  }
116
+ if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
117
  contentText = snapshot.text;
118
  }
119
 
backend/functions/src/services/alt-text.ts CHANGED
@@ -44,32 +44,33 @@ export class AltTextService extends AsyncService {
44
  }
45
  }
46
 
47
- async getAltTextAndShortDigest(imgBrief: ImgBrief) {
48
  if (!imgBrief.src) {
49
  return undefined;
50
  }
 
 
 
51
  const digest = md5Hasher.hash(imgBrief.src);
52
  const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
53
 
54
  const existing = await ImgAlt.fromFirestore(shortDigest);
55
 
56
- if (existing?.generatedAlt) {
57
- return {
58
- shortDigest,
59
- alt: existing.generatedAlt,
60
- };
61
  }
62
 
63
- let generatedCaption;
64
 
65
- if (!imgBrief.alt) {
66
- try {
67
- generatedCaption = await this.caption(imgBrief.src);
68
- } catch (err) {
69
- this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
70
- }
71
  }
72
 
 
 
 
73
  await ImgAlt.COLLECTION.doc(shortDigest).set(
74
  {
75
  _id: shortDigest,
@@ -79,13 +80,11 @@ export class AltTextService extends AsyncService {
79
  urlDigest: digest,
80
  originalAlt: imgBrief.alt || '',
81
  generatedAlt: generatedCaption || '',
82
- createdAt: new Date()
 
83
  }, { merge: true }
84
  );
85
 
86
- return {
87
- shortDigest,
88
- alt: generatedCaption,
89
- };
90
  }
91
  }
 
44
  }
45
  }
46
 
47
+ async getAltText(imgBrief: ImgBrief) {
48
  if (!imgBrief.src) {
49
  return undefined;
50
  }
51
+ if (imgBrief.alt) {
52
+ return imgBrief.alt;
53
+ }
54
  const digest = md5Hasher.hash(imgBrief.src);
55
  const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
56
 
57
  const existing = await ImgAlt.fromFirestore(shortDigest);
58
 
59
+ if (existing) {
60
+ return existing.generatedAlt || existing.originalAlt || '';
 
 
 
61
  }
62
 
63
+ let generatedCaption = '';
64
 
65
+ try {
66
+ generatedCaption = await this.caption(imgBrief.src);
67
+ } catch (err) {
68
+ this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
 
 
69
  }
70
 
71
+ // Don't try again until the next day
72
+ const expireMixin = generatedCaption ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
73
+
74
  await ImgAlt.COLLECTION.doc(shortDigest).set(
75
  {
76
  _id: shortDigest,
 
80
  urlDigest: digest,
81
  originalAlt: imgBrief.alt || '',
82
  generatedAlt: generatedCaption || '',
83
+ createdAt: new Date(),
84
+ ...expireMixin
85
  }, { merge: true }
86
  );
87
 
88
+ return generatedCaption;
 
 
 
89
  }
90
  }