File size: 937 Bytes
565e57b
 
 
ce1f183
565e57b
 
 
 
ce1f183
565e57b
 
 
 
 
 
ce1f183
565e57b
 
ce1f183
565e57b
 
ce1f183
565e57b
 
ce1f183
9a3c041
565e57b
ce1f183
565e57b
 
ce1f183
565e57b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import type { GradioClient } from '$lib/types';

/**
 * Result returned by `EnhancedCaptionService.generateEnhancedCaption`.
 */
export interface EnhancedCaptionResult {
  /** Detailed scene description with specific objects/brands. */
  caption: string;
}

export class EnhancedCaptionService {
  /**
   * Generate a detailed scene description for GPT-OSS to parse.
   *
   * Sends the image to the `/stream_chat` endpoint of the supplied client with
   * a fixed prompt asking for recognizable objects, brands, logos and models,
   * then reads the caption from index 1 of the response data.
   *
   * @param client - Client whose `predict` method is used to call the endpoint.
   * @param image - Image to describe.
   * @returns The caption text wrapped in an `EnhancedCaptionResult`.
   * @throws Rethrows any error raised by `client.predict` after logging it.
   * @throws Error when the response does not carry a string at `data[1]`.
   */
  static async generateEnhancedCaption(
    client: GradioClient,
    image: Blob | File
  ): Promise<EnhancedCaptionResult> {
    try {
      // NOTE(review): the positional arguments must match the remote
      // endpoint's input order — confirm against the Space's API definition.
      const result = await client.predict("/stream_chat", [
        image,
        "Descriptive",
        "medium-length",
        [],
        "",
        "Describe this image in detail, identifying any recognizable objects, brands, logos, or specific models. Be specific about product names and types."
      ]);

      // The response comes from an external service, so narrow it at runtime
      // instead of asserting the type: a missing or non-string value would
      // otherwise leak out as an invalid `caption`.
      const caption: unknown = result?.data?.[1];
      if (typeof caption !== 'string') {
        const received = caption === null ? 'null' : typeof caption;
        throw new Error(
          `Caption endpoint returned an unexpected payload: expected a string at data[1], got ${received}`
        );
      }

      return { caption };
    } catch (error) {
      console.error('Caption generation failed:', error);
      throw error;
    }
  }
}