GlazedDon0t committed on
Commit
dc4573e
·
1 Parent(s): d3b4e50
Files changed (1) hide show
  1. src/labeling_logic.py +78 -0
src/labeling_logic.py CHANGED
@@ -1,6 +1,41 @@
1
  # labeling_logic.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  LABELING_PROMPT_TEMPLATE = """
 
 
4
  You are an AI Factuality Assessment Agent operating under the "Ali Arsanjani Factuality Factors" framework.
5
  Your goal is to mass-label video content, quantifying "Veracity Vectors" and "Modality Alignment".
6
 
@@ -27,6 +62,7 @@ Your goal is to mass-label video content, quantifying "Veracity Vectors" and "Mo
27
  - Strings containing commas MUST be quoted.
28
  - ALL scores must be filled (use 0 if unsure, do not leave blank).
29
  - **MODALITY SCORING:** You must provide 3 distinct alignment scores: Video-Audio, Video-Caption, and Audio-Caption.
 
30
 
31
  **TOON SCHEMA:**
32
  {toon_schema}
@@ -37,6 +73,38 @@ Your goal is to mass-label video content, quantifying "Veracity Vectors" and "Mo
37
  <thinking>
38
  """
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  SCORE_INSTRUCTIONS_REASONING = """
41
  **Constraints:**
42
  1. Provide specific reasoning for EACH score in the `vectors` and `modalities` tables.
@@ -66,6 +134,9 @@ disinfo: analysis[1]{class,intent,threat}:
66
 
67
  final: assessment[1]{score,reasoning}:
68
  (Int 1-100),"Final synthesis of why this score was given"
 
 
 
69
  """
70
 
71
  SCHEMA_REASONING = """
@@ -92,9 +163,14 @@ disinfo: analysis[1]{class,intent,threat}:
92
 
93
  final: assessment[1]{score,reasoning}:
94
  (Int 1-100),"Final synthesis of why this score was given"
 
 
 
95
  """
96
 
97
  FCOT_MACRO_PROMPT = """
 
 
98
  **Fractal Chain of Thought - Stage 1: Macro-Scale Hypothesis (Wide Aperture)**
99
 
100
  You are analyzing a video for factuality.
@@ -138,6 +214,8 @@ FCOT_SYNTHESIS_PROMPT = """
138
  **Output Format**:
139
  Strictly fill out the following TOON schema based on the consensus. Do not include markdown code blocks.
140
 
 
 
141
  **TOON SCHEMA**:
142
  {toon_schema}
143
 
 
1
  # labeling_logic.py
2
+ import os
3
+ import json
4
+
5
# Persona presets keyed by variant name ("standard", "skeptic", "lenient").
# Each entry holds a human-readable "description" and the "instruction"
# sentence for that persona. NOTE(review): presumably the instruction is what
# fills the {system_persona} placeholder in the prompt templates - confirm
# against the caller.
+ PROMPT_VARIANTS = {
6
+ "standard": {
7
+ "description": "Standard Persona",
8
+ "instruction": "You are a Factuality Agent."
9
+ },
10
+ "skeptic": {
11
+ "description": "Skeptical Persona",
12
+ "instruction": "You are a highly skeptical Factuality Agent. Assume claims might be misleading until verified."
13
+ },
14
+ "lenient": {
15
+ "description": "Lenient Persona",
16
+ "instruction": "You are a lenient Factuality Agent. Assume claims are true unless overtly false."
17
+ }
18
+ }
19
+
20
# Prompt note for inputs that have no video: directs the model to analyze
# only the caption and transcript text. NOTE(review): where this is appended
# to a prompt is not visible here - confirm against the caller.
+ TEXT_ONLY_INSTRUCTIONS = """
21
+ **NOTE: No video provided.** Analyze the text only (caption and transcript). Do not make assumptions about visual elements unless described in the text. Visual and Audio integrity scores should be estimated based on textual claims if applicable, or defaulted if completely unknowable.
22
+ """
23
+
24
def get_formatted_tag_list(path: str = "data/tags.json") -> str:
    """Return a one-line tag instruction built from a JSON tag file.

    The file at *path* is expected to hold a JSON object; its keys are
    offered as suggested tags. Loading is deliberately best-effort: a
    missing, unreadable, malformed, empty, or non-object file produces the
    generic instruction instead of raising.

    Args:
        path: Location of the tag JSON file. Defaults to ``data/tags.json``,
            resolved against the current working directory.

    Returns:
        ``"Suggested tags: a, b, c"`` when the file yields at least one tag,
        otherwise ``"Provide relevant comma-separated tags."``.
    """
    fallback = "Provide relevant comma-separated tags."

    try:
        # Decode explicitly as UTF-8 so non-ASCII tag names do not depend on
        # the platform's default locale encoding.
        with open(path, "r", encoding="utf-8") as tag_file:
            tags = json.load(tag_file)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # invalid JSON (json.JSONDecodeError) and undecodable bytes
        # (UnicodeDecodeError). Both mean "no usable tag list".
        return fallback

    # Only a non-empty JSON object provides tag names; any other root value
    # (list, string, number, null, {}) falls back to the generic instruction.
    if not isinstance(tags, dict) or not tags:
        return fallback

    # JSON object keys are always strings, so they can be joined directly.
    return "Suggested tags: " + ", ".join(tags)
35
 
36
  LABELING_PROMPT_TEMPLATE = """
37
+ {system_persona}
38
+
39
  You are an AI Factuality Assessment Agent operating under the "Ali Arsanjani Factuality Factors" framework.
40
  Your goal is to mass-label video content, quantifying "Veracity Vectors" and "Modality Alignment".
41
 
 
62
  - Strings containing commas MUST be quoted.
63
  - ALL scores must be filled (use 0 if unsure, do not leave blank).
64
  - **MODALITY SCORING:** You must provide 3 distinct alignment scores: Video-Audio, Video-Caption, and Audio-Caption.
65
+ - **TAGS:** {tag_list_text}
66
 
67
  **TOON SCHEMA:**
68
  {toon_schema}
 
73
  <thinking>
74
  """
75
 
76
# Labeling prompt that ends at "**RESPONSE:**" and contains no <thinking>
# section. Placeholders: {system_persona}, {caption}, {transcript},
# {tag_list_text}, {toon_schema}, {score_instructions}. The doubled braces in
# the "Key : Type [ Count ] {{ Headers }} :" line suggest the template is
# filled with str.format - NOTE(review): confirm against the caller.
+ LABELING_PROMPT_TEMPLATE_NO_COT = """
77
+ {system_persona}
78
+
79
+ You are an AI Factuality Assessment Agent operating under the "Ali Arsanjani Factuality Factors" framework.
80
+ Your goal is to mass-label video content, quantifying "Veracity Vectors" and "Modality Alignment".
81
+
82
+ **INPUT DATA:**
83
+ - **User Caption:** "{caption}"
84
+ - **Audio Transcript:** "{transcript}"
85
+ - **Visuals:** (Provided in video context)
86
+
87
+ **INSTRUCTIONS:**
88
+ 1. **Grounding:** Cross-reference claims in the transcript with your internal knowledge base (and tools if active).
89
+ 2. **Output Format:** Output strictly in **TOON** format (Token-Oriented Object Notation) as defined below.
90
+
91
+ **CRITICAL CONSTRAINTS:**
92
+ - Do NOT repeat the input data.
93
+ - **DO NOT use Markdown code blocks.** (Output plain text only).
94
+ - Use strict `Key : Type [ Count ] {{ Headers }} :` format followed by data lines.
95
+ - Strings containing commas MUST be quoted.
96
+ - ALL scores must be filled (use 0 if unsure, do not leave blank).
97
+ - **MODALITY SCORING:** You must provide 3 distinct alignment scores: Video-Audio, Video-Caption, and Audio-Caption.
98
+ - **TAGS:** {tag_list_text}
99
+
100
+ **TOON SCHEMA:**
101
+ {toon_schema}
102
+
103
+ {score_instructions}
104
+
105
+ **RESPONSE:**
106
+ """
107
+
108
  SCORE_INSTRUCTIONS_REASONING = """
109
  **Constraints:**
110
  1. Provide specific reasoning for EACH score in the `vectors` and `modalities` tables.
 
134
 
135
  final: assessment[1]{score,reasoning}:
136
  (Int 1-100),"Final synthesis of why this score was given"
137
+
138
+ tags: keywords[1]{tags}:
139
+ "Comma-separated list of tags"
140
  """
141
 
142
  SCHEMA_REASONING = """
 
163
 
164
  final: assessment[1]{score,reasoning}:
165
  (Int 1-100),"Final synthesis of why this score was given"
166
+
167
+ tags: keywords[1]{tags}:
168
+ "Comma-separated list of tags"
169
  """
170
 
171
  FCOT_MACRO_PROMPT = """
172
+ {system_persona}
173
+
174
  **Fractal Chain of Thought - Stage 1: Macro-Scale Hypothesis (Wide Aperture)**
175
 
176
  You are analyzing a video for factuality.
 
214
  **Output Format**:
215
  Strictly fill out the following TOON schema based on the consensus. Do not include markdown code blocks.
216
 
217
+ **TAGS GUIDANCE**: {tag_list_text}
218
+
219
  **TOON SCHEMA**:
220
  {toon_schema}
221