|
|
from google.adk.agents import Agent, SequentialAgent, ParallelAgent, LoopAgent |
|
|
from google.adk.models import Gemini |
|
|
from google.genai import types, Client |
|
|
import logging |
|
|
from typing import Optional |
|
|
from functools import cached_property |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class CustomGemini(Gemini):
    """Gemini model wrapper whose client is built with an explicit API key.

    The only behavioural difference from the parent visible here is that
    ``api_client`` forwards ``api_key`` to the ``Client`` constructor.
    """

    # Key handed to the genai ``Client``. ``None`` is forwarded unchanged;
    # presumably the client then falls back to its own configuration
    # (e.g. environment variables) -- TODO confirm against google-genai.
    api_key: Optional[str] = None

    @cached_property
    def api_client(self) -> Client:
        """Build (once per instance) the genai client used for requests.

        The client carries this instance's ``api_key`` together with the
        tracking headers and retry options taken from the model itself.
        """
        # NOTE(review): ``_tracking_headers`` is invoked as a method here;
        # confirm that matches the installed ADK version of ``Gemini``.
        http_options = types.HttpOptions(
            headers=self._tracking_headers(),
            retry_options=self.retry_options,
        )
        return Client(api_key=self.api_key, http_options=http_options)
|
|
|
|
|
|
|
|
# Shared HTTP retry policy handed to the model created in
# get_video_analysis_system(). Only the listed status codes are retried:
# 429 (too many requests), 500 (internal error), 503 (unavailable) and
# 504 (gateway timeout).
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],
    attempts=5,
    initial_delay=1,
    exp_base=7,  # growth factor for the backoff delay between attempts
)
|
|
|
|
|
def _literal_instruction(text: str):
    """Wrap *text* in an instruction provider so ADK uses it verbatim.

    ADK applies ``{name}`` session-state templating to plain-string
    instructions. The output-format specs below contain literal braces such
    as ``{text}``, which would be looked up as a state key and fail when that
    key is absent. Supplying the instruction as a callable bypasses that
    templating step.
    """

    def _provider(_context) -> str:
        # The context is intentionally unused: the prompt is static.
        return text

    return _provider


def get_video_analysis_system(
    api_key: Optional[str] = None,
    model_name: str = "gemini-2.1-flash-lite",
) -> SequentialAgent:
    """Create the configured video-analysis agent pipeline.

    One ``CustomGemini`` model (using the module-level ``retry_config``) is
    shared by six specialist agents. The specialists are grouped in a
    ``ParallelAgent`` named ``AnalysisTeam``, which is the single step of the
    returned ``SequentialAgent`` named ``VideoAnalysisSystem``.

    Args:
        api_key: API key forwarded to ``CustomGemini``; ``None`` is passed
            through unchanged.
        model_name: Model identifier forwarded to ``CustomGemini``.
            NOTE(review): confirm the default id is one the target API
            actually serves.

    Returns:
        The root ``SequentialAgent`` of the analysis system.
    """
    llm_model = CustomGemini(
        model=model_name,
        api_key=api_key,
        retry_options=retry_config,
    )

    # Every instruction goes through _literal_instruction() so the braces in
    # the "Output format" lines reach the model as written.
    context_summary_agent = Agent(
        name="ContextSummaryAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Scan the video, caption, and transcript for key entities and events.
        2. Identify the main narrative thread or objective of the content.
        3. Synthesize the findings into a neutral, objective summary.
        </thinking>
        Output format: summary: text[1]{text}: "Summary text" """),
    )

    political_bias_agent = Agent(
        name="PoliticalBiasAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Examine the language used for loaded terms or rhetorical devices.
        2. Analyze the context of mentioned political figures or institutions.
        3. Determine the leaning (Left/Right/Center) and evaluate the intensity of bias.
        </thinking>
        Output format: political_bias: details[1]{score,reasoning}: (Int),"Reasoning" """),
    )

    criticism_level_agent = Agent(
        name="CriticismLevelAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Assess the emotional valence of the speaker and visual cues.
        2. Identify instances of direct criticism, sarcasm, or praise.
        3. Quantify the overall hostility level on a neutral-to-supportive scale.
        </thinking>
        Output format: criticism_level: details[1]{score,reasoning}: (Int),"Reasoning" """),
    )

    modalities_agent = Agent(
        name="ModalitiesAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Compare visual events with audio descriptions for sync issues or contradictions.
        2. Verify if the user-provided caption accurately reflects the visual content.
        3. Check the transcript against both the audio and the caption for discrepancies.
        </thinking>
        Output format:
        video_audio_pairing: details[1]{score,reasoning}: (Int),"Reasoning"
        video_caption_pairing: details[1]{score,reasoning}: (Int),"Reasoning"
        audio_caption_pairing: details[1]{score,reasoning}: (Int),"Reasoning" """),
    )

    disinformation_agent = Agent(
        name="DisinformationAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Search for signs of technical manipulation (deepfakes, AI artifacts).
        2. Analyze the intent behind potential misinformation (Political/Commercial).
        3. Evaluate the risk level and the specific threat vector used.
        </thinking>
        Output format: disinformation_analysis: details[1]{level,intent,threat_vector}: (Int),(Intent),(Vector) """),
    )

    sentiment_bias_agent = Agent(
        name="SentimentBiasAgent",
        model=llm_model,
        instruction=_literal_instruction("""
        <thinking>
        1. Aggregate the emotional signals from the entire video duration.
        2. Identify recurring biased patterns or slanted perspectives.
        3. Synthesize a comprehensive overview of the sentiment and bias.
        </thinking>
        Output format: sentiment_and_bias: text[1]{text}: "Synthesis text" """),
    )

    # Fan-out stage: all six specialists are registered as siblings.
    analysis_team = ParallelAgent(
        name="AnalysisTeam",
        sub_agents=[
            context_summary_agent,
            political_bias_agent,
            criticism_level_agent,
            modalities_agent,
            disinformation_agent,
            sentiment_bias_agent,
        ],
    )

    # Root pipeline; currently a single stage.
    system = SequentialAgent(
        name="VideoAnalysisSystem",
        sub_agents=[analysis_team],
    )

    return system
|
|
|
|
|
if __name__ == "__main__":
    # Smoke check: build the agent tree with default arguments and report
    # success. No analysis is run here.
    system = get_video_analysis_system()
    print("Video Analysis Agent System created successfully.")
|
|
|