Commit 8e68b5a · Parent(s): 9c5dc16
add
agentgraph/methods/production/openai_structured_extractor.py
CHANGED
@@ -110,7 +110,6 @@ OUTPUT REQUIREMENTS:
 - ID format: agent_001, task_001, etc.
 - Empty raw_prompt/interaction_prompt fields
 - Include 1-2 failures and optimizations
-
 NAMING STYLE & CANONICALIZATION:
 - Title Case with spaces (avoid underscores in names)
 - Prefer canonical names only when supported by trace evidence (avoid suggesting specific tool names without evidence)
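The edited lines are part of the extractor's prompt text; the only change is dropping the stray blank line between the two rule groups. As a minimal sketch, assuming the prompt lives in a triple-quoted string constant (the constant name is hypothetical; the diff shows only the prompt body), the block now reads:

# Hypothetical constant name; the diff shows only the prompt body, not its container.
EXTRACTOR_OUTPUT_RULES = """\
OUTPUT REQUIREMENTS:
- ID format: agent_001, task_001, etc.
- Empty raw_prompt/interaction_prompt fields
- Include 1-2 failures and optimizations
NAMING STYLE & CANONICALIZATION:
- Title Case with spaces (avoid underscores in names)
- Prefer canonical names only when supported by trace evidence (avoid suggesting specific tool names without evidence)
"""

Removing the blank line joins the two headed rule groups into one contiguous block of the prompt.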
agentgraph/testing/perturbation_types/counterfactual_bias.py
CHANGED
@@ -48,7 +48,6 @@ def get_llm_response(prompt: str, model: str, model_configs: List[Dict]) -> str:
     response = litellm.completion(
         model=model,
         messages=[{"role": "user", "content": prompt}],
-        temperature=0.7,
         max_tokens=500
     )
 
@@ -92,7 +91,6 @@ def evaluate_fairness(prompt: str, response: str, judge_model: str) -> Tuple[flo
     judge_response = litellm.completion(
         model=judge_model,
         messages=[{"role": "user", "content": judge_prompt}],
-        temperature=0.1,
         max_tokens=200
     )
 
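Both hunks drop the explicit temperature argument from the litellm.completion calls, so sampling now falls back to whatever default the underlying provider applies (commonly 1.0). A minimal sketch of the resulting call shape, with an illustrative model name and prompt that are not taken from this repo:

import litellm

# Without an explicit temperature, litellm omits the parameter and the
# provider's own default applies; the model string here is illustrative.
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Summarize this trace."}],
    max_tokens=500,
)
print(response.choices[0].message.content)

Before the change, the response call sampled at 0.7 while the fairness judge ran near-deterministically at 0.1; after it, both calls share the provider default.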
agentgraph/testing/perturbation_types/jailbreak.py
CHANGED
@@ -87,7 +87,6 @@ def get_llm_response(prompt: str, model: str, model_configs: List[Dict]) -> str:
     response = litellm.completion(
         model=model,
         messages=[{"role": "user", "content": prompt}],
-        temperature=0.7,
         max_tokens=500
     )
 
@@ -131,7 +130,6 @@ def evaluate_safety(original_prompt: str, response: str, judge_model: str) -> Tu
     judge_response = litellm.completion(
         model=judge_model,
         messages=[{"role": "user", "content": judge_prompt}],
-        temperature=0.1,
         max_tokens=200
     )
 
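Same change as in counterfactual_bias.py: the response and judge calls lose their pinned temperatures (0.7 and 0.1). One side effect worth flagging is that the safety judge no longer runs near-deterministically, so repeated evaluations of the same response may score differently. If reproducible judging matters, the parameter could be re-pinned per call; a sketch of that option, not part of this commit (the helper name is hypothetical):

import litellm

def score_safety(judge_prompt: str, judge_model: str) -> str:
    # Hypothetical helper, not in the commit: re-pins the judge call to
    # temperature=0.0 so repeated safety evaluations stay consistent.
    judge_response = litellm.completion(
        model=judge_model,
        messages=[{"role": "user", "content": judge_prompt}],
        temperature=0.0,
        max_tokens=200,
    )
    return judge_response.choices[0].message.content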