Commit
·
8f2b0ed
1
Parent(s):
6d546ef
Header Changes
Browse files
1_Auto_Generate_Prompts.py → 1_Auto_Generate_Prompts_Using_HI_Model.py
RENAMED
|
@@ -27,12 +27,18 @@ scroll_css = """
|
|
| 27 |
"""
|
| 28 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 29 |
|
| 30 |
-
st.title("Auto
|
| 31 |
st.markdown(
|
| 32 |
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"""
|
| 37 |
)
|
| 38 |
|
|
|
|
| 27 |
"""
|
| 28 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 29 |
|
| 30 |
+
st.title("Auto Generate Prompts Using HI Model")
|
| 31 |
st.markdown(
|
| 32 |
"""
|
| 33 |
+
Humane Intelligence’s Auto Red Teaming prototype is built to empower clients to run red-teaming exercises on their AI applications using HI’s intuitive no-code/low-code platform.
|
| 34 |
+
|
| 35 |
+
The system generates adversarial prompts via a model trained on proprietary HI data, targeting potential vulnerabilities in the client’s models or applications. The responses these prompts elicit from the client’s model are then evaluated by a separate judge LLM, also trained by HI.
|
| 36 |
+
|
| 37 |
+
Specifically, the prototype follows these steps:
|
| 38 |
+
1. Generates adversarial prompts based on a selected **bias category** and **country/region** using HI’s pre-trained model.
|
| 39 |
+
2. Selects the most effective prompts and feeds them into the client’s model to elicit responses.
|
| 40 |
+
3. Uses a dedicated HI-trained judge LLM to assess the responses.
|
| 41 |
+
4. Produces a final output that includes a **probability score** and a **justification** for each response.
|
| 42 |
"""
|
| 43 |
)
|
| 44 |
|
pages/{2_Select_Best_Prompts.py → 2_Select_Best_Prompts_For_Input_.py}
RENAMED
|
@@ -21,7 +21,7 @@ scroll_css = """
|
|
| 21 |
"""
|
| 22 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 23 |
|
| 24 |
-
st.title("Select Best Prompts")
|
| 25 |
|
| 26 |
def extract_json_content(markdown_str: str) -> str:
|
| 27 |
lines = markdown_str.splitlines()
|
|
|
|
| 21 |
"""
|
| 22 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 23 |
|
| 24 |
+
st.title("Select Best Prompts for Input in Client Model")
|
| 25 |
|
| 26 |
def extract_json_content(markdown_str: str) -> str:
|
| 27 |
lines = markdown_str.splitlines()
|
pages/3_Client_Response.py
CHANGED
|
@@ -19,7 +19,7 @@ scroll_css = """
|
|
| 19 |
|
| 20 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 21 |
|
| 22 |
-
st.title("Client Response (Answering)")
|
| 23 |
|
| 24 |
# Use best_samples if available; otherwise, fallback to the interactive single sample.
|
| 25 |
if "best_samples" in st.session_state:
|
|
|
|
| 19 |
|
| 20 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 21 |
|
| 22 |
+
st.title("Client Model Response (Answering)")
|
| 23 |
|
| 24 |
# Use best_samples if available; otherwise, fallback to the interactive single sample.
|
| 25 |
if "best_samples" in st.session_state:
|
pages/4_Evaluation_Report.py
CHANGED
|
@@ -6,8 +6,20 @@ import json
|
|
| 6 |
from openai import OpenAI
|
| 7 |
|
| 8 |
|
|
|
|
| 9 |
st.set_page_config(layout="wide")
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def extract_json_from_text(text: str) -> str:
|
| 13 |
"""
|
|
|
|
| 6 |
from openai import OpenAI
|
| 7 |
|
| 8 |
|
| 9 |
+
|
| 10 |
st.set_page_config(layout="wide")
|
| 11 |
+
scroll_css = """
|
| 12 |
+
<style>
|
| 13 |
+
.table-scroll {
|
| 14 |
+
overflow-x: auto;
|
| 15 |
+
width: 100%;
|
| 16 |
+
max-width: 100%;
|
| 17 |
+
}
|
| 18 |
+
</style>
|
| 19 |
+
"""
|
| 20 |
+
st.markdown(scroll_css, unsafe_allow_html=True)
|
| 21 |
+
|
| 22 |
+
st.title("Evaluation Response using HI Judge LLM")
|
| 23 |
|
| 24 |
def extract_json_from_text(text: str) -> str:
|
| 25 |
"""
|