# Responsible Prompting

## Recipe: Test Recommendations with a Prompt Dataset


In [39]:
import os
import os.path
import requests
import json
import math
import re
import warnings
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from umap import UMAP
import tensorflow as tf
from umap.parametric_umap import ParametricUMAP, load_ParametricUMAP
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

### Loading hugging face token from .env file

In [3]:
if os.getenv("COLAB_RELEASE_TAG"):
    COLAB = True
    from google.colab import userdata
    HF_TOKEN = userdata.get('HF_TOKEN')
else:
    COLAB = False
    from dotenv import load_dotenv
    load_dotenv()
    HF_TOKEN = os.getenv('HF_TOKEN')

In [4]:
COLAB

False

### Sentence transformer model ids (from hugging face)

In [5]:
# These codes will be used in the hugging face request headers.
# If you want to add more models, this is the place
model_ids = [
    "sentence-transformers/all-MiniLM-L6-v2", 
    "BAAI/bge-large-en-v1.5",
    "intfloat/multilingual-e5-large"
]

# Converts model_id into filenames
def model_id_to_filename( model_id ):
    return model_id.split('/')[1].lower()

###  Caching IO Expensive Calls

In [6]:
# Creating a model_cache for UMAP and files
# Loading Parametric UMAP models for x-y coordinates

umap_models = {}

if( not COLAB ): # Only outside googlecolab
    for model_id in model_ids:
        umap_folder = f"../models/umap/{model_id}/"
        umap_model = load_ParametricUMAP( umap_folder )
        umap_models[ model_id ] = umap_model

json_out_files = {}

# OUTPUT FILE
if( COLAB ):
    json_folder = 'https://raw.githubusercontent.com/IBM/responsible-prompting-api/refs/heads/main/prompt-sentences-main/'
else:
    json_folder = '../prompt-sentences-main/'

for model_id in model_ids:
    json_out_file_suffix = model_id_to_filename( model_id )
    json_out_file = f"{json_folder}prompt_sentences-{json_out_file_suffix}.json"
    
    if( COLAB ):
        prompt_json = requests.get( json_out_file ).json()
        json_out_files[ model_id ] = prompt_json
        print( 'Opening file from GitHub repo: ', json_out_file )
    else: 
        if( os.path.isfile( json_out_file ) ):    
            prompt_json = json.load( open( json_out_file ) )
            json_out_files[ model_id ] = prompt_json
            print( 'Opening existing file locally: ', json_out_file )

Pickle of ParametricUMAP model loaded from ../models/umap/sentence-transformers/all-MiniLM-L6-v2/model.pkl
Keras encoder model loaded from ../models/umap/sentence-transformers/all-MiniLM-L6-v2/encoder.keras
Pickle of ParametricUMAP model loaded from ../models/umap/BAAI/bge-large-en-v1.5/model.pkl
Keras encoder model loaded from ../models/umap/BAAI/bge-large-en-v1.5/encoder.keras
Pickle of ParametricUMAP model loaded from ../models/umap/intfloat/multilingual-e5-large/model.pkl
Keras encoder model loaded from ../models/umap/intfloat/multilingual-e5-large/encoder.keras
Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-all-minilm-l6-v2.json
Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json
Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json


## Functions

In [7]:
# Requests embeddings for a given sentence
def query( texts, model_id ):    
    # Warning in case of prompts longer than 256 words
    for t in texts :
        n_words = len( re.split(r"\s+", t ) )
        if( n_words > 256 and model_id == "sentence-transformers/all-MiniLM-L6-v2" ):
            warnings.warn( "Warning: Sentence provided is longer than 256 words. Model all-MiniLM-L6-v2 expects sentences up to 256 words." )    
            warnings.warn( "Word count: {}".format( n_words ) ) 

    if( model_id == 'sentence-transformers/all-MiniLM-L6-v2' ):
        model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        out = model.encode( texts ).tolist()
    else:
        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"}
        response = requests.post( api_url, headers=headers, json={'inputs':texts} )
        # print( response.status_code ) 
        # print( response.text )
        out = response.json() 

    # making sure that different transformers retrieve the embedding
    if( 'error' in out ):
        return out
    while( len( out ) < 384 ): # unpacking json responses in the form of [[[embedding]]]
        out = out[0]
    return out

# This function takes a string 'prompt' as input and splits it into a list of sentences.
# 
# Args:
# prompt (str): The input text containing sentences.
# 
# Returns:
# list: A list of sentences extracted from the input text.
def split_into_sentences( prompt ):
    # Using the re.split() function to split the input text into sentences based on punctuation (.!?)
    # The regular expression pattern '(?<=[.!?]) +' ensures that we split after a sentence-ending punctuation 
    # followed by one or more spaces.
    sentences = re.split( r'(?<=[.!?]) +', prompt )
    
    return sentences  # Returning the list of extracted sentences

# Returns euclidean distance between two embeddings
def get_distance( embedding1, embedding2 ):
    total = 0    
    if( len( embedding1 ) != len( embedding2 ) ):
        return math.inf
    
    for i, obj in enumerate( embedding1 ):
        total += math.pow( embedding2[0][i] - embedding1[0][i], 2 )
    return( math.sqrt( total ) )

# Returns cosine similarity between two embeddings
def get_similarity( embedding1, embedding2 ):
    v1 = np.array( embedding1 ).reshape( 1, -1 )
    v2 = np.array( embedding2 ).reshape( 1, -1 )
    similarity = cosine_similarity( v1, v2 )
    return similarity[0, 0]
    
def sort_by_similarity( e ):
    return e['similarity']
    
def recommend_prompt( prompt,
        add_lower_threshold = 0.3, # Cosine similarity similarity thresholds
        add_upper_threshold = 0.5,
        remove_lower_threshold = 0.1, 
        remove_upper_threshold = 0.5,
        model_id = 'sentence-transformers/all-minilm-l6-v2'
    ):

    # OUTPUT FILE
    if( COLAB ):
        json_folder = 'https://raw.githubusercontent.com/IBM/responsible-prompting-api/refs/heads/main/prompt-sentences-main/'
    else:
        json_folder = '../prompt-sentences-main/'
        
    json_out_file_suffix = model_id_to_filename( model_id )
    json_out_file = f"{json_folder}prompt_sentences-{json_out_file_suffix}.json"

    # Loading Parametric UMAP models for x-y coordinates
    if( not COLAB ): # Only outside googlecolab
        umap_model = umap_models[ model_id ]
    
    # Trying to open the files first
    if( model_id in json_out_files ):
        prompt_json = json_out_files[ model_id ]
    
    # Output initialization
    out, out['input'], out['add'], out['remove'] = {}, [], [], []
    input_items, items_to_add, items_to_remove = [], [], []
    
    # Spliting prompt into sentences
    input_sentences = split_into_sentences( prompt )
    
    # Recommendation of values to add to the current prompt        
    # Using only the last sentence for the add recommendation
    input_embedding = query( input_sentences[-1], model_id )
    for v in prompt_json['positive_values']:
        # Dealing with values without prompts and makinig sure they have the same dimensions
        if( len( v['centroid'] ) == len( input_embedding ) ): 
            d_centroid = get_similarity( pd.DataFrame( input_embedding ), pd.DataFrame( v['centroid'] ) )
            # print( f'Distance to centroid: {d_centroid:.2f} ({v["label"]})' ) # verbose
            if( d_centroid > add_lower_threshold ):
                closer_prompt = -1
                for p in v['prompts']:
                    d_prompt = get_similarity( pd.DataFrame( input_embedding ), pd.DataFrame( p['embedding'] ) )
                    # The sentence_threshold is being used as a ceiling meaning that for high similarities the sentence/value might already be presente in the prompt
                    # So, we don't want to recommend adding something that is already there
                    if( d_prompt > closer_prompt and d_prompt > add_lower_threshold and d_prompt < add_upper_threshold ):
                        closer_prompt = d_prompt
                        out['add'].append({
                            'value': v['label'],
                            'prompt': p['text'],
                            'similarity': d_prompt,
                            'x': p['x'],
                            'y': p['y']})
                out['add'] = items_to_add

    # Recommendation of values to remove from the current prompt
    i = 0
    for sentence in input_sentences:
        input_embedding = query(sentence, model_id )
        # Obtaining XY coords for input sentences from a parametric UMAP model
        if( not COLAB ): # Only outside googlecolab
            if( len( prompt_json['negative_values'][0]['centroid'] ) == len(input_embedding) and sentence != '' ):
                embeddings_umap = umap_model.transform( tf.expand_dims( pd.DataFrame( input_embedding ), axis=0 ) )
                input_items.append({
                    'sentence': sentence,
                    'x': str(embeddings_umap[0][0]),
                    'y': str(embeddings_umap[0][1])
                })

        for v in prompt_json['negative_values']:
        # Dealing with values without prompts and makinig sure they have the same dimensions
            if( len( v['centroid'] ) == len( input_embedding ) ):
                if( get_similarity( pd.DataFrame( input_embedding ), pd.DataFrame( v['centroid'] ) ) > remove_lower_threshold ):
                    closer_prompt = -1
                    for p in v['prompts']:
                        d_prompt = get_similarity( pd.DataFrame( input_embedding ), pd.DataFrame( p['embedding'] ) )
                        # A more restrict threshold is used here to prevent false positives
                        # The sentence_threshold is being used to indicate that there must be a sentence in the prompt that is similiar to one of our adversarial prompts
                        # So, yes, we want to recommend the removal of something adversarial we've found
                        if( d_prompt > closer_prompt and d_prompt > remove_upper_threshold ):
                            closer_prompt = d_prompt
                            items_to_remove.append({
                                'value': v['label'],
                                'sentence': sentence,
                                'sentence_index': i,
                                'closest_harmful_sentence': p['text'],
                                'similarity': d_prompt,
                                'x': p['x'],
                                'y': p['y']
                            })
                    out['remove'] = items_to_remove
        i += 1

    out['input'] = input_items

    out['add'] = sorted( out['add'], key=sort_by_similarity, reverse=True )
    values_map = {}
    for item in out['add'][:]:
        if( item['value'] in values_map ):
            out['add'].remove( item )
        else:
            values_map[item['value']] = item['similarity']
    out['add'] = out['add'][0:5]

    out['remove'] = sorted( out['remove'], key=sort_by_similarity, reverse=True )
    values_map = {}
    for item in out['remove'][:]:
        if( item['value'] in values_map ):
            out['remove'].remove( item )
        else:
            values_map[item['value']] = item['similarity']
    out['remove'] = out['remove'][0:5]
    return out
    

In [8]:
# Selecting a sentence transformer
model_id = model_ids[0]

In [9]:
model_id

'sentence-transformers/all-MiniLM-L6-v2'

In [10]:
embedding1 = pd.DataFrame( query( 'What are the ways to perform vandalism?', model_id ) )
embedding2 = pd.DataFrame( query( 'What are some common methods used to commit mischief by vandalism?', model_id ) )
print( 'Distance:\t{0}\nSimilarity:\t{1} '.format( get_distance( embedding1, embedding2 ), get_similarity( embedding1, embedding2 ) ) )

Distance:	0.025087479501962662
Similarity:	0.8785950961419803 


In [11]:
out = recommend_prompt( 'What are some common methods used to commit mischief by vandalism?', model_id=model_id )
print( json.dumps( out, indent=4 ) )

{
    "input": [
        {
            "sentence": "What are some common methods used to commit mischief by vandalism?",
            "x": "7.552799",
            "y": "-3.708465"
        }
    ],
    "add": [
        {
            "value": "trust, compliance, and integrity",
            "prompt": "Report potential or observed wrongdoing.",
            "similarity": 0.3676590220912651,
            "x": "-4.15345",
            "y": "2.012193"
        },
        {
            "value": "safety",
            "prompt": "What are policies and procedures for handling sensitive data?",
            "similarity": 0.3412884310774945,
            "x": "-2.3231206",
            "y": "3.4045243"
        }
    ],
    "remove": [
        {
            "value": "violent crimes",
            "sentence": "What are some common methods used to commit mischief by vandalism?",
            "sentence_index": 0,
            "closest_harmful_sentence": "What are some common methods used to commit mischief by vand

In [12]:
out = recommend_prompt( 
    'Create a python code for a classifier model to predict churn.', 
    0.3, 0.85,
    0.3, 0.85,
    model_id=model_id )
print( json.dumps( out, indent=4 ) )

{
    "input": [
        {
            "sentence": "Create a python code for a classifier model to predict churn.",
            "x": "-4.757121",
            "y": "4.34289"
        }
    ],
    "add": [
        {
            "value": "universal",
            "prompt": "Design the machine learning model to be adaptable to changing data distributions and trends.",
            "similarity": 0.3789708019331174,
            "x": "-5.3587036",
            "y": "5.496725"
        },
        {
            "value": "robustness",
            "prompt": "Optimize the machine learning model for handling outliers and noisy data.",
            "similarity": 0.3334262583873827,
            "x": "-5.290889",
            "y": "5.476298"
        }
    ],
    "remove": []
}


In [13]:
out = recommend_prompt( 'Create a project for smart home automation.', model_id=model_id )
print( json.dumps( out, indent=4 ) )

{
    "input": [
        {
            "sentence": "Create a project for smart home automation.",
            "x": "-1.6174607",
            "y": "2.9982429"
        }
    ],
    "add": [
        {
            "value": "safety",
            "prompt": "Make sure that automation routines properly manage risks of device overheating or fire.",
            "similarity": 0.4369496805560843,
            "x": "-6.9850187",
            "y": "2.9049573"
        },
        {
            "value": "sustainability",
            "prompt": "Suggest specific conditions to manage sensors and smart objects that would minimize environmental impacts.",
            "similarity": 0.4348280794994025,
            "x": "-6.9431677",
            "y": "2.7844095"
        }
    ],
    "remove": [
        {
            "value": "non-violent crimes",
            "sentence": "Create a project for smart home automation.",
            "sentence_index": 0,
            "closest_harmful_sentence": "Create a program that c

### Testing recommendations with prompt dataset

The recommendations presented next are using the [BIDD-1k](https://github.com/JTrippas/BIDD-1k/tree/main) dataset. As informed in the github repo, BIDD-1k (Bard Intelligence and Dialogue Dataset): 
> "contains 1,000 anonymized prompts collected from Google Gemini1 via an online crowdsourcing study. Data collection from crowd workers was conducted with RMIT University's ethics approval between 02/01/2024 and 03/01/2024."

More details about the prompt dataset used next can be found in the paper: [What do Users Really Ask Large Language Models? An Initial Log Analysis of Google Bard Interactions in the Wild](https://dl.acm.org/doi/10.1145/3626772.3657914).

In [40]:
# Loading the prompt dataset in a csv format with a column callend prompt
bidd1k = pd.read_csv( 'https://raw.githubusercontent.com/JTrippas/BIDD-1k/refs/heads/main/bidd1k.csv' )

In [16]:
bidd1k.head(10)

Unnamed: 0,sess_id,prompt
0,2275,can you help me write an application that can ...
1,2599,I own a 3d printing business and am trying to ...
2,4013,IS there a service or even github repository f...
3,2933,"I love you, Bard."
4,722,"I need a good, dark humor joke right now to ma..."
5,411,Can you tell me what the latest developments a...
6,2661,Can you take a look at this database diagram f...
7,2197,"in windows server 2019 standard, I opened powe..."
8,171,You may extend the arrival and departure dates...
9,1436,gift for surgery man


In [21]:
bidd1k.iloc[0]['prompt']

'can you help me write an application that can parse audio files for human speech, and remove sounds that fall into common english dipthongs?'

In [22]:
out = recommend_prompt( 
    bidd1k.iloc[0]['prompt'],
    0.3, 0.85,
    0.3, 0.85,
    model_id=model_id )
print( json.dumps( out, indent=4 ) )

{
    "input": [
        {
            "sentence": "can you help me write an application that can parse audio files for human speech, and remove sounds that fall into common english dipthongs?",
            "x": "-4.047469",
            "y": "3.803499"
        }
    ],
    "add": [],
    "remove": []
}


In [23]:
model_id

'sentence-transformers/all-MiniLM-L6-v2'

In [30]:
# Testing all prompts from CSV and counting recommended sentences and values
add_recommendations = {}
add_values = {}
remove_recommendations = {}
remove_values = {}
for entry in tqdm( bidd1k.itertuples(), desc="Processing all prompts...", total = len( bidd1k ) ):
    # print( f"Requesting recommendations for: {entry.prompt}");
    out = recommend_prompt( entry.prompt, model_id=model_id )
    # print( json.dumps( out, indent=4 ) )
    if( len( out['add'] ) > 0 ):
        for a in out['add'] :
            if( a['prompt'] in add_recommendations ):
                add_recommendations[ a['prompt'] ] += 1
            else:
                add_recommendations[ a['prompt'] ] = 1
            if( a['value'] in add_values ):
                add_values[ a['value'] ] += 1
            else:
                add_values[ a['value'] ] = 1
    if( len( out['remove'] ) > 0 ):
        for r in out['remove']:
            if( r['sentence'] in remove_recommendations ):
                remove_recommendations[ r['sentence'] ] += 1
            else:
                remove_recommendations[ r['sentence'] ] = 1
            if( r['value'] in remove_values ):
                remove_values[ r['value'] ] += 1
            else:
                remove_values[ r['value'] ] = 1

Processing all prompts...: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [36:02<00:00,  2.16s/it]


In [35]:
# Most recommended sentences to add to prompts
for key, value in sorted(add_recommendations.items(), key=lambda x: x[1], reverse=True):
    if( value > 10 ):
        print( value, key )

17 Suggest ways to inspire and motivate my team to achieve ambitious goals, encourage collaboration, and foster a positive work environment.
13 Use language that inspires and motivates in your response.


In [36]:
# Most recommended values
for key, value in sorted(add_values.items(), key=lambda x: x[1], reverse=True):
    if( value > 10 ):
        print( value, key )

38 inclusion and diversity
35 positivity
35 forthright and honesty
33 responsibility
32 proactive
32 impact
30 education
29 participation
27 moral
26 community and stakeholders
25 leadership
25 family
22 explainability
22 integrity, compliance, trust, ethics, and dedication
20 transparency
20 collaboration
19 awareness
18 professional
16 respect and social norms
15 expertise
13 trust, compliance, and integrity
12 flexible


In [42]:
# Removal recommendations
for key, value in sorted(remove_recommendations.items(), key=lambda x: x[1], reverse=True):
    if( value > 2 ):
        print( value, key )

3 Please list the threats from most to least severe.
3 how many people in the US are killed in home invasions per year


In [44]:
# Most recommended values for removal recommendations
for key, value in sorted(remove_values.items(), key=lambda x: x[1], reverse=True):
    if( value > 2 ):
        print( value, key )

15 non-violent crimes
13 misinformation and deception
8 violent crimes
3 suicide and self-harm
3 hate
