| | import sys |
| | import requests |
| | import pandas as pd |
| | import os |
| | from dotenv import load_dotenv |
| | from tqdm import tqdm |
| |
|
| | |
# Load variables from the local "key.env" file before any credentials are read.
load_dotenv(dotenv_path="key.env")
| |
|
class ContextualAPIUtils:
    """Minimal client that posts single-message chat requests to an endpoint."""

    # Seconds to wait on the server when the caller does not choose a value.
    DEFAULT_TIMEOUT = 120

    def __init__(self, api_key, endpoint_url, application_id, timeout=DEFAULT_TIMEOUT):
        """Store the connection settings.

        Args:
            api_key: Token sent as a Bearer credential in the Authorization
                header.
            endpoint_url: URL the chat request is POSTed to.
            application_id: Value sent as "application_id" in the request body.
            timeout: Seconds passed to ``requests.post`` as its ``timeout``
                argument. ``None`` waits indefinitely.
        """
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        self.application_id = application_id
        self.timeout = timeout

    def chat(self, prompt):
        """POST *prompt* as one user message and return the raw response.

        Args:
            prompt: Text placed in the "content" field of the user message.

        Returns:
            The object returned by ``requests.post``; the status code and
            payload are not inspected here.

        Raises:
            requests.exceptions.RequestException: Propagated from
                ``requests.post``, including when the timeout elapses.
        """
        body = {
            "application_id": self.application_id,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
        }
        headers = {"Authorization": f"Bearer {self.api_key}"}
        # Without a timeout a stalled connection would block the caller forever.
        return requests.post(
            self.endpoint_url,
            headers=headers,
            json=body,
            timeout=self.timeout,
        )
| |
|
def get_api_credentials():
    """Read the API settings from environment variables.

    Returns:
        tuple: ``(api_key, endpoint_url, application_id)`` taken from the
        ``API_KEY``, ``ENDPOINT_URL`` and ``APPLICATION_ID`` variables.

    Exits the process with status 1 when any of the three is unset or empty,
    after reporting on stderr which ones are missing.
    """
    names = ("API_KEY", "ENDPOINT_URL", "APPLICATION_ID")
    values = tuple(os.getenv(name) for name in names)

    missing = [name for name, value in zip(names, values) if not value]
    if missing:
        print(
            "Missing API credentials. Ensure API_KEY, ENDPOINT_URL, and "
            "APPLICATION_ID are set in your environment.",
            file=sys.stderr,
        )
        # Name the offenders so the user does not have to check all three.
        print(f"Not set: {', '.join(missing)}", file=sys.stderr)
        sys.exit(1)

    return values
| |
|
def prompt_for_excel_path():
    """Ask the user for the prompts workbook and return its absolute path.

    Surrounding whitespace and quote characters are removed from the entered
    text before it is resolved (the prompt advertises drag and drop, which
    can wrap the path in quotes).

    Returns:
        str: Absolute path of an existing regular file.

    Exits with status 0 when the user interrupts the prompt with Ctrl-C, and
    with status 1 when input ends without a line, nothing is entered, or the
    path does not point to an existing file.
    """
    # Only the prompt itself can be interrupted, so keep the try body to it.
    try:
        raw_path = input("Enter the path to the Excel file with prompts (drag and drop works): ")
    except KeyboardInterrupt:
        print("\nOperation cancelled by user. Exiting.")
        sys.exit(0)
    except EOFError:
        print("\nNo input received. Exiting.")
        sys.exit(1)

    cleaned = raw_path.strip().strip("'\"")
    if not cleaned:
        # abspath("") is the current directory, which exists; reject it here
        # so an empty answer is not mistaken for a valid workbook path.
        print("No file path entered. Please provide the path to the Excel file.")
        sys.exit(1)

    excel_path = os.path.abspath(cleaned)
    # isfile rather than exists: a directory cannot be opened as a workbook.
    if not os.path.isfile(excel_path):
        print(f"File not found at {excel_path}. Please check the path and try again.")
        sys.exit(1)
    return excel_path
| |
|
# Sheets read from the workbook, in the order their rows appear in the output.
_PROMPT_SHEETS = ("Generic Prompts", "Brand Safety Prompts")

# Input columns copied unchanged into each output row (blank when absent).
_PASSTHROUGH_COLUMNS = (
    "policy assesment",
    "response tags",
    "query category",
    "query subcategory",
    "what is the query type?",
    "contains swear words",
    "jailbreaking technique",
    "sector",
    "source",
    "annotator notes",
)


def _rows_marked_yes(df):
    """Return the rows of *df* whose 'filter' cell reads 'yes' (any case)."""
    # Coerce to text first: a blank or numeric column is not string-typed and
    # the .str accessor would raise on it instead of simply matching nothing.
    flags = df["filter"].astype(str).str.strip().str.lower()
    return df[flags == "yes"]


def _load_selected_prompts(excel_path):
    """Load both prompt sheets and return the combined rows flagged 'yes'.

    Column names are lower-cased before use. Exits with status 1 when the
    workbook cannot be loaded, when no row in either sheet is flagged, or
    when the 'query' / 'standard_id' columns are absent.
    """
    # sys.exit raises SystemExit, which `except Exception` does not catch,
    # so the validation exits below are not reported as load errors.
    try:
        # One call with a list of names parses the workbook a single time.
        sheets = pd.read_excel(
            excel_path, sheet_name=list(_PROMPT_SHEETS), engine="openpyxl"
        )

        selected = []
        for sheet_name in _PROMPT_SHEETS:
            sheet = sheets[sheet_name]
            sheet.columns = sheet.columns.str.lower()
            selected.append(_rows_marked_yes(sheet))

        if all(part.empty for part in selected):
            print("Error: At least one sheet must have 'yes' in the 'filter' column.")
            sys.exit(1)

        df_combined = pd.concat(selected, ignore_index=True)

        if 'query' not in df_combined.columns or 'standard_id' not in df_combined.columns:
            print("Error: Required columns ('query' and 'standard_id') not found in the input file. Exiting.")
            print(f"Available Columns: {df_combined.columns.tolist()}")
            sys.exit(1)
    except Exception as e:
        print(f"Error loading Excel file at {excel_path}: {e}")
        sys.exit(1)

    return df_combined


def main():
    """Send every flagged prompt to the chat endpoint and save the results.

    Steps: read the credentials, ask for the workbook path and the output
    name, load the rows flagged 'yes', call the endpoint once per row, and
    write one CSV row per prompt. A failed request is recorded in that row's
    "response" field instead of stopping the run.
    """
    api_key, endpoint_url, application_id = get_api_credentials()
    api_utils = ContextualAPIUtils(api_key, endpoint_url, application_id)

    excel_path = prompt_for_excel_path()
    output_filename = input("Enter the desired name for the output file (exclude .csv): ").strip() + ".csv"

    df_combined = _load_selected_prompts(excel_path)

    output_rows = []
    print("Processing responses...")
    with tqdm(total=len(df_combined), desc="Pulling Responses") as pbar:
        # The combined frame was re-indexed from 0, so counting from 1 here
        # yields the same "#" values as index + 1.
        for number, (_, row) in enumerate(df_combined.iterrows(), start=1):
            prompt = row['query']
            try:
                api_response = api_utils.chat(prompt)
                if api_response.status_code == 200:
                    response = api_response.json().get("message", {}).get("content", "")
                else:
                    response = f"Error: {api_response.status_code} - {api_response.text}"
            except Exception as e:
                # Keep going: the failure text becomes this row's response.
                response = f"Exception occurred: {e}"

            output_row = {
                "#": number,
                "clm id": application_id,
                "query": prompt,
                "response": response,
            }
            for column in _PASSTHROUGH_COLUMNS:
                output_row[column] = row.get(column, "")
            output_row["patronus rating"] = "n/a"
            output_row["patronus explanation"] = "n/a"

            output_rows.append(output_row)
            pbar.update(1)

    df_output = pd.DataFrame(output_rows)

    try:
        df_output.to_csv(output_filename, index=False)
        print(f"Processing complete. Results saved to '{output_filename}'.")
    except Exception as e:
        print(f"Error saving the output CSV: {e}")
| |
|
# Start the interactive run only when this file is executed as a script.
if __name__ == "__main__":
    main()
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|