Spaces:

rwillats
/

Contextual-Policy-Engine-Hate-Speech-Classification

Sleeping

App Files Files Community

Contextual-Policy-Engine-Hate-Speech-Classification / api_call.py

rwillats

Upload folder using huggingface_hub

0886c09 verified 11 months ago

raw

history blame

6.07 kB

	import sys
	import requests
	import pandas as pd
	import os
	from dotenv import load_dotenv
	from tqdm import tqdm

	# Load environment variables from the 'key.env' file before anything reads them.
	# get_api_credentials() later looks up API_KEY, ENDPOINT_URL and APPLICATION_ID
	# via os.getenv, so those are the names the file is expected to provide.
	# NOTE(review): 'key.env' is a relative path, presumably resolved against the
	# current working directory -- confirm the script is always launched from there.
	load_dotenv(dotenv_path='key.env')

	class ContextualAPIUtils:
	    """Minimal client that posts a single-turn chat request to an HTTP endpoint.

	    Args:
	        api_key: Token sent as ``Authorization: Bearer <api_key>``.
	        endpoint_url: Full URL the chat request is POSTed to.
	        application_id: Identifier sent in the request body as ``application_id``.
	        timeout: Seconds to wait for the server before giving up. Defaults to
	            ``DEFAULT_TIMEOUT``. Pass ``None`` to wait indefinitely.
	    """

	    # Without a timeout, requests waits forever on a stalled connection, which
	    # would freeze a whole batch run on one bad call.
	    DEFAULT_TIMEOUT = 120

	    def __init__(self, api_key, endpoint_url, application_id, timeout=DEFAULT_TIMEOUT):
	        self.api_key = api_key
	        self.endpoint_url = endpoint_url
	        self.application_id = application_id
	        self.timeout = timeout

	    def chat(self, prompt):
	        """POST ``prompt`` as a single user message and return the raw response.

	        The HTTP status is not checked here; callers inspect ``status_code``
	        themselves.

	        Args:
	            prompt: Text sent as the content of the only (user) message.

	        Returns:
	            The ``requests.Response`` object produced by ``requests.post``.

	        Raises:
	            requests.exceptions.RequestException: On connection failures or
	                when the server does not answer within ``self.timeout``.
	        """
	        body = {
	            "application_id": self.application_id,
	            "messages": [{"role": "user", "content": prompt}],
	            "stream": False,
	        }
	        headers = {"Authorization": f"Bearer {self.api_key}"}
	        return requests.post(
	            self.endpoint_url,
	            headers=headers,
	            json=body,
	            timeout=self.timeout,
	        )

	def get_api_credentials():
	    """Read the API credentials from the process environment.

	    Looks up API_KEY, ENDPOINT_URL and APPLICATION_ID with ``os.getenv``. If
	    any of them is unset or empty, prints an explanatory message and exits
	    the process with status 1.

	    Returns:
	        A 3-tuple ``(api_key, endpoint_url, application_id)``.
	    """
	    variable_names = ('API_KEY', 'ENDPOINT_URL', 'APPLICATION_ID')
	    credentials = tuple(os.getenv(name) for name in variable_names)

	    # Happy path first: every value is present and non-empty.
	    if all(credentials):
	        return credentials

	    print("Missing API credentials. Ensure API_KEY, ENDPOINT_URL, and APPLICATION_ID are set in your environment.")
	    sys.exit(1)

	def prompt_for_excel_path():
	    """Ask the user for the Excel workbook path and return it as an absolute path.

	    Surrounding whitespace and one layer of single/double quotes are removed
	    (terminals often add quotes on drag and drop), and a leading ``~`` is
	    expanded to the user's home directory.

	    Returns:
	        The absolute path of an existing regular file.

	    Exits:
	        Status 1 if the path does not point to an existing file (this includes
	        an empty answer or a directory); status 0 if the user presses Ctrl-C
	        at the prompt.
	    """
	    # Only the blocking prompt can realistically be interrupted, so keep the
	    # try body limited to it.
	    try:
	        raw_path = input("Enter the path to the Excel file with prompts (drag and drop works): ")
	    except KeyboardInterrupt:
	        print("\nOperation cancelled by user. Exiting.")
	        sys.exit(0)

	    cleaned_path = raw_path.strip().strip("'\"")  # Remove leading and trailing quotes if present
	    excel_path = os.path.abspath(os.path.expanduser(cleaned_path))

	    # isfile rather than exists: an empty answer resolves to the current
	    # directory, which exists but is not a workbook.
	    if not os.path.isfile(excel_path):
	        print(f"File not found at {excel_path}. Please check the path and try again.")
	        sys.exit(1)
	    return excel_path

	# Input columns copied unchanged into every output row, in output order.
	# Spellings are kept exactly as the script has always used them, because they
	# double as lookup keys into the (lower-cased) input columns.
	_PASSTHROUGH_COLUMNS = (
	    "policy assesment",
	    "response tags",
	    "query category",
	    "query subcategory",
	    "what is the query type?",
	    "contains swear words",
	    "jailbreaking technique",
	    "sector",
	    "source",
	    "annotator notes",
	)


	def _filter_is_yes(df):
	    """Return a boolean mask of rows whose 'filter' cell is "yes" (any case)."""
	    # astype(str) keeps the .str accessor usable when the column holds no text
	    # at all (an entirely blank column is read as float NaN); such cells become
	    # "nan" and simply do not match.
	    return df['filter'].astype(str).str.lower().eq('yes')


	def _load_filtered_prompts(excel_path):
	    """Load both prompt sheets and return their "yes"-filtered rows combined.

	    Reads the "Generic Prompts" and "Brand Safety Prompts" sheets, lower-cases
	    the column names, keeps rows whose 'filter' column is "yes" and
	    concatenates the result with a fresh 0-based index.

	    Exits with status 1 if neither sheet has a "yes" row, or if the combined
	    frame lacks the 'query' or 'standard_id' column. Read errors propagate to
	    the caller.
	    """
	    filtered_frames = []
	    for sheet_name in ("Generic Prompts", "Brand Safety Prompts"):
	        sheet = pd.read_excel(excel_path, sheet_name=sheet_name, engine="openpyxl")
	        # Normalize column names to lowercase
	        sheet.columns = sheet.columns.str.lower()
	        filtered_frames.append(sheet[_filter_is_yes(sheet)])

	    if not any(len(frame) for frame in filtered_frames):
	        print("Error: At least one sheet must have 'yes' in the 'filter' column.")
	        sys.exit(1)

	    df_combined = pd.concat(filtered_frames, ignore_index=True)

	    if 'query' not in df_combined.columns or 'standard_id' not in df_combined.columns:
	        print("Error: Required columns ('query' and 'standard_id') not found in the input file. Exiting.")
	        print(f"Available Columns: {df_combined.columns.tolist()}")
	        sys.exit(1)
	    return df_combined


	def _fetch_response(api_utils, prompt):
	    """Send one prompt and return the reply text, or an error description."""
	    # Best effort by design: any failure is recorded in the output row instead
	    # of aborting the remaining prompts.
	    try:
	        api_response = api_utils.chat(prompt)
	        if api_response.status_code == 200:
	            return api_response.json().get("message", {}).get("content", "")
	        return f"Error: {api_response.status_code} - {api_response.text}"
	    except Exception as e:
	        return f"Exception occurred: {e}"


	def _build_output_row(number, application_id, row, response):
	    """Assemble one output record; key order defines the CSV column order."""
	    output_row = {
	        "#": number,
	        "clm id": application_id,  # Pre-fill with application ID
	        "query": row.get("query", ""),
	        "response": response,
	    }
	    for column in _PASSTHROUGH_COLUMNS:
	        output_row[column] = row.get(column, "")
	    output_row["patronus rating"] = "n/a"  # Pre-filled with "n/a"
	    output_row["patronus explanation"] = "n/a"  # Pre-filled with "n/a"
	    return output_row


	def main():
	    """Run the interactive batch: read prompts, query the API, write a CSV.

	    Steps:
	      1. Read API credentials from the environment and build the client.
	      2. Ask for the Excel workbook path and the output file name.
	      3. Load the rows marked "yes" in the 'filter' column of both sheets.
	      4. Send each row's 'query' to the API and collect the reply (or an
	         error description) together with the row's annotation columns.
	      5. Write all collected rows to ``<name>.csv``.

	    Exits with status 1 if the workbook cannot be loaded or validated, or if
	    the CSV cannot be written.
	    """
	    # Get API credentials
	    api_key, endpoint_url, application_id = get_api_credentials()
	    api_utils = ContextualAPIUtils(api_key, endpoint_url, application_id)

	    # Prompt for Excel file path
	    excel_path = prompt_for_excel_path()

	    # Prompt for output file name
	    output_filename = input("Enter the desired name for the output file (exclude .csv): ").strip() + ".csv"

	    # Load sheets and filter rows where 'filter' column is "yes"
	    try:
	        df_combined = _load_filtered_prompts(excel_path)
	    except Exception as e:
	        print(f"Error loading Excel file at {excel_path}: {e}")
	        sys.exit(1)

	    # Process filtered rows and fetch responses
	    output_rows = []
	    print("Processing responses...")
	    progress = tqdm(df_combined.iterrows(), total=df_combined.shape[0], desc="Pulling Responses")
	    for i, row in progress:
	        response = _fetch_response(api_utils, row['query'])
	        # The index is 0-based after concat(ignore_index=True); number rows from 1.
	        output_rows.append(_build_output_row(i + 1, application_id, row, response))

	    # Save results to user-specified CSV
	    try:
	        pd.DataFrame(output_rows).to_csv(output_filename, index=False)
	    except Exception as e:
	        print(f"Error saving the output CSV: {e}")
	        # A lost result file must not look like success to the calling shell.
	        sys.exit(1)
	    print(f"Processing complete. Results saved to '{output_filename}'.")

	# Start the interactive workflow only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()