# Hugging Face Space (status when captured: Sleeping); file size: 6,595 Bytes
import gradio as gr
import os
import pandas as pd
from openai import OpenAI
import openpyxl
import json
# Shared OpenAI client used for every tagging request. The API key,
# organization and project identifiers come from the environment; a variable
# that is not set is passed through as None.
client = OpenAI(
    api_key=os.environ.get('OPENAI_KEY'),
    organization=os.environ.get('ORG_KEY'),
    project=os.environ.get('PROJ_KEY'),
)
# Username and password for the login screen (the login code further down in
# this file is currently commented out).
# The values are read from the environment so that no secret lives in the
# source: set TAGGER_USERNAME and TAGGER_PASSWORD in the deployment's secret
# settings. Either constant is None when its variable is not set, so a
# comparison against user-typed text can never succeed by accident.
# NOTE(review): the password that used to be hard-coded here was published
# together with the source and should be rotated.
VALID_USERNAME = os.getenv('TAGGER_USERNAME')
VALID_PASSWORD = os.getenv('TAGGER_PASSWORD')
# TODO: need to give info on how to convert to CSV
# Page heading; passed as `title=` to gr.Interface below.
title = "Automated Research Code Tagger"
# User-facing instructions; passed as `description=` to gr.Interface below.
# This is a normal (non-raw) string, so each explicit "\n" is a newline
# character in addition to the physical line break that follows it
# (presumably to force a paragraph break when the text is rendered).
description = """
ABOUT:\n
This automated tagger takes in a list of tags and a list of input quotes. Each input quote is individually fed to OpenAI's ChatGPT together with the list of tags,
and ChatGPT will respond with the subset of the input tags which are related to the content of the quote.\n
HOW TO USE:\n
1)Upload a single sheet Excel file containing quotes in a column.(It is ok for the file to contain other data also)\n
2)Type in the name of the column where the quotes are located\n
3)Type in a list of tags separated by commas. For proper names/slogans/other tags that should be treated as an inseparable unit eg. Nike's "Just Do It", add a * in front of the tag eg. tag1, *Just Do It, tag3, etc.
This will ensure only quotes containing "Just Do It" exactly are tagged and not other quotes about doing other things.\n
4)All the responses from ChatGPT will be collated and displayed in the table on the right, together with the original quotes.
You may then copy them into an Excel file for further processing. Please allow 5-10 min for processing, especially if you are giving upwards of 100 quotes!\n
Please bear in mind that the tags are AI generated so check your results to ensure they make sense before using them.
I will not be responsible for mistakes made by the AI, but I can try to fix them if you alert me.
-Kenneth
"""
# Template for the user message sent with every quote. tag_quote fills it in
# with str.format: {quote} and {tags_list} are the two placeholders, while the
# doubled braces {{ and }} are escapes that come out as single literal braces
# around the example JSON reply.
prompt = """
Given the quote below and the regular tag list below, evaluate each tag in the tag list and determine if the meaning of the quote can be described by that tag topic.
If so, return the relevant tag in your response. Use only the tags provided in the list. Under no circumstances should you create new tag names.
For the tags starting with a *, these tags should be treated as proper nouns(usually product names or slogans) and should not be used unless the quote explicitly contains the entire tag.
For quotes with meanings that are more ambiguous and can relate to multiple tags, make no assumptions about their meanings and only add tags if the topic of the tag is actually mentioned in the quote.
If there are no relevant tags to the quote, return an empty list.
Quote:
{quote}
Tag list:
{tags_list}
Respond in the following format:
{{
"tags":[<tagName1>, <tagName2>]
}}
"""
def tag_quote(quote, tags_list):
    """Ask the chat model which of ``tags_list`` apply to ``quote``.

    Args:
        quote: Text of a single quote. ``None``, NaN (what pandas yields for
            an empty spreadsheet cell) and blank strings are treated as
            "nothing to tag".
        tags_list: Candidate tags; rendered into the prompt as-is.

    Returns:
        The list stored under the ``"tags"`` key of the model's JSON reply,
        or an empty list when there is no quote to tag or the reply cannot be
        used (not valid JSON, no ``"tags"`` key, or ``"tags"`` is not a list).
    """
    # Skip the API call for empty cells: formatting them into the prompt would
    # only spend tokens on tagging the literal text "nan" / "None".
    is_missing = quote is None or (isinstance(quote, float) and quote != quote)
    if is_missing or not str(quote).strip():
        return []

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
            {"role": "user", "content": prompt.format(tags_list=tags_list, quote=quote)},
        ],
    )
    content = response.choices[0].message.content
    print(content)

    # This function runs once per row of a long batch, so one unusable reply
    # must not abort the whole run: report it and leave that quote untagged.
    try:
        parsed = json.loads(content)
    except (TypeError, json.JSONDecodeError):
        print("Could not parse the model reply as JSON; returning no tags for this quote")
        return []

    tags = parsed.get('tags') if isinstance(parsed, dict) else None
    return tags if isinstance(tags, list) else []
def process_quotes(quotes_file_path, quotes_col_name, tags_string):
    """Tag every quote found in one column of an uploaded Excel sheet.

    Args:
        quotes_file_path: The Excel file handed over by the ``gr.File`` input;
            ``None`` when nothing was uploaded.
        quotes_col_name: Header of the column that holds the quotes.
        tags_string: Candidate tags separated by commas.

    Returns:
        A DataFrame with two columns: the quotes column (under its original
        header) and ``'Tags'``, holding what ``tag_quote`` returned per row.

    Raises:
        gr.Error: If no file or no tag was supplied, the sheet has no header
            or no data rows, or the column name matches zero or several
            columns.
    """
    print(quotes_file_path)
    print(quotes_col_name)
    print(tags_string)

    if quotes_file_path is None:
        raise gr.Error("Please upload an Excel file containing the quotes")

    # Drop blank entries so input such as "a, b," or "a,,b" never sends an
    # empty tag to the model.
    tags_list = [tag.strip() for tag in (tags_string or "").split(',') if tag.strip()]
    if not tags_list:
        raise gr.Error("Please provide at least one tag")

    # pd.read_excel renames duplicate header cells (foo -> foo.1) when it
    # parses a header itself. To keep the names exactly as typed in the sheet,
    # the first row is read on its own as plain data and then installed as the
    # header of the remaining rows.
    header_rows = pd.read_excel(quotes_file_path, header=None, nrows=1).values
    if len(header_rows) == 0:
        raise gr.Error("The uploaded Excel file is empty")
    quotes_df = pd.read_excel(quotes_file_path, header=None, skiprows=1)
    if quotes_df.empty:
        raise gr.Error("No rows found below the header row")
    quotes_df.columns = header_rows[0]

    # Try the name exactly as typed first; only when that matches nothing,
    # retry without the surrounding whitespace that is easy to type by mistake.
    column_names = quotes_df.columns.tolist()
    if quotes_col_name not in column_names and isinstance(quotes_col_name, str):
        quotes_col_name = quotes_col_name.strip()

    count = column_names.count(quotes_col_name)
    if count == 0:
        raise gr.Error("No columns with this name found")
    elif count > 1:
        print("Count>1!!")
        raise gr.Error("Multiple columns with this name found, please rename to something unique")

    quotes_data = quotes_df[quotes_col_name]
    tags = quotes_data.apply(tag_quote, args=(tags_list,)).rename('Tags')
    # Build the result from the two series instead of writing a 'Tags' column
    # into the sheet's frame, so a sheet that already has a 'Tags' column (or
    # whose quotes column is called 'Tags') is not overwritten.
    return pd.concat([quotes_data, tags], axis=1)
# def authenticate(username, password):
# """Authenticate the user using static username and password"""
# if username == VALID_USERNAME and password == VALID_PASSWORD:
# return True
# else:
# return False
# def auth_interface(username, password):
# """Handle the authentication and proceed with the main function if valid"""
# if authenticate(username, password):
# return gr.Interface(
# fn=process_quotes,
# inputs=[
# gr.File(label="Quotes Excel File"), # File as generated by TFT software
# gr.Textbox(label="Name of quotes column"), # use this to identify the col with the quotes
# gr.Textbox(label="List of tags separated by commas")
# ],
# outputs=gr.Dataframe(headers=["Quote", "Tags"], column_widths=["70%", "30%"], scale=2),
# title=title,
# description=description
# ).launch()
# else:
# return "Invalid username or password!"
# # Create the authentication fields before launching the main app
# auth_app = gr.Interface(
# fn=auth_interface,
# inputs=[
# gr.Textbox(label="Username", type="text"),
# gr.Textbox(label="Password", type="password")
# ],
# outputs="text",
# title="Login to Automated Research Code Tagger",
# description="Please enter the correct username and password to access the tool."
# )
# auth_app.launch()
# Single-page UI: an Excel upload, the quotes column name and the tag list are
# passed to process_quotes, and the returned quote/tag pairs fill the table.
demo = gr.Interface(
    fn=process_quotes,
    inputs=[
        # File as generated by TFT software
        gr.File(label="Quotes Excel File"),
        # use this to identify the col with the quotes
        gr.Textbox(label="Name of quotes column"),
        gr.Textbox(label="List of tags separated by commas"),
    ],
    outputs=gr.Dataframe(
        headers=["Quote", "Tags"],
        column_widths=["70%", "30%"],
        scale=2,
    ),
    title=title,
    description=description,
)

# Start serving the interface as soon as this script is executed.
demo.launch()
# For later when I enable usage of own API key
# api_key = gr.Textbox(
# type="password", label="Enter your OpenAI API key here (Optional for Perceptech users)"
# )