Spaces:
Sleeping
Sleeping
feat: add per-quote logging
Browse files
Add logging to track progress. Change from applying the tag_quote function
to using an explicit for loop so that a log message is emitted after each quote.
app.py
CHANGED
|
@@ -7,6 +7,11 @@ import openpyxl # Requirement for reading Excel files into pandas Dataframes
|
|
| 7 |
import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
|
| 8 |
from dotenv import load_dotenv # For loading environment variables in local environment
|
| 9 |
from collections import Counter # For tabulating tag occurrences
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Load environment variables from local .env file if it exists; otherwise this does nothing
|
| 12 |
load_dotenv()
|
|
@@ -14,6 +19,7 @@ load_dotenv()
|
|
| 14 |
# Import prompt for requesting the tags from OpenAI
|
| 15 |
with open("prompt.txt", "r") as prompt_file:
|
| 16 |
PROMPT = prompt_file.read()
|
|
|
|
| 17 |
|
| 18 |
# Import user instructions for display on screen
|
| 19 |
with open("user_instructions.txt", "r") as user_instruction_file:
|
|
@@ -25,6 +31,7 @@ client = OpenAI(
|
|
| 25 |
organization=os.getenv('ORG_KEY'),
|
| 26 |
project=os.getenv('PROJ_KEY')
|
| 27 |
)
|
|
|
|
| 28 |
|
| 29 |
# Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
|
| 30 |
def tag_quote(quote: str, tags_list: list) -> list:
|
|
@@ -42,6 +49,7 @@ def tag_quote(quote: str, tags_list: list) -> list:
|
|
| 42 |
Returns:
|
| 43 |
list: A list of tags that are relevant to the quote, as determined by the model.
|
| 44 |
"""
|
|
|
|
| 45 |
response = client.chat.completions.create(
|
| 46 |
model = "gpt-4o-mini",
|
| 47 |
response_format={"type": "json_object"},
|
|
@@ -134,10 +142,18 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
|
|
| 134 |
quotes_data = quotes_df[quotes_col_name]
|
| 135 |
|
| 136 |
# Tag all the quotes one by one using tag_quote function
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# Create hash table of tag occurrences using count_tags function
|
| 140 |
tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
|
|
|
|
| 141 |
|
| 142 |
# Return only the quotes column and the new tags column
|
| 143 |
output_df = quotes_df[[quotes_col_name, 'Tags']]
|
|
@@ -145,6 +161,7 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
|
|
| 145 |
with pd.ExcelWriter(output_file_path) as writer:
|
| 146 |
output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
|
| 147 |
tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
|
|
|
|
| 148 |
return output_df, tags_counter_df, output_file_path
|
| 149 |
|
| 150 |
# Define user interface structure
|
|
|
|
| 7 |
import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
|
| 8 |
from dotenv import load_dotenv # For loading environment variables in local environment
|
| 9 |
from collections import Counter # For tabulating tag occurrences
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger()
|
| 13 |
+
logger.setLevel(logging.INFO)
|
| 14 |
+
logging.basicConfig(level=logging.INFO, force=True)
|
| 15 |
|
| 16 |
# Load environment variables from local .env file if it exists; otherwise this does nothing
|
| 17 |
load_dotenv()
|
|
|
|
| 19 |
# Import prompt for requesting the tags from OpenAI
|
| 20 |
with open("prompt.txt", "r") as prompt_file:
|
| 21 |
PROMPT = prompt_file.read()
|
| 22 |
+
logger.info(f"Loaded prompt: {PROMPT}")
|
| 23 |
|
| 24 |
# Import user instructions for display on screen
|
| 25 |
with open("user_instructions.txt", "r") as user_instruction_file:
|
|
|
|
| 31 |
organization=os.getenv('ORG_KEY'),
|
| 32 |
project=os.getenv('PROJ_KEY')
|
| 33 |
)
|
| 34 |
+
logger.info("Initialised OpenAI client")
|
| 35 |
|
| 36 |
# Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
|
| 37 |
def tag_quote(quote: str, tags_list: list) -> list:
|
|
|
|
| 49 |
Returns:
|
| 50 |
list: A list of tags that are relevant to the quote, as determined by the model.
|
| 51 |
"""
|
| 52 |
+
logger.info(f"Tagging quote {quote}")
|
| 53 |
response = client.chat.completions.create(
|
| 54 |
model = "gpt-4o-mini",
|
| 55 |
response_format={"type": "json_object"},
|
|
|
|
| 142 |
quotes_data = quotes_df[quotes_col_name]
|
| 143 |
|
| 144 |
# Tag all the quotes one by one using tag_quote function
|
| 145 |
+
tags_column = []
|
| 146 |
+
for i, quote in enumerate(quotes_data):
|
| 147 |
+
logger.info(f"Tagging quote {i + 1}/{len(quotes_data)}: {quote}")
|
| 148 |
+
tags = tag_quote(quote, tags_list)
|
| 149 |
+
tags_column.append(tags)
|
| 150 |
+
|
| 151 |
+
quotes_df['Tags'] = tags_column
|
| 152 |
+
logger.info("Quotes tagged")
|
| 153 |
|
| 154 |
# Create hash table of tag occurrences using count_tags function
|
| 155 |
tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
|
| 156 |
+
logger.info("Tags counted")
|
| 157 |
|
| 158 |
# Return only the quotes column and the new tags column
|
| 159 |
output_df = quotes_df[[quotes_col_name, 'Tags']]
|
|
|
|
| 161 |
with pd.ExcelWriter(output_file_path) as writer:
|
| 162 |
output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
|
| 163 |
tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
|
| 164 |
+
logger.info('Results written to Excel')
|
| 165 |
return output_df, tags_counter_df, output_file_path
|
| 166 |
|
| 167 |
# Define user interface structure
|