Spaces:

Perceptechai
/

code-tagging

Sleeping

kenleeyx committed on May 20, 2025

Commit

95e3994

1 Parent(s): 243f5cb

feat: add per-quote logging

Add logging to track progress. Replace the pandas `apply` call on tag_quote
with an explicit for loop so that a progress message can be logged for each quote.

Files changed (1) hide show

app.py +18 -1

app.py CHANGED Viewed

@@ -7,6 +7,11 @@ import openpyxl # Requirement for reading Excel files into pandas Dataframes
 import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
 from dotenv import load_dotenv # For loading environment variables in local environment
 from collections import Counter # For tabulating tag occurrences
 # Load environment variables from local .env file if it exists; otherwise this does nothing
 load_dotenv()
@@ -14,6 +19,7 @@ load_dotenv()
 # Import prompt for requesting the tags from OpenAI
 with open("prompt.txt", "r") as prompt_file:
     PROMPT = prompt_file.read()
 # Import user instructions for display on screen
 with open("user_instructions.txt", "r") as user_instruction_file:
@@ -25,6 +31,7 @@ client = OpenAI(
   organization=os.getenv('ORG_KEY'),
   project=os.getenv('PROJ_KEY')
 )
 # Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
 def tag_quote(quote: str, tags_list: list) -> list:
@@ -42,6 +49,7 @@ def tag_quote(quote: str, tags_list: list) -> list:
     Returns:
         list: A list of tags that are relevant to the quote, as determined by the model.
     """
     response = client.chat.completions.create(
         model = "gpt-4o-mini",
         response_format={"type": "json_object"},
@@ -134,10 +142,18 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
     quotes_data = quotes_df[quotes_col_name]
     # Tag all the quotes one by one using tag_quote function
-    quotes_df['Tags'] = quotes_data.apply(tag_quote, args=(tags_list,))
     # Create hash table of tag occurrences using count_tags function
     tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
     # Return only the quotes column and the new tags column
     output_df = quotes_df[[quotes_col_name, 'Tags']]
@@ -145,6 +161,7 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
     with pd.ExcelWriter(output_file_path) as writer:
         output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
         tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
     return output_df, tags_counter_df, output_file_path
 # Define user interface structure

 import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
 from dotenv import load_dotenv # For loading environment variables in local environment
 from collections import Counter # For tabulating tag occurrences
+import logging
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.basicConfig(level=logging.INFO, force=True)
 # Load environment variables from local .env file if it exists; otherwise this does nothing
 load_dotenv()
 # Import prompt for requesting the tags from OpenAI
 with open("prompt.txt", "r") as prompt_file:
     PROMPT = prompt_file.read()
+    logger.info(f"Loaded prompt: {PROMPT}")
 # Import user instructions for display on screen
 with open("user_instructions.txt", "r") as user_instruction_file:
   organization=os.getenv('ORG_KEY'),
   project=os.getenv('PROJ_KEY')
 )
+logger.info("Initialised OpenAI client")
 # Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
 def tag_quote(quote: str, tags_list: list) -> list:
     Returns:
         list: A list of tags that are relevant to the quote, as determined by the model.
     """
+    logger.info(f"Tagging quote {quote}")
     response = client.chat.completions.create(
         model = "gpt-4o-mini",
         response_format={"type": "json_object"},
     quotes_data = quotes_df[quotes_col_name]
     # Tag all the quotes one by one using tag_quote function
+    tags_column = []
+    for i, quote in enumerate(quotes_data):
+        logger.info(f"Tagging quote {i + 1}/{len(quotes_data)}: {quote}")
+        tags = tag_quote(quote, tags_list)
+        tags_column.append(tags)
+    quotes_df['Tags'] = tags_column
+    logger.info("Quotes tagged")
     # Create hash table of tag occurrences using count_tags function
     tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
+    logger.info("Tags counted")
     # Return only the quotes column and the new tags column
     output_df = quotes_df[[quotes_col_name, 'Tags']]
     with pd.ExcelWriter(output_file_path) as writer:
         output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
         tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
+    logger.info('Results written to Excel')
     return output_df, tags_counter_df, output_file_path
 # Define user interface structure