kenleeyx committed on
Commit
95e3994
·
1 Parent(s): 243f5cb

feat: add per-quote logging

Browse files

Add logging to track progress. Replace the pandas apply call on tag_quote
with an explicit for loop so that a log entry is emitted for each quote as it is processed.

Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -7,6 +7,11 @@ import openpyxl # Requirement for reading Excel files into pandas Dataframes
7
  import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
8
  from dotenv import load_dotenv # For loading environment variables in local environment
9
  from collections import Counter # For tabulating tag occurrences
 
 
 
 
 
10
 
11
  # Load environment variables from local .env file if it exists; otherwise this does nothing
12
  load_dotenv()
@@ -14,6 +19,7 @@ load_dotenv()
14
  # Import prompt for requesting the tags from OpenAI
15
  with open("prompt.txt", "r") as prompt_file:
16
  PROMPT = prompt_file.read()
 
17
 
18
  # Import user instructions for display on screen
19
  with open("user_instructions.txt", "r") as user_instruction_file:
@@ -25,6 +31,7 @@ client = OpenAI(
25
  organization=os.getenv('ORG_KEY'),
26
  project=os.getenv('PROJ_KEY')
27
  )
 
28
 
29
  # Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
30
  def tag_quote(quote: str, tags_list: list) -> list:
@@ -42,6 +49,7 @@ def tag_quote(quote: str, tags_list: list) -> list:
42
  Returns:
43
  list: A list of tags that are relevant to the quote, as determined by the model.
44
  """
 
45
  response = client.chat.completions.create(
46
  model = "gpt-4o-mini",
47
  response_format={"type": "json_object"},
@@ -134,10 +142,18 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
134
  quotes_data = quotes_df[quotes_col_name]
135
 
136
  # Tag all the quotes one by one using tag_quote function
137
- quotes_df['Tags'] = quotes_data.apply(tag_quote, args=(tags_list,))
 
 
 
 
 
 
 
138
 
139
  # Create hash table of tag occurrences using count_tags function
140
  tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
 
141
 
142
  # Return only the quotes column and the new tags column
143
  output_df = quotes_df[[quotes_col_name, 'Tags']]
@@ -145,6 +161,7 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, tags_string: str
145
  with pd.ExcelWriter(output_file_path) as writer:
146
  output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
147
  tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
 
148
  return output_df, tags_counter_df, output_file_path
149
 
150
  # Define user interface structure
 
7
  import json # For conversion of OpenAI responses into json/dictionary objects so the contents can be extracted
8
  from dotenv import load_dotenv # For loading environment variables in local environment
9
  from collections import Counter # For tabulating tag occurrences
10
+ import logging
11
+
12
+ logger = logging.getLogger()
13
+ logger.setLevel(logging.INFO)
14
+ logging.basicConfig(level=logging.INFO, force=True)
15
 
16
  # Load environment variables from local .env file if it exists; otherwise this does nothing
17
  load_dotenv()
 
19
  # Import prompt for requesting the tags from OpenAI
20
  with open("prompt.txt", "r") as prompt_file:
21
  PROMPT = prompt_file.read()
22
+ logger.info(f"Loaded prompt: {PROMPT}")
23
 
24
  # Import user instructions for display on screen
25
  with open("user_instructions.txt", "r") as user_instruction_file:
 
31
  organization=os.getenv('ORG_KEY'),
32
  project=os.getenv('PROJ_KEY')
33
  )
34
+ logger.info("Initialised OpenAI client")
35
 
36
  # Function to send the prompt with quote and tag list to OpenAI and get the tags for that quote back
37
  def tag_quote(quote: str, tags_list: list) -> list:
 
49
  Returns:
50
  list: A list of tags that are relevant to the quote, as determined by the model.
51
  """
52
+ logger.info(f"Tagging quote {quote}")
53
  response = client.chat.completions.create(
54
  model = "gpt-4o-mini",
55
  response_format={"type": "json_object"},
 
142
  quotes_data = quotes_df[quotes_col_name]
143
 
144
  # Tag all the quotes one by one using tag_quote function
145
+ tags_column = []
146
+ for i, quote in enumerate(quotes_data):
147
+ logger.info(f"Tagging quote {i + 1}/{len(quotes_data)}: {quote}")
148
+ tags = tag_quote(quote, tags_list)
149
+ tags_column.append(tags)
150
+
151
+ quotes_df['Tags'] = tags_column
152
+ logger.info("Quotes tagged")
153
 
154
  # Create hash table of tag occurrences using count_tags function
155
  tags_counter_df = count_tags(tags_list, quotes_df['Tags'])
156
+ logger.info("Tags counted")
157
 
158
  # Return only the quotes column and the new tags column
159
  output_df = quotes_df[[quotes_col_name, 'Tags']]
 
161
  with pd.ExcelWriter(output_file_path) as writer:
162
  output_df.to_excel(writer, sheet_name='Coded Quotes', index=False)
163
  tags_counter_df.to_excel(writer, sheet_name='Tag Count', index=False)
164
+ logger.info('Results written to Excel')
165
  return output_df, tags_counter_df, output_file_path
166
 
167
  # Define user interface structure