BtB-ExpC committed on
Commit
480ddb6
·
verified ·
1 Parent(s): 6d1b6ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -5,13 +5,19 @@ import os
5
  openai.api_key = os.environ["OpenAPI_Key"]
6
 
7
  def chunk_HTMLs(text, delimiter="HTML (ID:"):
8
- """ This function splits the text into chunks based on a delimiter, removes trailing newlines from each chunk,
9
- ignores any text before the first delimiter, and stores the chunks in a list. """
10
  HTMLs = text.split(delimiter)
11
- # Exclude the first chunk if it doesn't start with the delimiter
12
- HTMLs = HTMLs[1:] if HTMLs[0] else HTMLs
13
- # Remove trailing newlines, prepend the delimiter, and store the chunks in a list
14
- return [delimiter + HTML.strip() for HTML in HTMLs]
 
 
 
 
 
 
 
15
 
16
  def process_course(input_text):
17
  """Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""
 
5
  openai.api_key = os.environ["OpenAPI_Key"]
6
 
7
  def chunk_HTMLs(text, delimiter="HTML (ID:"):
8
+ """ Splits the text into chunks based on a delimiter, extracts the ID, and stores chunks in a dictionary. """
 
9
  HTMLs = text.split(delimiter)
10
+ HTMLs = HTMLs[1:] if HTMLs[0] else HTMLs # Ignore any text before the first delimiter
11
+
12
+ html_dict = {}
13
+ for html in HTMLs:
14
+ end_id_index = html.find(")") # Find the end of the ID
15
+ if end_id_index != -1:
16
+ html_id = html[:end_id_index].strip() # Extract ID
17
+ html_content = html[end_id_index+1:].strip() # Get the content after the ID
18
+ html_dict[html_id] = delimiter + "HTML (ID:" + html_id + ") " + html_content # Store in dictionary with full label
19
+
20
+ return html_dict
21
 
22
  def process_course(input_text):
23
  """Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""