Spaces:

BtB-ExpC
/

PerspectiveChanger

Sleeping

BtB-ExpC committed on Apr 25, 2024

Commit

480ddb6

verified ·

1 Parent(s): 6d1b6ab

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,13 +5,19 @@ import os
 openai.api_key = os.environ["OpenAPI_Key"]
 def chunk_HTMLs(text, delimiter="HTML (ID:"):
-    """ This function splits the text into chunks based on a delimiter, removes trailing newlines from each chunk,
-        ignores any text before the first delimiter, and stores the chunks in a list. """
     HTMLs = text.split(delimiter)
-    # Exclude the first chunk if it doesn't start with the delimiter
-    HTMLs = HTMLs[1:] if HTMLs[0] else HTMLs
-    # Remove trailing newlines, prepend the delimiter, and store the chunks in a list
-    return [delimiter + HTML.strip() for HTML in HTMLs]
 def process_course(input_text):
     """Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""

 openai.api_key = os.environ["OpenAPI_Key"]
 def chunk_HTMLs(text: str, delimiter: str = "HTML (ID:") -> dict:
     """Split *text* into labelled HTML chunks keyed by their ID.

     The text is split on *delimiter*. Whatever precedes the first delimiter
     is discarded. For every remaining piece, the ID is the text up to the
     first ")" and the content is everything after it, both stripped of
     surrounding whitespace.

     Args:
         text: Raw input containing zero or more delimiter-prefixed chunks.
         delimiter: Marker that opens each chunk and directly precedes its ID.

     Returns:
         A dict mapping each ID to "<delimiter><id>) <content>". Pieces that
         contain no ")" are skipped. If an ID occurs more than once, the
         last occurrence wins.
     """
     # str.split always yields the text before the first delimiter as its
     # first element (possibly empty); that element is never a chunk.
     pieces = text.split(delimiter)[1:]
     html_dict = {}
     for piece in pieces:
         raw_id, closing, remainder = piece.partition(")")
         if not closing:
             # No ")" means the ID is not terminated; skip the piece.
             continue
         html_id = raw_id.strip()
         html_content = remainder.strip()
         # The delimiter already is the label prefix, so emit it only once.
         html_dict[html_id] = f"{delimiter}{html_id}) {html_content}"
     return html_dict
 def process_course(input_text):
     """Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""