Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,13 +5,19 @@ import os
|
|
| 5 |
openai.api_key = os.environ["OpenAPI_Key"]
|
| 6 |
|
| 7 |
def chunk_HTMLs(text, delimiter="HTML (ID:"):
|
| 8 |
-
"""
|
| 9 |
-
ignores any text before the first delimiter, and stores the chunks in a list. """
|
| 10 |
HTMLs = text.split(delimiter)
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def process_course(input_text):
|
| 17 |
"""Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""
|
|
|
|
| 5 |
openai.api_key = os.environ["OpenAPI_Key"]
|
| 6 |
|
| 7 |
def chunk_HTMLs(text, delimiter="HTML (ID:"):
    """Split *text* into labelled chunks keyed by their ID.

    The text is split on *delimiter*. Anything before the first delimiter
    is ignored. Each remaining piece is expected to start with an ID that
    is terminated by the first ``)``; pieces without a ``)`` are skipped.

    Args:
        text: The raw input containing zero or more delimited sections.
        delimiter: The marker that introduces each section, immediately
            followed by the section ID and a closing ``)``.

    Returns:
        A dict mapping each stripped ID to the rebuilt section string
        ``"<delimiter><id>) <content>"``. If an ID occurs more than once,
        the last occurrence wins.

    Raises:
        ValueError: If *delimiter* is an empty string (from ``str.split``).
    """
    # The first piece is whatever precedes the first delimiter (or the whole
    # text when the delimiter never occurs), so it is never a real section.
    sections = text.split(delimiter)[1:]

    html_dict = {}
    for section in sections:
        raw_id, closing_paren, remainder = section.partition(")")
        if not closing_paren:
            # No ")" means the ID is unterminated; skip the malformed piece.
            continue

        html_id = raw_id.strip()
        html_content = remainder.strip()
        # Re-attach the label exactly once. The previous code prepended both
        # the delimiter and a hard-coded "HTML (ID:", doubling the prefix.
        html_dict[html_id] = f"{delimiter}{html_id}) {html_content}"

    return html_dict
|
| 21 |
|
| 22 |
def process_course(input_text):
|
| 23 |
"""Processes input text, extracts and chunks it, and sends batches to the OpenAI model for analysis based on IDs."""
|