Spaces:

cogcorp
/

assignment1

Sleeping

cogcorp committed on May 30, 2023

Commit

d48fb2c

1 Parent(s): 9a56aa1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -79,10 +79,15 @@ def pdf_to_text(file, user_prompt):
     # Tokenize aggregated_text
     tokens = nltk.word_tokenize(aggregated_text)
     # Split into chunks if tokens are more than 4096
-    if len(tokens) > 4096:
         # Here you may choose the strategy that fits best.
         # For instance, the first 4096 tokens could be used.
-        tokens = tokens[:4096]
     # Create a single persona from all text
     persona = create_persona(' '.join(tokens))
     # Using OpenAI API
@@ -90,6 +95,7 @@ def pdf_to_text(file, user_prompt):
     return response
 iface = gr.Interface(
     fn=pdf_to_text,
     inputs=[

     # Tokenize aggregated_text
     tokens = nltk.word_tokenize(aggregated_text)
     # Split into chunks if tokens are more than 4096
+    while len(tokens) > 4096:
         # Here you may choose the strategy that fits best.
         # For instance, the first 4096 tokens could be used.
+        chunk = tokens[:4096]
+        chunk_text = ' '.join(chunk)
+        # Use OpenAI API to summarize the chunk
+        summary = call_openai_api("a professional summarizer", f"Please summarize this text: {chunk_text}")
+        # Replace the original chunk with the summary
+        tokens = summary.split() + tokens[4096:]
     # Create a single persona from all text
     persona = create_persona(' '.join(tokens))
     # Using OpenAI API
     return response
 iface = gr.Interface(
     fn=pdf_to_text,
     inputs=[