Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,8 +50,8 @@ def call_openai_api(persona, user_prompt):
|
|
| 50 |
response = openai.ChatCompletion.create(
|
| 51 |
model="gpt-3.5-turbo",
|
| 52 |
messages=[
|
| 53 |
-
{"role": "system", "content": f"You are a knowledgeable assistant
|
| 54 |
-
{"role": "user", "content": user_prompt},
|
| 55 |
]
|
| 56 |
)
|
| 57 |
return response['choices'][0]['message']['content']
|
|
@@ -72,8 +72,15 @@ def pdf_to_text(file, user_prompt):
|
|
| 72 |
pdf = PdfReader(pdf_file_io)
|
| 73 |
for page in pdf.pages:
|
| 74 |
aggregated_text += page.extract_text()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# Create a single persona from all text
|
| 76 |
-
persona = create_persona(
|
| 77 |
# Using OpenAI API
|
| 78 |
response = call_openai_api(persona, user_prompt)
|
| 79 |
return response
|
|
|
|
| 50 |
response = openai.ChatCompletion.create(
|
| 51 |
model="gpt-3.5-turbo",
|
| 52 |
messages=[
|
| 53 |
+
{"role": "system", "content": f"You are a knowledgeable assistant that provides short factual answers"},
|
| 54 |
+
{"role": "user", "content": f"{persona}{user_prompt}",
|
| 55 |
]
|
| 56 |
)
|
| 57 |
return response['choices'][0]['message']['content']
|
|
|
|
| 72 |
pdf = PdfReader(pdf_file_io)
|
| 73 |
for page in pdf.pages:
|
| 74 |
aggregated_text += page.extract_text()
|
| 75 |
+
# Tokenize aggregated_text
|
| 76 |
+
tokens = nltk.word_tokenize(aggregated_text)
|
| 77 |
+
# Truncate to the first 4096 tokens if there are more (no chunking is done yet)
|
| 78 |
+
if len(tokens) > 4096:
|
| 79 |
+
# Here you may choose the strategy that fits best.
|
| 80 |
+
# For instance, the first 4096 tokens could be used.
|
| 81 |
+
tokens = tokens[:4096]
|
| 82 |
# Create a single persona from all text
|
| 83 |
+
persona = create_persona(' '.join(tokens))
|
| 84 |
# Using OpenAI API
|
| 85 |
response = call_openai_api(persona, user_prompt)
|
| 86 |
return response
|