Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| import zipfile | |
| import os | |
| import io | |
| import nltk | |
| import openai | |
| import time | |
# The OpenAI API key is never hard-coded: it is read from the environment
# variable named 'OpenAPI'. When that variable is unset the key is None.
openai.api_key = os.environ.get('OpenAPI')
def call_openai_api(prompt, max_retries=3, retry_delay=1.0):
    """Send *prompt* to the OpenAI chat endpoint and return the reply text.

    A failed request is retried, sleeping ``retry_delay`` seconds between
    attempts. If every attempt fails, the message of the last exception is
    returned in place of a reply, so the caller always receives a string.

    Args:
        prompt: Text sent as the user message.
        max_retries: Total number of attempts; must be at least 1.
        retry_delay: Seconds to wait after a failed attempt before retrying.

    Returns:
        The content of the first choice's message, or ``str(exception)`` of
        the final failure.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    last_error = None
    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary, and any
            # failure (network, rate limit, auth) is reported back as text.
            last_error = e
            if attempt < max_retries - 1:
                # No point sleeping after the final attempt.
                time.sleep(retry_delay)

    # Every attempt failed: hand the last error message back to the caller.
    return str(last_error)
def _iter_prompt_chunks(text, max_tokens=2000):
    """Yield pieces of *text* holding at most *max_tokens* NLTK word tokens.

    Text that fits within the limit is yielded unchanged; longer text is
    tokenized and re-joined with single spaces, one chunk per yield.
    """
    tokens = nltk.word_tokenize(text)
    if len(tokens) <= max_tokens:
        yield text
        return
    for start in range(0, len(tokens), max_tokens):
        yield ' '.join(tokens[start:start + max_tokens])


def pdf_to_text(file, user_prompt):
    """Run the user's prompt against every PDF inside an uploaded zip archive.

    Each ``.pdf`` member of the archive is read in memory, its page text is
    extracted and split into chunks of at most 2000 word tokens, and every
    chunk is sent to ``call_openai_api`` prefixed with ``user_prompt``.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path to
            the zip archive on disk) is used.
        user_prompt: Instruction placed before each chunk of document text.
            When empty or None, the document text is sent on its own.

    Returns:
        All model replies joined with newlines, in archive order. An empty
        string when the archive holds no PDF with extractable text.
    """
    instruction = (user_prompt or '').strip()
    texts = []

    # The context manager guarantees the archive is closed even on errors.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for filename in archive.namelist():
            # Compare case-insensitively so "REPORT.PDF" is not skipped.
            if not filename.lower().endswith('.pdf'):
                continue

            pdf = PdfReader(io.BytesIO(archive.read(filename)))
            # extract_text() can yield nothing for image-only pages;
            # `or ''` keeps the join safe in that case.
            text = ''.join(page.extract_text() or '' for page in pdf.pages)
            if not text.strip():
                # Nothing to ask the model about; avoid an empty API call.
                continue

            for chunk in _iter_prompt_chunks(text):
                if instruction:
                    prompt = instruction + '\n\n' + chunk
                else:
                    prompt = chunk
                texts.append(call_openai_api(prompt))

    return '\n'.join(texts)
# Build the web UI: a zip upload plus a free-text prompt in, the model's
# replies out. Per-field guidance is folded into the interface description
# because the legacy `gr.inputs` / `gr.outputs` component constructors used
# here do not take a `description` keyword.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=[
        # The upload must be a zip archive; pdf_to_text opens it with zipfile.
        gr.inputs.File(label="Zip File of PDFs"),
        gr.inputs.Textbox(label="User Prompt"),
    ],
    outputs=gr.outputs.Textbox(label="Extracted Text"),
    title="PDF Text Extractor",
    description=(
        "This Cognitive Agent allows you to prompt a corpus knowledge, "
        "uploaded as a single Zip file, using OpenAI's gpt-3.5-turbo model. "
        "Upload a Zip file containing ONLY PDF files from which the knowledge "
        "will be extracted, and enter a prompt to guide the AI's responses. "
        "The output is the Cognitive Agent response from the AI."
    ),
)

# share=False keeps the app local to the host; no public share link is made.
iface.launch(share=False)