Spaces:

dlflannery
/

AgentX

Running

App Files Files Community

dlflannery committed on Sep 15, 2025

Commit

beb50d0

verified ·

1 Parent(s): 519099f

Update app.py

Browse files

multiple file and image uploads

Files changed (1) hide show

app.py +95 -91

app.py CHANGED Viewed

@@ -183,33 +183,35 @@ def clean_up_files():
         except:
             pass
-def load_image(image, user, output_window):
     # status = #'OK, image is ready! Enter prompt and tap submit button'
     try:
         with open(image, 'rb') as image_file:
             base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-        fpath = user + '_image.b64'
         with open(fpath, 'wt') as fp:
             fp.write(base64_image)
-            output_window += md('\nImage loaded\n')
     except:
         output_window = 'Unable to upload image'
-    return [fpath, output_window]
 def upload_image(user, password, output_window):
     if not credentials_ok(user, password):
         return [gr.Image(visible=False, interactive=True), "Incorrect user name and/or password"]
-    return [gr.Image(visible=True, interactive=True), output_window]
 def upload_file(user, password, output_window):
     if not credentials_ok(user, password):
         return [gr.File(visible=False, label='Upload File'), 'Incorrect user and/or password']
-    return [gr.File(visible=True, label='UploadFile'), output_window]
-def load_file(file_uploader, output_window):
     path = file_uploader
     fname = os.path.basename(path)
-    return [path, output_window + f'<BR>{fname} loaded<BR>',
            gr.File(visible=False, label='Upload File', type='filepath', value=None) ]
 def create_openai_container(name):
@@ -247,12 +249,12 @@ def list_openai_container_files(container_id):
 async def chat(prompt_window, user_window, password, history, output_window,
-              uploaded_image_file, uploaded_file_path, prior_inputs):
     file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
     image_window = gr.Image(visible=False, value=None)
     if not credentials_ok(user_window, password):
-        return ['Invalid Credentials', prompt_window, uploaded_image_file,
                      image_window, file_download, history, uploaded_file_path, prior_inputs]
     instructions = '''
     You are a helpful assistant.
@@ -279,60 +281,62 @@ async def chat(prompt_window, user_window, password, history, output_window,
     # inputs = history.copy()
     inputs = prior_inputs
     file_input = ''
-    if uploaded_file_path != '':
-        ext = uploaded_file_path.casefold().split('.')[-1]
-        if ext == 'pdf':
-            client = OpenAI(api_key = OPENAI_API_KEY)
-            file = client.files.create(file=open(f'{uploaded_file_path}','rb'),
-                                      purpose='user_data',
-                                       expires_after={"seconds": 3600, "anchor": "created_at"})
-            file_input=(
-                {"role": "user",
-                "content": [
-                    {
-                        "type": "input_file",
-                        "file_id": file.id,
                     }
-                ]
-                }
-            )
-            inputs.append(file_input)
-        if ext in ['docx', 'txt', 'py']:
-            if ext == 'docx':
-                extracted_text = extract_text_from_docx(uploaded_file_path)
-            else:
-                with open(uploaded_file_path, 'rt') as fp:
-                    extracted_text = fp.read()
-            file_input=(
-                {"role": "user",
-                "content": [
-                    {
-                        "type": "input_text",
-                        "text": f"{extracted_text}",
                         }
                     ]
                 }
             )
-            inputs.append(file_input)
-        uploaded_file_path == ''
-    image_input = ''
-    if uploaded_image_file != '':
-        with open(uploaded_image_file, 'rt') as fp:
-            b64data = fp.read()
-        os.remove(uploaded_image_file)
-        uploaded_image_file = ''
-        image_input = (
-            {
-                "role": "user",
-                 "content": [
-                     {
-                         "type": "input_image",
-                         "image_url": f'data:image/jpeg;base64, {b64data}',
-                    }
-                ]
-            }
-        )
-        inputs.append(image_input)
     history.append({"role":"user", "content":prompt})
     inputs.append({"role":"user", "content":prompt})
     exception_msg = ''
@@ -385,12 +389,9 @@ async def chat(prompt_window, user_window, password, history, output_window,
                         fp.write(fdata)
                         file_download = gr.DownloadButton(label=f'Download {download_ext.upper()} Doc',
                                         visible=True, value=f'./document.{download_ext}')
-    return [response, '', uploaded_image_file, image_window, file_download, history,
             uploaded_file_path, new_inputs]
-# outputs=[history, output_window, prompt_window, uploaded_image_file,
-#                      image_window, file_download])
 def show_help():
     txt = '''
     This is an agent using the OpenAI Python Agents SDK.
@@ -398,34 +399,35 @@ def show_help():
         * Search the Web
         * Compute straight-line distances between locations
         * Analyze images you upload.
-        * Create and display images you describe, which you can download
-        * Get news from the web
-        * Make PDF's based on results it generated.
     Agents perform multiple steps using tools as necessary to satisfy a single request.
     1.  Gemeral:
         1.1 Login with user name and password (not case-sensitive)
         1.2 Type prompts (questions, instructions) into "Prompt or Question" window.
     2.  Chat:
-        2.1 Enter prompt and tap the "Submit Prompt/Question" button.  The responses appear in the Dialog window.
-        2.2 Enter follow-up questions in the Prompt window. Tap "Submit Prompt/Question".
-        2.3 If topic changes or when done chatting, tap the "Start New Session" button.
     3.  Make Image:
-        3.1 Include description of desired image in prompt window.
         3.2 Tap the "Submit Prompt/Question" button. This can take a few seconds.
         3.3 There is a download button on the image display if your system supports file downloads.
         3.4 When done viewing image, tap the "Start New Session" button
-    4.  Analyze an Image you provide:
-        4.1 Tap the "Upload Image to Analyze" button.
-        4.2 An empty image box will appear lower left. Drag or upload image into it. It offers web cam or camera
-               input also.
-        4.3 The image should appear. This can take some time with a slow internet connection and large image.
-        4.4 Enter what you want done with the image in the "Prompt or Question" box.
-        4.5 Tap the "Submit Prompt/Question" button to start the analysis.  This initiates a chat dialog and
-               you can ask follow-up questions. However, the image is not re-analyzed for follow-up dialog.
-    Hint:
         Better  results are obtained by including detailed descriptions and instructions
             of what you want in the prompt.
     '''
     return str(txt).replace('```', ' ').replace('  ', '&nbsp;&nbsp;').replace('  ', '&nbsp;&nbsp;').replace('  ', '&nbsp;&nbsp;').replace('\n','<br>')
@@ -434,7 +436,7 @@ def new_session(user_window, history):
     history = []
     return [prompt_window, history, 'Session cleared',
            gr.Image(visible=False, value=None),
-           gr.Image(visible=False, value=None), '',
            gr.DownloadButton(label='Download File', visible=False, value=None),
            gr.File(visible=False, label='Upload File', type='filepath'), [] ]
@@ -442,8 +444,8 @@ def new_session(user_window, history):
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     password = gr.State("")
     user = gr.State("unknown")
-    uploaded_image_file = gr.State('')
-    uploaded_file_path = gr.State('')
     history = gr.State([])
     inputs = gr.State([])
@@ -467,32 +469,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
     with gr.Row():
         with gr.Column():
-            image_window2 = gr.Image(visible=False, interactive=True, label='Image to Analyze', type='filepath')
         with gr.Column():
             image_window = gr.Image(visible=False, label='Generated Image')
     with gr.Row():
         file_uploader = gr.File(visible=False, label='Upload File', type='filepath')
     submit_button.click(chat,
              inputs=[prompt_window, user_window, password, history, output_window,
-                    uploaded_image_file, uploaded_file_path, inputs],
-             outputs=[output_window, prompt_window, uploaded_image_file,
-                     image_window, file_download, history, uploaded_file_path, inputs])
     clear_button.click(fn=new_session, inputs=[user_window, history],
                       outputs=[prompt_window, history, output_window,
                       image_window, image_window2,
-                      uploaded_image_file, file_download, file_uploader, inputs])
     help_button.click(fn=show_help, outputs=output_window)
     button_get_image.click(fn=upload_image,inputs = [user, password, output_window],
                           outputs = [image_window2, output_window])
-    image_window2.upload(fn=load_image, inputs=[image_window2, user, output_window],
-                        outputs=[uploaded_image_file, output_window])
     pwd_window.blur(updatePassword,
                    inputs = [pwd_window, user],
                    outputs = [password, pwd_window, button_upload_file, button_get_image])
     button_upload_file.click(fn=upload_file, inputs=[user, password, output_window],
                             outputs=[file_uploader, output_window])
-    file_uploader.upload(fn=load_file, inputs=[file_uploader, output_window],
-                        outputs=[uploaded_file_path, output_window, file_uploader])
 # demo.launch(share=True, allowed_paths=[dataDir], ssr_mode=False)
     # demo.load(delete_db_files)
     demo.unload(clean_up_files)

         except:
             pass
+def load_image(image, user, output_window, uploaded_image_files):
     # status = #'OK, image is ready! Enter prompt and tap submit button'
     try:
         with open(image, 'rb') as image_file:
             base64_image = base64.b64encode(image_file.read()).decode('utf-8')
+        fpath = f'{user}_image{len(uploaded_image_files)}.b64'
         with open(fpath, 'wt') as fp:
             fp.write(base64_image)
+            output_window += md(f'\nImage {os.path.basename(image)} loaded\n')
+            uploaded_image_files.append(fpath)
     except:
         output_window = 'Unable to upload image'
+    return [uploaded_image_files, output_window]
 def upload_image(user, password, output_window):
     if not credentials_ok(user, password):
         return [gr.Image(visible=False, interactive=True), "Incorrect user name and/or password"]
+    return [gr.Image(visible=True, interactive=True, value=None), output_window]
 def upload_file(user, password, output_window):
     if not credentials_ok(user, password):
         return [gr.File(visible=False, label='Upload File'), 'Incorrect user and/or password']
+    return [gr.File(visible=True, label='UploadFile', value=None), output_window]
+def load_file(file_uploader, output_window, uploaded_file_paths):
     path = file_uploader
     fname = os.path.basename(path)
+    uploaded_file_paths.append(path)
+    return [uploaded_file_paths, output_window + f'<br>{fname} loaded<br>',
            gr.File(visible=False, label='Upload File', type='filepath', value=None) ]
 def create_openai_container(name):
 async def chat(prompt_window, user_window, password, history, output_window,
+              uploaded_image_files, uploaded_file_paths, prior_inputs):
     file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
     image_window = gr.Image(visible=False, value=None)
     if not credentials_ok(user_window, password):
+        return ['Invalid Credentials', prompt_window, uploaded_image_files,
                      image_window, file_download, history, uploaded_file_path, prior_inputs]
     instructions = '''
     You are a helpful assistant.
     # inputs = history.copy()
     inputs = prior_inputs
     file_input = ''
+    if len(uploaded_file_paths) > 0:
+        for uploaded_file_path in uploaded_file_paths:
+            ext = uploaded_file_path.casefold().split('.')[-1]
+            if ext == 'pdf':
+                client = OpenAI(api_key = OPENAI_API_KEY)
+                file = client.files.create(file=open(f'{uploaded_file_path}','rb'),
+                                          purpose='user_data',
+                                           expires_after={"seconds": 3600, "anchor": "created_at"})
+                file_input=(
+                    {"role": "user",
+                    "content": [
+                        {
+                            "type": "input_file",
+                            "file_id": file.id,
+                        }
+                    ]
                     }
+                )
+                inputs.append(file_input)
+            if ext in ['docx', 'txt', 'py']:
+                if ext == 'docx':
+                    extracted_text = extract_text_from_docx(uploaded_file_path)
+                else:
+                    with open(uploaded_file_path, 'rt') as fp:
+                        extracted_text = fp.read()
+                file_input=(
+                    {"role": "user",
+                    "content": [
+                        {
+                            "type": "input_text",
+                            "text": f"{extracted_text}",
+                            }
+                        ]
+                    }
+                )
+                inputs.append(file_input)
+        uploaded_file_paths = []
+    image_input = ''
+    if len(uploaded_image_files) > 0:
+        for file in uploaded_image_files:
+            with open(file, 'rt') as fp:
+                b64data = fp.read()
+            os.remove(file)
+            image_input = (
+                {
+                    "role": "user",
+                     "content": [
+                         {
+                             "type": "input_image",
+                             "image_url": f'data:image/jpeg;base64, {b64data}',
                         }
                     ]
                 }
             )
+            inputs.append(image_input)
+        uploaded_image_files = []
     history.append({"role":"user", "content":prompt})
     inputs.append({"role":"user", "content":prompt})
     exception_msg = ''
                         fp.write(fdata)
                         file_download = gr.DownloadButton(label=f'Download {download_ext.upper()} Doc',
                                         visible=True, value=f'./document.{download_ext}')
+    return [response, '', uploaded_image_files, image_window, file_download, history,
             uploaded_file_path, new_inputs]
 def show_help():
     txt = '''
     This is an agent using the OpenAI Python Agents SDK.
         * Search the Web
         * Compute straight-line distances between locations
         * Analyze images you upload.
+        * Create and display images you describe, which you can download.
+        * Use uploaded images and documents as context. (.txt., .pdf, .docx, .py)
+        * Get news from the web.
+        * Make PDF's, Word Documents and Excel spreadsheets based on results it generated.
     Agents perform multiple steps using tools as necessary to satisfy a single request.
     1.  Gemeral:
         1.1 Login with user name and password (not case-sensitive)
         1.2 Type prompts (questions, instructions) into "Prompt or Question" window.
     2.  Chat:
+        2.1 Upload any image(s) and/or documents (files) you want the agent to consider, using
+             the "Upload Image to Analyze" and "Upload Input File" buttons.
+        2.2 Enter prompt/question and tap the "Submit Prompt/Question" button.  The responses appear
+              in the Dialog window.
+        2.3 Continue your session by optionally uploading more files and/or images and entering a
+             new prompt/question.  The agent remembers past inputs and responses until you tap
+             the "Start New Session" button.
+        2.4 If topic changes or when done chatting, tap the "Start New Session" button.
     3.  Make Image:
+        3.1 Include description of desired image in prompt window.  If desired, uploaded images and
+             files can also be used.
         3.2 Tap the "Submit Prompt/Question" button. This can take a few seconds.
         3.3 There is a download button on the image display if your system supports file downloads.
         3.4 When done viewing image, tap the "Start New Session" button
+    Hints:
         Better  results are obtained by including detailed descriptions and instructions
             of what you want in the prompt.
+        Start a new session whenever memory of previous inputs and responses is no longer
+            needed as context.  The agent can only remember so much.
     '''
     return str(txt).replace('```', ' ').replace('  ', '&nbsp;&nbsp;').replace('  ', '&nbsp;&nbsp;').replace('  ', '&nbsp;&nbsp;').replace('\n','<br>')
     history = []
     return [prompt_window, history, 'Session cleared',
            gr.Image(visible=False, value=None),
+           gr.Image(visible=False, value=None), [],
            gr.DownloadButton(label='Download File', visible=False, value=None),
            gr.File(visible=False, label='Upload File', type='filepath'), [] ]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     password = gr.State("")
     user = gr.State("unknown")
+    uploaded_image_files = gr.State([])
+    uploaded_file_paths = gr.State([])
     history = gr.State([])
     inputs = gr.State([])
     file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
     with gr.Row():
         with gr.Column():
+            image_window2 = gr.Image(visible=False, interactive=True, label='Image to Analyze',
+                                    type='filepath')
         with gr.Column():
             image_window = gr.Image(visible=False, label='Generated Image')
     with gr.Row():
         file_uploader = gr.File(visible=False, label='Upload File', type='filepath')
     submit_button.click(chat,
              inputs=[prompt_window, user_window, password, history, output_window,
+                    uploaded_image_files, uploaded_file_paths, inputs],
+             outputs=[output_window, prompt_window, uploaded_image_files,
+                     image_window, file_download, history, uploaded_file_paths, inputs])
     clear_button.click(fn=new_session, inputs=[user_window, history],
                       outputs=[prompt_window, history, output_window,
                       image_window, image_window2,
+                      uploaded_image_files, file_download, file_uploader, inputs])
     help_button.click(fn=show_help, outputs=output_window)
     button_get_image.click(fn=upload_image,inputs = [user, password, output_window],
                           outputs = [image_window2, output_window])
+    image_window2.upload(fn=load_image,
+                        inputs=[image_window2, user, output_window, uploaded_image_files],
+                        outputs=[uploaded_image_files, output_window])
     pwd_window.blur(updatePassword,
                    inputs = [pwd_window, user],
                    outputs = [password, pwd_window, button_upload_file, button_get_image])
     button_upload_file.click(fn=upload_file, inputs=[user, password, output_window],
                             outputs=[file_uploader, output_window])
+    file_uploader.upload(fn=load_file, inputs=[file_uploader, output_window, uploaded_file_paths],
+                        outputs=[uploaded_file_paths, output_window, file_uploader])
 # demo.launch(share=True, allowed_paths=[dataDir], ssr_mode=False)
     # demo.load(delete_db_files)
     demo.unload(clean_up_files)