Mixtral-TLDR-Web-dev-dev

Sleeping

App Files Files Community

Omnibus committed on Jan 4, 2024

Commit

e06a544

1 Parent(s): 917f3b4

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -4

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ from huggingface_hub import InferenceClient,HfApi
 import random
 import json
 import datetime
 #from query import tasks
 from agent import (
     PREFIX,
@@ -89,7 +91,6 @@ def read_txt(txt_path):
     return text
 def read_pdf(pdf_path):
-    from pypdf import PdfReader
     text=""
     reader = PdfReader(f'{pdf_path}')
     number_of_pages = len(reader.pages)
@@ -100,6 +101,26 @@ def read_pdf(pdf_path):
     return text
 VERBOSE = True
 MAX_HISTORY = 100
 MAX_DATA = 25000
@@ -207,12 +228,15 @@ def compress_data(c, instruct, history):
-def summarize(inp,history,data=None,file=None,url=None):
     if inp == "":
         inp = "Process this data"
     history.clear()
     history = [(inp,"Working on it...")]
     yield "",history
     if url != "":
         val, out = find_all(url)
         if not val:
@@ -280,10 +304,11 @@ with gr.Blocks() as app:
             file=gr.File(label="Input File (.pdf .txt)")
         with gr.Tab("URL"):
             url = gr.Textbox(label="URL")
     #text=gr.JSON()
     #inp_query.change(search_models,inp_query,models_dd)
     clear_btn.click(clear_fn,None,[prompt,chatbot])
-    go=button.click(summarize,[prompt,chatbot,data,file,url],[prompt,chatbot])
     stop_button.click(None,None,None,cancels=[go])
 app.launch(server_port=7860,show_api=False)

 import random
 import json
 import datetime
+from pypdf import PdfReader
+import uuid
 #from query import tasks
 from agent import (
     PREFIX,
     return text
 def read_pdf(pdf_path):
     text=""
     reader = PdfReader(f'{pdf_path}')
     number_of_pages = len(reader.pages)
     return text
+def read_pdf_online(url):
+    uid=uuid.uuid4()
+    response = requests.get(url, stream=True)
+    if response.status_code == 200:
+        with open(f"{uid}.pdf", "wb") as f:
+            f.write(response.content)
+        f.close()
+        #out = Path("./data.pdf")
+        #print (out)
+        reader = PdfReader(f"{uid}.pdf")
+        number_of_pages = len(reader.pages)
+        for i in range(number_of_pages-1):
+            page = reader.pages[i]
+            text = f'{text}\n{page.extract_text()}'
+        print (text)
+    else:
+        text = response.status_code
+        print(text)
+    return text
 VERBOSE = True
 MAX_HISTORY = 100
 MAX_DATA = 25000
+def summarize(inp,history,data=None,file=None,url=None,pdf_url=None):
     if inp == "":
         inp = "Process this data"
     history.clear()
     history = [(inp,"Working on it...")]
     yield "",history
+    if pdf_url.startswith("http"):
+        out = read_pdf_online(url)
+        data=out
     if url != "":
         val, out = find_all(url)
         if not val:
             file=gr.File(label="Input File (.pdf .txt)")
         with gr.Tab("URL"):
             url = gr.Textbox(label="URL")
+        with gr.Tab("PDF URL"):
+            pdf_url = gr.Textbox(label="PDF URL")
     #text=gr.JSON()
     #inp_query.change(search_models,inp_query,models_dd)
     clear_btn.click(clear_fn,None,[prompt,chatbot])
+    go=button.click(summarize,[prompt,chatbot,data,file,url,pdf_url],[prompt,chatbot])
     stop_button.click(None,None,None,cancels=[go])
 app.launch(server_port=7860,show_api=False)