Spaces:

Omnibus
/

pdf-reader

Sleeping

App Files Files Community

Omnibus committed on Jul 22, 2023

Commit

d02b2ab

1 Parent(s): 2d1281f

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -64

app.py CHANGED Viewed

@@ -1,78 +1,16 @@
 import gradio as gr
-from bs4 import BeautifulSoup as bs
-from pypdf import PdfReader
-from pathlib import Path
-import os
-import sys
-#import html5lib
-#import copy
-import requests
-#from IPython.display import IFrame
 def scrape(instring):
     html_src=(f'''
     <div style="text-align:center">
     <h4>Pdf viewer testing</h4>
-    <iframe src="https://docs.google.com/viewer?url={instring}&embedded=true" frameborder="0" height="500px" width="100%"></iframe>
     </div>''')
     return gr.HTML.update(f'''{html_src}''')
-def scrape00(instring):
-    response = requests.get(instring, stream=True)
-    if response.status_code == 200:
-        with open("data.pdf", "wb") as f:
-            f.write(response.content)
-    else:
-        print(response.status_code)
-    out = Path("./data.pdf")
-    print (out)
-    reader = PdfReader("data.pdf")
-    number_of_pages = len(reader.pages)
-    page = reader.pages[0]
-    text = page.extract_text()
-    return gr.HTML.update(f'''<embed src={out} type="application/pdf" width="100%" height="500px" />''')
-def scrape1(instring):
-    # set the url to perform the get request
-    URL = f'{instring}'
-    page = requests.get(URL)
-    # load the page content
-    text = page.content
-    # make a soup object by using beautiful
-    # soup and set the markup as html parser
-    soup = bs(text, "html.parser")
-    out = str(soup.prettify())
-    return gr.HTML.update(f'''<object data={instring} type="application/pdf" width="100%" height="500px">''')
-def scrape0(instring):
-    #r = requests.get(instring)
-    chunk_size=2000
-    url = f'{instring}'
-    r = requests.get(url, stream=True)
-    html_content = requests.get(url).text
-    soup = bs(html_content,"html.parser")
-    with open('metadata.pdf', 'wb') as fd:
-        for chunk in r.iter_content(chunk_size):
-            fd.write(chunk)
-    try:
-        out = r.content
-    except Exception:
-        #out=copy.copy(soup)
-        print ("No Divs")
-    #out = IFrame(src={instring}, width=700, height=600)
-    #return gr.HTML.update(f'''<iframe src={out}, width=700, height=600></iframe>''')
-    return gr.HTML.update(f'''<object data=metadata.pdf type="application/pdf" width="100%" height="500px">''')
 with gr.Blocks() as app:
     inp=gr.Textbox()

 import gradio as gr
 def scrape(instring):
     html_src=(f'''
     <div style="text-align:center">
     <h4>Pdf viewer testing</h4>
+    <iframe src="https://docs.google.com/viewer?url={instring}&embedded=true" frameborder="0" height="1200px" width="100%"></iframe>
     </div>''')
     return gr.HTML.update(f'''{html_src}''')
 with gr.Blocks() as app:
     inp=gr.Textbox()