Spaces:
Runtime error
Runtime error
fixed layout
Browse files
app.py
CHANGED
|
@@ -50,12 +50,12 @@ def process_pdfs(parent_dir: Union[str,list]):
|
|
| 50 |
# 512 is related to the positional encoding of the "facebook/dpr-ctx_encoder-single-nq-base" model
|
| 51 |
file_name = file_path.split("/")[-1]
|
| 52 |
if len(txt) < 512 :
|
| 53 |
-
new_data =
|
| 54 |
-
df =
|
| 55 |
else :
|
| 56 |
while len(txt) > 512 :
|
| 57 |
-
new_data =
|
| 58 |
-
df =
|
| 59 |
txt = txt[512:]
|
| 60 |
|
| 61 |
# closing the pdf file object
|
|
@@ -101,15 +101,16 @@ def predict(query,file_paths, k=3):
|
|
| 101 |
return out
|
| 102 |
|
| 103 |
with gr.Blocks() as demo :
|
| 104 |
-
with gr.
|
| 105 |
-
gr.
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
button.click(predict, [query,files,k],outputs=output)
|
| 114 |
|
| 115 |
demo.launch()
|
|
|
|
| 50 |
# 512 is related to the positional encoding of the "facebook/dpr-ctx_encoder-single-nq-base" model
|
| 51 |
file_name = file_path.split("/")[-1]
|
| 52 |
if len(txt) < 512 :
|
| 53 |
+
new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt]],columns=["title","text"])
|
| 54 |
+
df = pd.concat([df,new_data],ignore_index=True)
|
| 55 |
else :
|
| 56 |
while len(txt) > 512 :
|
| 57 |
+
new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt[:512]]],columns=["title","text"])
|
| 58 |
+
df = pd.concat([df,new_data],ignore_index=True)
|
| 59 |
txt = txt[512:]
|
| 60 |
|
| 61 |
# closing the pdf file object
|
|
|
|
| 101 |
return out
|
| 102 |
|
| 103 |
with gr.Blocks() as demo :
|
| 104 |
+
with gr.Row():
|
| 105 |
+
with gr.Column():
|
| 106 |
+
gr.Markdown("## PDF Search Engine")
|
| 107 |
+
files = gr.Files(label="Upload PDFs",type="filepath",file_count="multiple")
|
| 108 |
+
query = gr.Text(label="query")
|
| 109 |
+
with gr.Accordion("number of references",open=False):
|
| 110 |
+
k = gr.Number(value=3)
|
| 111 |
+
button = gr.Button("search")
|
| 112 |
+
with gr.Column():
|
| 113 |
+
output = gr.Textbox(label="output")
|
| 114 |
button.click(predict, [query,files,k],outputs=output)
|
| 115 |
|
| 116 |
demo.launch()
|