Spaces:
Running
on
Zero
Running
on
Zero
Liam Dyer
committed on
add filenames because of a gradio client bug
Browse files
app.py
CHANGED
|
@@ -86,7 +86,7 @@ def convert_pandoc(input_file, filename) -> str:
|
|
| 86 |
|
| 87 |
|
| 88 |
@spaces.GPU
|
| 89 |
-
def convert(input_file) -> str:
|
| 90 |
plain_text_filetypes = [
|
| 91 |
".txt",
|
| 92 |
".csv",
|
|
@@ -99,14 +99,14 @@ def convert(input_file) -> str:
|
|
| 99 |
".jsonc",
|
| 100 |
]
|
| 101 |
# Already a plain text file that wouldn't benefit from pandoc so return the content
|
| 102 |
-
if any(
|
| 103 |
with open(input_file, "r") as f:
|
| 104 |
return f.read()
|
| 105 |
|
| 106 |
-
if
|
| 107 |
return convert_pdf(input_file)
|
| 108 |
|
| 109 |
-
return convert_pandoc(input_file,
|
| 110 |
|
| 111 |
|
| 112 |
def chunk_to_length(text, max_length=512):
|
|
@@ -119,11 +119,14 @@ def chunk_to_length(text, max_length=512):
|
|
| 119 |
|
| 120 |
|
| 121 |
@spaces.GPU
|
| 122 |
-
def predict(queries, documents, max_characters) -> list[list[str]]:
|
| 123 |
queries = queries.split("\n")
|
|
|
|
| 124 |
|
| 125 |
-
#
|
| 126 |
-
converted_docs = [
|
|
|
|
|
|
|
| 127 |
|
| 128 |
# Return if the total length is less than the max characters
|
| 129 |
total_doc_lengths = sum([len(doc) for doc in converted_docs])
|
|
@@ -193,6 +196,7 @@ gr.Interface(
|
|
| 193 |
inputs=[
|
| 194 |
gr.Textbox(label="Queries separated by newline"),
|
| 195 |
gr.File(label="Upload File", file_count="multiple"),
|
|
|
|
| 196 |
gr.Number(label="Max output characters", value=16384),
|
| 197 |
],
|
| 198 |
outputs=[gr.JSON(label="Embedded documents")],
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
@spaces.GPU
|
| 89 |
+
def convert(input_file, filename) -> str:
|
| 90 |
plain_text_filetypes = [
|
| 91 |
".txt",
|
| 92 |
".csv",
|
|
|
|
| 99 |
".jsonc",
|
| 100 |
]
|
| 101 |
# Already a plain text file that wouldn't benefit from pandoc so return the content
|
| 102 |
+
if any(filename.endswith(ft) for ft in plain_text_filetypes):
|
| 103 |
with open(input_file, "r") as f:
|
| 104 |
return f.read()
|
| 105 |
|
| 106 |
+
if filename.endswith(".pdf"):
|
| 107 |
return convert_pdf(input_file)
|
| 108 |
|
| 109 |
+
return convert_pandoc(input_file, filename)
|
| 110 |
|
| 111 |
|
| 112 |
def chunk_to_length(text, max_length=512):
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
@spaces.GPU
|
| 122 |
+
def predict(queries, documents, document_filenames, max_characters) -> list[list[str]]:
|
| 123 |
queries = queries.split("\n")
|
| 124 |
+
document_filenames = document_filenames.split("\n")
|
| 125 |
|
| 126 |
+
# Convert the documents to text
|
| 127 |
+
converted_docs = [
|
| 128 |
+
convert(doc, filename) for doc, filename in zip(documents, document_filenames)
|
| 129 |
+
]
|
| 130 |
|
| 131 |
# Return if the total length is less than the max characters
|
| 132 |
total_doc_lengths = sum([len(doc) for doc in converted_docs])
|
|
|
|
| 196 |
inputs=[
|
| 197 |
gr.Textbox(label="Queries separated by newline"),
|
| 198 |
gr.File(label="Upload File", file_count="multiple"),
|
| 199 |
+
gr.Textbox(label="Filenames separated by newline"),
|
| 200 |
gr.Number(label="Max output characters", value=16384),
|
| 201 |
],
|
| 202 |
outputs=[gr.JSON(label="Embedded documents")],
|