Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
import fitz
|
| 3 |
from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
|
| 4 |
from multiprocessing import Pool, cpu_count
|
|
@@ -63,20 +63,16 @@ def translate_summary(summary, lang):
|
|
| 63 |
|
| 64 |
return " ".join(translated_chunks)
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
# Function to read PDF and summarize and translate chunk by chunk
|
| 69 |
-
def summarize_and_translate_pdf(uploaded_file, lang):
|
| 70 |
-
# Save
|
| 71 |
-
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
| 72 |
-
temp_file.write(
|
| 73 |
-
temp_file_path = temp_file.name
|
| 74 |
|
| 75 |
try:
|
| 76 |
-
doc = fitz.open(temp_file_path)
|
| 77 |
except FileNotFoundError:
|
| 78 |
-
|
| 79 |
-
return []
|
| 80 |
|
| 81 |
total_chunks = len(doc)
|
| 82 |
chunks = []
|
|
@@ -91,42 +87,26 @@ def summarize_and_translate_pdf(uploaded_file, lang):
|
|
| 91 |
translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
|
| 92 |
|
| 93 |
# Delete temporary file
|
| 94 |
-
|
| 95 |
|
| 96 |
return translated_chunks
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
"Malayalam": "ml_IN", "Mongolian": "mn_MN", "Marathi": "mr_IN", "Polish": "pl_PL", "Pashto": "ps_AF",
|
| 118 |
-
"Portuguese": "pt_XX", "Swedish": "sv_SE", "Swahili": "sw_KE", "Tamil": "ta_IN", "Telugu": "te_IN",
|
| 119 |
-
"Thai": "th_TH", "Tagalog": "tl_XX", "Ukrainian": "uk_UA", "Urdu": "ur_PK", "Xhosa": "xh_ZA",
|
| 120 |
-
"Galician": "gl_ES", "Slovene": "sl_SI"
|
| 121 |
-
}
|
| 122 |
-
|
| 123 |
-
lang = st.selectbox("Select language for translation", list(languages.keys()))
|
| 124 |
-
|
| 125 |
-
# Translate PDF
|
| 126 |
-
if st.button("Summarize and Translate"):
|
| 127 |
-
translated_chunks = summarize_and_translate_pdf(uploaded_file, languages[lang])
|
| 128 |
-
|
| 129 |
-
# Display translated text
|
| 130 |
-
st.header("Translated Summary")
|
| 131 |
-
for chunk in translated_chunks:
|
| 132 |
-
st.write(chunk)
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import fitz
|
| 3 |
from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
|
| 4 |
from multiprocessing import Pool, cpu_count
|
|
|
|
| 63 |
|
| 64 |
return " ".join(translated_chunks)
|
| 65 |
|
|
|
|
|
|
|
| 66 |
# Function to read PDF and summarize and translate chunk by chunk
|
| 67 |
+
def summarize_and_translate_pdf(pdf_content, lang):
|
| 68 |
+
# Save PDF content to a temporary file
|
| 69 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 70 |
+
temp_file.write(pdf_content)
|
|
|
|
| 71 |
|
| 72 |
try:
|
| 73 |
+
doc = fitz.open(temp_file.name)
|
| 74 |
except FileNotFoundError:
|
| 75 |
+
return "File not found. Please make sure the file path is correct."
|
|
|
|
| 76 |
|
| 77 |
total_chunks = len(doc)
|
| 78 |
chunks = []
|
|
|
|
| 87 |
translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
|
| 88 |
|
| 89 |
# Delete temporary file
|
| 90 |
+
temp_file.close()
|
| 91 |
|
| 92 |
return translated_chunks
|
| 93 |
|
| 94 |
+
# Gradio Interface
|
| 95 |
+
def summarize_and_translate_interface(pdf_content, lang):
|
| 96 |
+
translated_chunks = summarize_and_translate_pdf(pdf_content, lang)
|
| 97 |
+
return "\n".join(translated_chunks)
|
| 98 |
+
|
| 99 |
+
# Gradio UI
|
| 100 |
+
input_pdf = gr.inputs.File(label="Upload a PDF file", type="file")
|
| 101 |
+
language = gr.inputs.Dropdown(choices=["Arabic", "Czech", "German", "English", "Spanish", "Estonian", "Finnish",
|
| 102 |
+
"French", "Gujarati", "Hindi", "Italian", "Japanese", "Kazakh", "Korean",
|
| 103 |
+
"Lithuanian", "Latvian", "Burmese", "Nepali", "Dutch", "Romanian", "Russian",
|
| 104 |
+
"Sinhala", "Turkish", "Vietnamese", "Chinese", "Afrikaans", "Azerbaijani",
|
| 105 |
+
"Bengali", "Persian", "Hebrew", "Croatian", "Indonesian", "Georgian", "Khmer",
|
| 106 |
+
"Macedonian", "Malayalam", "Mongolian", "Marathi", "Polish", "Pashto",
|
| 107 |
+
"Portuguese", "Swedish", "Swahili", "Tamil", "Telugu", "Thai", "Tagalog",
|
| 108 |
+
"Ukrainian", "Urdu", "Xhosa", "Galician", "Slovene"],
|
| 109 |
+
label="Select language for translation")
|
| 110 |
+
output_text = gr.outputs.Textbox(label="Translated Summary")
|
| 111 |
+
|
| 112 |
+
gr.Interface(summarize_and_translate_interface, inputs=[input_pdf, language], outputs=output_text).launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|