Spaces:

FelipeErmeson
/

projeto-rag

Running on Zero

App Files Files Community

Felipe Silva committed on Sep 13

Commit

c71745c

1 Parent(s): 8c92d61

Ajuste retorno do gr.change

Browse files

Files changed (2) hide show

app.py +18 -21
utils.py +5 -8

app.py CHANGED Viewed

@@ -1,26 +1,28 @@
 import gradio as gr
 import spaces
 import torch
 from utils import read_file_pdf, fix_type, extract_content_in_pdf, EXTENSIONS_FILES, EXTENSIONS_IMG_FILES
 from rag_utils import create_split_doc, store_docs, create_rag_chain
 zero = torch.Tensor([0]).cuda()
 print(zero.device) # <-- 'cpu' 🤔
-@spaces.GPU
-def greet(n):
-    print(zero.device) # <-- 'cuda:0' 🤗
-    return f"Hello {zero + n} Tensor"
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 def process_file(file):
     if file is None:
-        return "Nenhum arquivo enviado.", ""
-    if file.size > MAX_FILE_SIZE:
-        return f"O arquivo excede o limite. Por favor, realize o upload de um arquivo que contenha no máximo {MAX_FILE_SIZE/1024/1024:.1f}MB.", ""
     file_obj, type_file = fix_type(file)
     texto_extraido = None
@@ -28,16 +30,11 @@ def process_file(file):
         texto_extraido = extract_content_in_pdf(file_obj)
     elif type_file in EXTENSIONS_IMG_FILES:
         texto_extraido = "OCR não implementado neste exemplo."
-    return texto_extraido or "Não foi possível extrair texto.", texto_extraido or ""
-def ask_question(file, question):
-    texto_extraido, _ = process_file(file)
-    if not texto_extraido or texto_extraido.startswith("Não foi possível"):
-        return texto_extraido
-    # Cria RAG
     docs_splitted = create_split_doc(texto_extraido)
     vector_store = store_docs(docs_splitted)
     rag_chain = create_rag_chain(vector_store)
@@ -46,9 +43,9 @@ def ask_question(file, question):
     return resposta
 with gr.Blocks() as demo:
-    gr.Markdown("## Pergunte qualquer coisa para seu arquivo.")
     gr.Markdown(
-        ":dog: Faça o upload do seu arquivo e pergunte qualquer coisa a ele! Este código é open source e disponível [aqui](https://github.com/FelipeErmeson) no GitHub. :grin:"
     )
     with gr.Row():
@@ -60,7 +57,7 @@ with gr.Blocks() as demo:
             answer_output = gr.Textbox(label="Resposta")
     # Conecta funções
-    file_input.change(fn=process_file, inputs=file_input, outputs=[extracted_text, file_input])
-    question_input.submit(fn=ask_question, inputs=[file_input, question_input], outputs=answer_output)
 demo.launch()

 import gradio as gr
 import spaces
 import torch
+import os
 from utils import read_file_pdf, fix_type, extract_content_in_pdf, EXTENSIONS_FILES, EXTENSIONS_IMG_FILES
 from rag_utils import create_split_doc, store_docs, create_rag_chain
 zero = torch.Tensor([0]).cuda()
 print(zero.device) # <-- 'cpu' 🤔
+# @spaces.GPU
+# def greet(n):
+#     print(zero.device) # <-- 'cuda:0' 🤗
+#     return f"Hello {zero + n} Tensor"
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 def process_file(file):
     if file is None:
+        return "Nenhum arquivo enviado."
+    file_size = os.path.getsize(file)
+    if file_size > MAX_FILE_SIZE:
+        return f"O arquivo excede o limite. Por favor, realize o upload de um arquivo que contenha no máximo {MAX_FILE_SIZE/1024/1024:.1f}MB."
     file_obj, type_file = fix_type(file)
     texto_extraido = None
         texto_extraido = extract_content_in_pdf(file_obj)
     elif type_file in EXTENSIONS_IMG_FILES:
         texto_extraido = "OCR não implementado neste exemplo."
+    return texto_extraido or "Não foi possível extrair texto."
+def ask_question(texto_extraido, question):
+    # RAG
     docs_splitted = create_split_doc(texto_extraido)
     vector_store = store_docs(docs_splitted)
     rag_chain = create_rag_chain(vector_store)
     return resposta
 with gr.Blocks() as demo:
+    gr.Markdown("## ⚙️ Pergunte qualquer coisa para seu arquivo.")
     gr.Markdown(
+        "🐶 Faça o upload do seu arquivo e pergunte qualquer coisa a ele! Este código é open source e disponível [aqui](https://github.com/FelipeErmeson) no GitHub. 😁"
     )
     with gr.Row():
             answer_output = gr.Textbox(label="Resposta")
     # Conecta funções
+    file_input.change(fn=process_file, inputs=file_input, outputs=extracted_text)
+    question_input.submit(fn=ask_question, inputs=[extracted_text, question_input], outputs=answer_output)
 demo.launch()

utils.py CHANGED Viewed

@@ -10,14 +10,11 @@ EXTENSIONS_ALLOWED = EXTENSIONS_IMG_FILES + EXTENSIONS_FILES
 MAX_IMAGE_SIZE = 2000  # pixels
 def fix_type(file_upload):
-    if isinstance(file_upload, str):
-        print('teste: str')
-    else:
-        type_file = file_upload.type.split('/')[-1]
-        if type_file in EXTENSIONS_IMG_FILES:
-            return read_file_img(file_upload), type_file
-        elif type_file in EXTENSIONS_FILES:
-            return read_file_pdf(file_upload), type_file
 # Resize image while maintaining aspect ratio
 def resize_image(image, max_size):

 MAX_IMAGE_SIZE = 2000  # pixels
 def fix_type(file_upload):
+    type_file = file_upload.split('/')[-1].split('.')[-1]
+    if type_file in EXTENSIONS_IMG_FILES:
+        return read_file_img(file_upload), type_file
+    elif type_file in EXTENSIONS_FILES:
+        return read_file_pdf(file_upload), type_file
 # Resize image while maintaining aspect ratio
 def resize_image(image, max_size):