Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from gradio_client import Client, handle_file | |
| import requests | |
| from PIL import Image | |
| import io | |
| import fitz # PyMuPDF | |
| import tempfile | |
| import os | |
| # Função para extrair texto e imagens de um PDF | |
def extract_from_pdf(pdf_path):
    """Extract the text and the embedded images of a PDF file.

    Args:
        pdf_path: Path of the PDF document, opened with PyMuPDF (``fitz``).

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page
        concatenated in page order; ``images`` is a list of PIL images in
        the order they were found. This function never raises: on any
        failure it returns an error message string (prefixed with
        ``"Erro ao processar PDF:"``) together with an empty list.
    """
    try:
        page_texts = []
        extracted_images = []
        # The context manager closes the document even if a page or an
        # image fails midway, so the file handle is never leaked.
        with fitz.open(pdf_path) as doc:
            for page in doc:
                page_texts.append(page.get_text())
                for img in page.get_images(full=True):
                    # The first field of each image entry is the xref used
                    # to pull the raw image bytes out of the document.
                    xref = img[0]
                    image_bytes = doc.extract_image(xref)["image"]
                    # The bytes are owned by the BytesIO buffer, so the PIL
                    # image stays usable after the document is closed.
                    extracted_images.append(Image.open(io.BytesIO(image_bytes)))
        return "".join(page_texts), extracted_images
    except Exception as e:
        # Boundary for the UI: report the failure as text instead of raising.
        return f"Erro ao processar PDF: {str(e)}", []
| # Função principal para fazer a predição | |
def predict(file, question, seed, top_p, temperature):
    """Ask the remote Janus-Pro-7B Space a question about a PDF or an image.

    For a PDF, the first embedded image is sent to the model and the
    extracted text (if any) is prepended to the question. Anything else is
    treated as an image given by local path or HTTP(S) URL.

    Args:
        file: Local file path or URL of the PDF/image to analyse.
        question: Question to ask about the file.
        seed: Seed value forwarded to the remote endpoint.
        top_p: Top-p value forwarded to the remote endpoint.
        temperature: Temperature value forwarded to the remote endpoint.

    Returns:
        The result returned by the remote endpoint, or a human-readable
        error message string. This function never raises.
    """
    tmp_path = None  # temporary PNG created for the PDF branch, if any
    try:
        if not file:
            return "Erro durante a predição: nenhum arquivo foi enviado."

        source = str(file)
        # Case-insensitive so that "REPORT.PDF" is also recognised.
        if source.lower().endswith(".pdf"):
            extracted_text, extracted_images = extract_from_pdf(source)

            if not extracted_images:
                # extract_from_pdf reports its own failures as an error
                # text plus an empty image list; surface that message
                # rather than the misleading "no image" one.
                if extracted_text.startswith("Erro ao processar PDF:"):
                    return extracted_text
                return "Nenhuma imagem encontrada no PDF."

            image = extracted_images[0]
            # The PNG writer rejects CMYK images, so convert them first.
            if image.mode == "CMYK":
                image = image.convert("RGB")

            # Create and close the temp file before saving into it so the
            # path can be reopened by name on every platform.
            fd, tmp_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            image.save(tmp_path, format="PNG")
            # File inputs must be wrapped with handle_file for the client.
            image_input = handle_file(tmp_path)

            if extracted_text:
                question = f"Texto extraído do PDF:\n{extracted_text}\n\nPergunta: {question}"
        else:
            # handle_file accepts both local paths and HTTP(S) URLs, so no
            # manual download is needed for the URL case.
            image_input = handle_file(source)

        client = Client("deepseek-ai/Janus-Pro-7B")
        return client.predict(
            image=image_input,
            question=question,
            seed=seed,
            top_p=top_p,
            temperature=temperature,
            api_name="/multimodal_understanding",
        )
    except Exception as e:
        # Boundary for the UI: show the failure in the answer box.
        return f"Erro durante a predição: {str(e)}"
    finally:
        # The blocking predict call has returned (or failed) by now, so the
        # temporary PNG is no longer needed.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup; nothing useful to report
| # Componentes da interface | |
# Input widgets shown in the UI, in the order `predict` receives them.
file_input = gr.File(
    label="Upload PDF or Image",
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
)
question_input = gr.Textbox(
    label="Question",
    placeholder="Ask something about the file...",
)
seed_slider = gr.Slider(minimum=0, maximum=100, value=42, label="Seed")
top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Top-p")
temp_slider = gr.Slider(minimum=0, maximum=1, value=0.1, label="Temperature")

# Wire the widgets to `predict` and expose a single text answer.
demo = gr.Interface(
    fn=predict,
    inputs=[file_input, question_input, seed_slider, top_p_slider, temp_slider],
    outputs=gr.Textbox(label="Answer"),
    title="Janus-Pro-7B Multimodal Demo",
    description="Ask questions about PDFs or images using the Janus-Pro-7B model",
    # One example row: file, question, seed, top-p, temperature.
    examples=[
        [
            "https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png",
            "What's in this image?",
            42,
            0.95,
            0.1,
        ],
    ],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()