Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from gpt4all import GPT4All | |
| from huggingface_hub import hf_hub_download | |
| import numpy as np | |
| from pypdf import PdfReader | |
| import os | |
| from transformers import pipeline | |
# --- Model setup -------------------------------------------------------------
# Download the quantized Mistral-7B-Instruct GGUF weights from the Hugging Face
# Hub into the local "models" directory, then load them with GPT4All on CPU.
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    filename=model_name,
    local_dir=model_path,
    # NOTE(review): local_dir_use_symlinks is deprecated and ignored by
    # recent huggingface_hub releases; kept for backward compatibility.
    local_dir_use_symlinks=False,
)

print("Start the model init process")
# Fix: the original line read `model = model = GPT4All(...)` (duplicated
# assignment). allow_download=False because the weights were fetched above.
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
# Mistral instruction format; {0} is substituted with the user prompt.
model.config["promptTemplate"] = "[INST] {0} [/INST]"
# System prompt (French): "You are an assistant and your task is to summarize
# texts in French." Kept byte-identical, including the original spelling.
model.config["systemPrompt"] = "Tu es un assitant et ta tâche est de résumer des texte en français"
# Disable the built-in chat session so generate() uses the raw prompt as-is.
model._is_chat_session_activated = False

# Upper bound on tokens generated per summary.
max_new_tokens = 2048
def extract_text(file):
    """Extract the text of every page of a PDF and return it as one string.

    Parameters
    ----------
    file : str or file-like
        Path or binary file object accepted by ``pypdf.PdfReader``.

    Returns
    -------
    str
        The text of all pages joined with spaces; an empty PDF yields "".

    Fixes over the original: it iterated over every page but returned only
    ``text[0]`` (first page only, and an IndexError on an empty PDF), and it
    used ``np.arange`` where direct iteration over ``reader.pages`` suffices.
    The commented-out ``' '.join(text)`` in the original shows the intended
    behavior restored here.
    """
    reader = PdfReader(file)
    # extract_text() may return an empty string for image-only pages; the
    # `or ""` guards against a None from unusual pypdf versions.
    return " ".join(page.extract_text() or "" for page in reader.pages)
def summarise(text):
    """Generate a summary of *text* with the locally loaded GPT4All model.

    The raw text is passed directly as the prompt; the model's configured
    promptTemplate/systemPrompt drive the summarisation behavior.  Sampling
    parameters mirror the original configuration.
    """
    return model.generate(
        prompt=text,
        temp=0.5,
        top_k=40,
        top_p=1,
        max_tokens=max_new_tokens,
    )
# --- Gradio UI ---------------------------------------------------------------
# Flow: upload a PDF -> its text fills the "Extracted Text" box -> any change
# to that box triggers the summariser, whose output fills the "Summary" box.
with gr.Blocks() as demo:
    pdf_upload = gr.File(label="Upload a PDF file")
    extracted_box = gr.Textbox(label="Extracted Text")
    summary_box = gr.Textbox(label="Summary")

    pdf_upload.upload(extract_text, inputs=pdf_upload, outputs=extracted_box)
    extracted_box.change(summarise, inputs=extracted_box, outputs=summary_box)

demo.launch()