# Hugging Face Space (status: Sleeping)
import fitz  # PyMuPDF for PDF
import gradio as gr
import torch
from docx import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Load the Phi-2 model and tokenizer once, at import time, so every request
# served by the app reuses the same weights.
device = "cpu"
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Half precision is only appropriate on accelerators: on CPU, float16 kernels
# are missing for some ops or extremely slow, so use float32 there.
model_dtype = torch.float32 if device == "cpu" else torch.float16
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype)
model.to(device)
def load_document(file_path):
    """Load a document from disk with the loader matching its extension.

    Args:
        file_path: Path to a ``.pdf``, ``.docx`` or ``.txt`` file. The
            extension is matched case-insensitively.

    Returns:
        Whatever the selected loader's ``load()`` returns for a supported
        file, or the string ``"Unsupported file format."`` for any other
        extension.
    """
    # Uploaded files frequently carry upper-case extensions ("REPORT.PDF"),
    # so compare against a lower-cased copy while still opening the real path.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif lowered_path.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    elif lowered_path.endswith(".txt"):
        loader = TextLoader(file_path)
    else:
        return "Unsupported file format."
    return loader.load()
def summarize_document(file):
    """Summarize an uploaded document with Phi-2 using a map-reduce chain.

    Args:
        file: The upload value passed in by the UI: ``None`` when nothing
            was uploaded, otherwise either a path string or an object whose
            ``name`` attribute holds the path.

    Returns:
        The generated summary, or a short message when there is nothing to
        summarize (no upload, unsupported format, or no extractable text).
    """
    if file is None:
        return "Please upload a document."

    # Accept both a plain path and a tempfile-like wrapper exposing `.name`.
    file_path = file if isinstance(file, str) else file.name

    docs = load_document(file_path)
    # load_document reports an unsupported extension as a message string;
    # a non-empty string is truthy, so it must be handled before `not docs`.
    if isinstance(docs, str):
        return docs
    if not docs:
        return "No text found in document."

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = text_splitter.split_documents(docs)
    if not split_docs:
        # e.g. documents whose pages contain no extractable text
        return "No text found in document."

    # HuggingFacePipeline wraps a transformers *pipeline*, not a bare model.
    # max_new_tokens bounds each generated summary independently of the
    # prompt length.
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
    )
    llm = HuggingFacePipeline(pipeline=generator)

    # Summarize each chunk, then combine the partial summaries (map-reduce).
    summarize_chain = load_summarize_chain(llm, chain_type="map_reduce")
    return summarize_chain.run(split_docs)
# Gradio interface: a single file upload in, a single text box out.
upload_input = gr.File(label="Upload Document (PDF, DOCX, TXT)")
summary_output = gr.Textbox(label="Summarized Text")

demo = gr.Interface(
    fn=summarize_document,
    inputs=upload_input,
    outputs=summary_output,
    title="AI Document Summarizer with Phi-2",
    description="Upload a document, and the AI will generate a summary using MapReduce.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()