Spaces:
Runtime error
Runtime error
| """ | |
| Tool for summarizing legal documents. | |
| """ | |
| from langchain.tools import BaseTool | |
| from langchain.chains import LLMChain, MapReduceDocumentsChain | |
| from langchain.prompts import PromptTemplate | |
| from langchain.document_loaders import PyPDFLoader | |
| from AI_core.config import LLM, TEXT_SPLITTER | |
class SummarizationTool(BaseTool):
    """Tool to summarize legal documents using a map-reduce approach.

    Each chunk of the document is summarized on its own (map step) and the
    partial summaries are then merged into a single overview (reduce step).
    """

    name: str = "document_summarization_tool"
    description: str = "Summarizes legal documents. Input should be a file path to a PDF or text document."

    def _run(self, file_path: str) -> str:
        """
        Run the document summarization process.

        Args:
            file_path: Path to the document to summarize. Paths ending in
                ``.pdf`` (case-insensitive) are read with ``PyPDFLoader``;
                anything else is read as UTF-8 text.

        Returns:
            str: Summarized content, or a short notice when the document
            produced no text chunks to summarize.

        Raises:
            OSError: If a plain-text file cannot be opened.
            UnicodeDecodeError: If a plain-text file is not valid UTF-8.
        """
        documents = self._load_documents(file_path)

        # Split documents
        docs = TEXT_SPLITTER.split_documents(documents)
        if not docs:
            # Nothing to map over (e.g. empty file or a PDF with no extractable
            # text); avoid asking the LLM to "summarize" an empty prompt.
            return "The document contains no extractable text to summarize."

        return self._build_chain().run(docs)

    def _load_documents(self, file_path: str) -> list:
        """Load ``file_path`` into a list of LangChain ``Document`` objects."""
        # Imported locally so this block does not depend on edits to the
        # file-level import section.
        from langchain.schema import Document

        if file_path.lower().endswith(".pdf"):
            return PyPDFLoader(file_path).load()

        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        # split_documents() reads page_content/metadata from every item, so the
        # raw text has to be wrapped in a Document rather than passed as a str.
        return [Document(page_content=text, metadata={"source": file_path})]

    def _build_chain(self) -> MapReduceDocumentsChain:
        """Assemble the map-reduce summarization chain around ``LLM``."""
        from langchain.chains import ReduceDocumentsChain, StuffDocumentsChain

        # Map step - summarize each chunk
        map_template = (
            "\n"
            "You are a legal expert summarizing complex legal documents.\n"
            "Summarize the following text in a concise and accurate manner, preserving key legal points:\n"
            "{text}\n"
        )
        map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
        map_chain = LLMChain(llm=LLM, prompt=map_prompt, output_key="summary")

        # Reduce step - combine summaries
        reduce_template = (
            "\n"
            "You are a legal expert creating a comprehensive summary from multiple text segments.\n"
            "Combine these summaries into a cohesive overview of the entire document, organized by key legal themes and points:\n"
            "{summaries}\n"
        )
        reduce_prompt = PromptTemplate(template=reduce_template, input_variables=["summaries"])
        reduce_chain = LLMChain(llm=LLM, prompt=reduce_prompt)

        # MapReduceDocumentsChain needs a combine-documents chain for its reduce
        # step, not a bare LLMChain: stuff the per-chunk summaries into the
        # {summaries} variable and let ReduceDocumentsChain drive that.
        stuff_chain = StuffDocumentsChain(
            llm_chain=reduce_chain,
            document_variable_name="summaries",
        )
        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=stuff_chain,
        )

        # Create MapReduce chain
        return MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            document_variable_name="text",
        )