File size: 4,697 Bytes
87296cd
 
 
 
 
 
ec18d9b
66c7ada
1848973
66c7ada
 
 
bd6f8a7
 
cfe2de2
bd6f8a7
cfe2de2
bd6f8a7
87296cd
bd6f8a7
87296cd
bd6f8a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87296cd
66c7ada
87296cd
 
 
66c7ada
ec18d9b
87296cd
66c7ada
 
 
 
87296cd
 
66c7ada
1848973
 
87296cd
 
66c7ada
87296cd
 
66c7ada
bd6f8a7
66c7ada
87296cd
 
66c7ada
87296cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

import os
from typing import TypedDict, List
from pydantic import BaseModel, Field

# --- Core LangChain & Document Processing Imports ---
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from core_utils.core_model_loaders import load_embedding_model, load_gemini_llm
from langgraph.graph import StateGraph, END

# --- Initialize Models ---
# Module-level singletons shared by the RAG chain and every graph node below.
# NOTE(review): these run at import time — loading may do I/O; confirm callers expect that.
embedding_model = load_embedding_model()  # used to build the FAISS index
llm = load_gemini_llm()  # used for summarization, term extraction, and Q&A

# --- 1. RAG Chain Logic ---

def create_rag_chain(retriever):
    """Build an LCEL chain that answers questions using only retrieved context.

    The chain feeds the incoming question to *retriever*, joins the retrieved
    documents into one context string, fills the prompt, calls the shared
    ``llm``, and returns the plain string output.
    """
    # Exact prompt text preserved (indentation is part of the template).
    template = (
        "Answer the question based only on the following context:\n"
        "    {context}\n"
        "\n"
        "    Question: {question}\n"
        "    "
    )
    prompt = PromptTemplate.from_template(template)

    def _join_docs(docs):
        # Concatenate retrieved documents into a single context block.
        return "\n\n".join(d.page_content for d in docs)

    inputs = {"context": retriever | _join_docs, "question": RunnablePassthrough()}
    return inputs | prompt | llm | StrOutputParser()

# --- 2. Demystifier Graph Logic ---

class DemystifierState(TypedDict):
    """Shared state passed between the demystifier graph's nodes."""
    document_chunks: List[str]  # page_content of each split chunk of the PDF
    summary: str        # written by summarize_document
    key_terms: str      # written by extract_key_terms
    final_report: str   # written by generate_report; read by the caller

def summarize_document(state: DemystifierState):
    """Graph node: produce a plain-language summary of the document chunks.

    Returns a partial state update containing only the ``summary`` key.
    """
    print("---NODE: Summarizing Document---")
    # Join all chunks, then cap at 30k chars so the prompt stays within limits.
    full_text = "\n\n".join(state["document_chunks"])[:30000]

    instructions = f"""
    You are a legal expert. Summarize the following legal document content in simple, easy-to-understand language.
    Focus on the main purpose and parties involved.
    
    Content:
    {full_text}
    """
    result = llm.invoke(instructions)
    return {"summary": result.content}

def extract_key_terms(state: DemystifierState):
    """Graph node: list 5-7 complex legal terms with plain-English explanations.

    Returns a partial state update containing only the ``key_terms`` key.
    """
    print("---NODE: Extracting Key Terms---")
    # Same 30k-char safety cap as the summarization node.
    full_text = "\n\n".join(state["document_chunks"])[:30000]

    instructions = f"""
    Identify 5-7 complex legal terms or clauses from the text below.
    List them and explain what they mean in plain English for a layperson.
    
    Content:
    {full_text}
    """
    result = llm.invoke(instructions)
    return {"key_terms": result.content}

def generate_report(state: DemystifierState):
    """Graph node: assemble the Markdown report from summary and key terms.

    Pure string formatting — no LLM call. Returns the ``final_report`` update.
    """
    print("---NODE: Generating Final Report---")

    # Template text is byte-identical to the shipped report format.
    template = """
    # Document Analysis
    
    ## 📝 Summary
    {summary}
    
    ## 🔑 Key Terms & Definitions
    {key_terms}
    
    ## 💡 Expert Advice
    Always consult with a qualified lawyer for critical legal decisions. This analysis is AI-generated guidance.
    """
    rendered = template.format(summary=state["summary"], key_terms=state["key_terms"])
    return {"final_report": rendered}

# --- Build the Graph ---
# Linear pipeline: summarize -> extract_terms -> compile_report.
workflow = StateGraph(DemystifierState)

workflow.add_node("summarize", summarize_document)
workflow.add_node("extract_terms", extract_key_terms)
workflow.add_node("compile_report", generate_report)

# Sequential execution: each node runs after the previous one finishes.
# (Summary and term extraction are independent and could be fanned out in
# parallel from a common entry point, but the edges below are strictly serial.)
workflow.set_entry_point("summarize")
workflow.add_edge("summarize", "extract_terms")
workflow.add_edge("extract_terms", "compile_report")
workflow.add_edge("compile_report", END)

# Compiled, invokable graph used by process_document_for_demystification().
demystifier_agent_graph = workflow.compile()

# --- 4. The Master "Controller" Function ---
def process_document_for_demystification(file_path: str):
    """Load a PDF, run the analysis graph, build a Q&A RAG chain, return both.

    Args:
        file_path: Path to the PDF document on disk.

    Returns:
        Dict with ``"report"`` (Markdown analysis string) and ``"rag_chain"``
        (an LCEL chain for follow-up questions about the document).

    Raises:
        ValueError: If the PDF yields no pages.
    """
    print(f"--- Processing document: {file_path} ---")

    pages = PyPDFLoader(file_path).load()
    if not pages:
        raise ValueError("No content found in PDF.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    doc_chunks = text_splitter.split_documents(pages)

    print("--- Creating FAISS vector store for Q&A ---")
    store = FAISS.from_documents(doc_chunks, embedding=embedding_model)
    qa_chain = create_rag_chain(store.as_retriever(search_kwargs={"k": 3}))

    print("--- Running analysis graph for the report ---")
    # Oversized text is truncated inside the graph nodes themselves,
    # so the full chunk list is passed through here.
    chunk_texts = [chunk.page_content for chunk in doc_chunks]
    final_state = demystifier_agent_graph.invoke({"document_chunks": chunk_texts})

    return {"report": final_state.get("final_report"), "rag_chain": qa_chain}