Legalize_AI / rag_engine.py
hashirlodhi's picture
Upload 11 files
e46711a verified
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import config
def initialize_llm():
    """
    Build the Groq chat model used to generate answers.

    The model name and API key are read from the ``config`` module; the
    sampling temperature (0.1) and the response cap (2000 tokens) are fixed
    here.

    Returns:
        A configured ``ChatGroq`` instance.
    """
    settings = {
        "model": config.LLM_MODEL_NAME,
        "temperature": 0.1,
        "max_tokens": 2000,
        "api_key": config.GROQ_API_KEY,
    }
    return ChatGroq(**settings)
def get_rag_prompt():
    """
    Create the prompt template for RAG.

    The template casts the model as a legal consultant for Pakistani law and
    exposes two input variables: ``{context}`` (the formatted retrieved
    documents) and ``{question}`` (the user's query).

    Returns:
        A ``ChatPromptTemplate`` built from the template text below.
    """
    # The template body is kept flush-left so that no indentation from this
    # function leaks into the text sent to the model.
    return ChatPromptTemplate.from_template("""
You are a Senior Legal Consultant specializing in the laws of Pakistan.
CONTEXT:
1. Constitution of Pakistan
2. Pakistan Penal Code
INSTRUCTIONS:
- Adopt a formal, professional, and authoritative tone suitable for legal memoranda.
- Cite specific Articles, Sections, or Clauses extensively.
- If the information is present: Provide a direct, concise legal opinion.
- If the information is MISSING: State clearly "The provided legal documents do not contain specific provisions regarding [topic]." Do not apologize.
- Structure your response with clear headings if necessary.
LEGAL CONTEXT:
{context}
QUERY: {question}
LEGAL OPINION:
""")
def format_docs(docs, *, max_chars=800):
    """
    Format retrieved documents for the prompt.

    Each document contributes three lines: a header of the form
    ``[Document N: <source>, Page <page>]`` (N counts from 1), the document
    text truncated to ``max_chars`` characters, and a separator made of 50
    dashes.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping and a
            ``page_content`` string. Missing ``source`` / ``page`` metadata
            falls back to ``'Unknown Document'`` / ``'N/A'``.
        max_chars: Maximum number of characters kept from each document.
            Defaults to 800, the limit that was previously hard-coded.

    Returns:
        The formatted lines joined with newlines; an empty string when
        ``docs`` is empty.
    """
    separator = "-" * 50
    formatted = []
    for number, doc in enumerate(docs, start=1):
        source = doc.metadata.get('source', 'Unknown Document')
        page = doc.metadata.get('page', 'N/A')
        formatted.append(f"[Document {number}: {source}, Page {page}]")
        # Cap each excerpt so that several long documents cannot overflow
        # the model's context window.
        formatted.append(doc.page_content[:max_chars])
        formatted.append(separator)
    return "\n".join(formatted)
def create_rag_chain(retriever):
    """
    Assemble the retrieval-augmented generation pipeline.

    The incoming question is sent to ``retriever`` and the result is passed
    through ``format_docs`` to fill the prompt's ``context`` slot; the
    question itself is forwarded unchanged into the ``question`` slot. The
    filled prompt then goes to the LLM, and the LLM output is run through
    ``StrOutputParser``.

    Args:
        retriever: Runnable that is piped into ``format_docs``.

    Returns:
        The composed runnable chain.
    """
    model = initialize_llm()
    template = get_rag_prompt()

    # Both entries receive the same chain input (the user's question).
    prompt_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    return prompt_inputs | template | model | StrOutputParser()