Spaces:
Sleeping
Sleeping
Commit ·
87296cd
0
Parent(s):
Initial commit with project files
Browse files- .gitignore +12 -0
- agents/__init__.py +0 -0
- agents/demystifier_agent.py +123 -0
- agents/legal_agent.py +64 -0
- agents/scheme_chatbot.py +51 -0
- components/__init__.py +0 -0
- components/video_recorder.py +86 -0
- core_utils/core_model_loaders.py +21 -0
- jan-contract +1 -0
- main_fastapi.py +133 -0
- main_streamlit.py +164 -0
- requirements.txt +33 -0
- tools/__init__.py +0 -0
- tools/legal_tools.py +20 -0
- tools/scheme_tools.py +20 -0
- utils/__init__.py +0 -0
- utils/model_loaders.py +22 -0
- utils/pdf_generator.py +43 -0
- video_consents/consent_20250823_162229.mp4 +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
|
| 7 |
+
# Environments
|
| 8 |
+
.env
|
| 9 |
+
venv/
|
| 10 |
+
|
| 11 |
+
# IDE
|
| 12 |
+
.vscode/
|
agents/__init__.py
ADDED
|
File without changes
|
agents/demystifier_agent.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\agents\demystifier_agent.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import TypedDict, List
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
|
| 7 |
+
# --- Core LangChain & Document Processing Imports ---
|
| 8 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
| 9 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 10 |
+
from langchain_community.vectorstores import FAISS
|
| 11 |
+
from langchain.prompts import PromptTemplate
|
| 12 |
+
from langchain.schema.runnable import RunnablePassthrough
|
| 13 |
+
from langchain.schema.output_parser import StrOutputParser
|
| 14 |
+
|
| 15 |
+
# LangGraph Imports
|
| 16 |
+
from langgraph.graph import StateGraph, END, START
|
| 17 |
+
|
| 18 |
+
# --- Tool and NEW Core Model Loader Imports ---
|
| 19 |
+
from tools.legal_tools import legal_search
|
| 20 |
+
from core_utils.core_model_loaders import load_groq_llm, load_embedding_model
|
| 21 |
+
|
| 22 |
+
# --- 1. Model and Parser Setup ---
|
| 23 |
+
# Initialize models by calling the backend-safe loader functions
|
| 24 |
+
# Loaded once at module import; shared by every graph node and helper below.
groq_llm = load_groq_llm()
embedding_model = load_embedding_model()
|
| 26 |
+
|
| 27 |
+
# --- Pydantic Models (No Changes) ---
|
| 28 |
+
class ExplainedTerm(BaseModel):
    """A single legal term with a plain-English explanation and a reference link."""
    term: str = Field(description="The legal term or jargon identified.")
    explanation: str = Field(description="A simple, plain-English explanation of the term.")
    resource_link: str = Field(description="A working URL for a resource explaining this term in India.")
|
| 32 |
+
|
| 33 |
+
class DemystifyReport(BaseModel):
    """Structured output of the full document analysis returned to the UI/API."""
    summary: str = Field(description="A concise summary of the legal document's purpose and key points.")
    key_terms: List[ExplainedTerm] = Field(description="A list of the most important explained legal terms.")
    overall_advice: str = Field(description="A concluding sentence of general advice.")
|
| 37 |
+
|
| 38 |
+
# --- 2. LangGraph for Document Analysis (No Changes) ---
|
| 39 |
+
class DemystifyState(TypedDict):
    """State threaded through the LangGraph pipeline; each node fills one field."""
    document_chunks: List[str]  # raw text of each PDF chunk (pipeline input)
    summary: str  # written by summarize_node
    identified_terms: List[str]  # written by identify_terms_node
    final_report: DemystifyReport  # written by generate_report_node
|
| 44 |
+
|
| 45 |
+
def summarize_node(state: DemystifyState):
    """Produce a high-level summary of the whole document.

    Rejoins every chunk into a single context string and asks the Groq
    LLM for a paralegal-style summary; returns the partial state update.
    """
    print("---NODE (Demystify): Generating Summary---")
    full_text = "\n\n".join(state["document_chunks"])
    summary_text = groq_llm.invoke(
        f"You are a paralegal expert... Document Content:\n{full_text}"
    ).content
    return {"summary": summary_text}
|
| 52 |
+
|
| 53 |
+
def identify_terms_node(state: DemystifyState):
    """Ask the LLM for the 3-5 most critical legal terms in the document.

    The LLM is expected to reply with a comma-separated list; empty
    fragments are dropped after stripping whitespace.
    """
    print("---NODE (Demystify): Identifying Key Terms---")
    full_text = "\n\n".join(state["document_chunks"])
    raw_reply = groq_llm.invoke(
        f"Based on the following legal document, identify the 3-5 most critical legal terms... Document Content:\n{full_text}"
    ).content
    terms = []
    for piece in raw_reply.split(','):
        cleaned = piece.strip()
        if cleaned:
            terms.append(cleaned)
    return {"identified_terms": terms}
|
| 61 |
+
|
| 62 |
+
def _parse_term_response(response: str):
    """Extract (explanation, link) from an LLM reply formatted as
    'Explanation: ... URL: ...'.

    Unlike the previous `split(...)[-1]` approach, a missing "URL:" marker
    degrades to the placeholder instead of leaking the entire raw reply
    into the report as a bogus link; a missing "Explanation:" marker
    degrades independently.
    """
    explanation = "Could not generate a simple explanation for this term."
    link = "No link found."
    if "Explanation:" in response:
        after_marker = response.split("Explanation:", 1)[1]
        explanation = after_marker.split("URL:", 1)[0].strip() or explanation
    if "URL:" in response:
        link = response.rsplit("URL:", 1)[1].strip() or link
    return explanation, link


def generate_report_node(state: DemystifyState):
    """Combine the summary and researched terms into the final DemystifyReport.

    For each identified term: run a web search, ask the LLM for a
    one-sentence explanation plus a source URL, and parse the reply
    defensively via _parse_term_response.
    """
    print("---NODE (Demystify): Generating Final Report---")
    explained_terms_list = []
    document_context = "\n\n".join(state["document_chunks"])
    for term in state["identified_terms"]:
        print(f"  - Researching term: {term}")
        search_results = legal_search.invoke(f"simple explanation of legal term '{term}' in Indian law")
        # Only the first 2000 chars of context are sent to keep the prompt small.
        prompt = f"""A user is reading a legal document that contains the term "{term}".
Overall document context is: {document_context[:2000]}
Web search results for "{term}" are: {search_results}
Format your response strictly as:
Explanation: [Your simple, one-sentence explanation here]
URL: [The best, full, working URL from the search results]"""
        response = groq_llm.invoke(prompt).content
        explanation, link = _parse_term_response(response)
        explained_terms_list.append(ExplainedTerm(term=term, explanation=explanation, resource_link=link))
    final_report = DemystifyReport(
        summary=state["summary"],
        key_terms=explained_terms_list,
        overall_advice="This is an automated analysis. For critical matters, please consult with a qualified legal professional.",
    )
    return {"final_report": final_report}
|
| 86 |
+
|
| 87 |
+
# Compile the analysis graph
|
| 88 |
+
# Compile the analysis graph: a strictly linear pipeline
# START -> summarize -> identify_terms -> generate_report -> END.
graph_builder = StateGraph(DemystifyState)
graph_builder.add_node("summarize", summarize_node)
graph_builder.add_node("identify_terms", identify_terms_node)
graph_builder.add_node("generate_report", generate_report_node)
graph_builder.add_edge(START, "summarize")
graph_builder.add_edge("summarize", "identify_terms")
graph_builder.add_edge("identify_terms", "generate_report")
graph_builder.add_edge("generate_report", END)
# Compiled graph consumed by process_document_for_demystification below.
demystifier_agent_graph = graph_builder.compile()
|
| 97 |
+
|
| 98 |
+
# --- 3. Helper Function to Create the RAG Chain (No Changes) ---
|
| 99 |
+
def create_rag_chain(retriever):
    """Build the retrieval-augmented Q&A chain for the interactive chat.

    The retriever fills {context}; the user's question passes straight
    through to {question}; the LLM reply is reduced to a plain string.
    """
    qa_prompt = PromptTemplate.from_template(
        """You are a helpful assistant... CONTEXT: {context} QUESTION: {question} ANSWER:"""
    )
    chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
    return chain_inputs | qa_prompt | groq_llm | StrOutputParser()
|
| 105 |
+
|
| 106 |
+
# --- 4. The Master "Controller" Function (No Changes) ---
|
| 107 |
+
def process_document_for_demystification(file_path: str):
    """Load a PDF, run the full analysis graph, and build a Q&A chain.

    Returns:
        dict: {"report": DemystifyReport | None, "rag_chain": Runnable} —
        the structured analysis plus a chain for follow-up questions.
    """
    print(f"--- Processing document: {file_path} ---")
    docs = PyMuPDFLoader(file_path).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    ).split_documents(docs)

    print("--- Creating FAISS vector store for Q&A ---")
    vectorstore = FAISS.from_documents(chunks, embedding=embedding_model)
    rag_chain = create_rag_chain(vectorstore.as_retriever(search_kwargs={"k": 3}))

    print("--- Running analysis graph for the report ---")
    graph_state = demystifier_agent_graph.invoke(
        {"document_chunks": [chunk.page_content for chunk in chunks]}
    )
    return {"report": graph_state.get("final_report"), "rag_chain": rag_chain}
|
agents/legal_agent.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\agents\legal_agent.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from langchain.prompts import PromptTemplate
|
| 5 |
+
from langgraph.graph import StateGraph, END
|
| 6 |
+
from typing import List, TypedDict
|
| 7 |
+
from pydantic import BaseModel, Field
|
| 8 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
| 9 |
+
|
| 10 |
+
# --- Tool and NEW Core Model Loader Imports ---
|
| 11 |
+
from tools.legal_tools import legal_search
|
| 12 |
+
from core_utils.core_model_loaders import load_gemini_llm
|
| 13 |
+
|
| 14 |
+
# --- Pydantic Models (No Changes) ---
|
| 15 |
+
class LegalTriviaItem(BaseModel):
    """One legal right/point relevant to the user's situation, with a source."""
    point: str = Field(description="A concise summary of the legal point or right.")
    explanation: str = Field(description="A brief explanation of what the point means for the user.")
    source_url: str = Field(description="The full, working URL to the official source or a highly reputable article explaining the law.")
|
| 19 |
+
|
| 20 |
+
class LegalTriviaOutput(BaseModel):
    """Container parsed from the LLM's JSON reply by PydanticOutputParser."""
    trivia: List[LegalTriviaItem] = Field(description="A list of structured legal trivia items.")
|
| 22 |
+
|
| 23 |
+
# --- Setup Models and Parsers ---
|
| 24 |
+
# Turns the LLM's JSON reply into a LegalTriviaOutput instance.
parser = PydanticOutputParser(pydantic_object=LegalTriviaOutput)

# Backend-safe (no Streamlit caching) Gemini instance, shared by both nodes.
llm = load_gemini_llm()
|
| 28 |
+
|
| 29 |
+
# --- LangGraph State (No Changes) ---
|
| 30 |
+
class LegalAgentState(TypedDict):
    """State threaded through the two-node legal-agent graph."""
    user_request: str  # plain-language description from the user (input)
    legal_doc: str  # written by generate_legal_doc
    legal_trivia: LegalTriviaOutput  # written by get_legal_trivia
|
| 34 |
+
|
| 35 |
+
# --- LangGraph Nodes (No Changes) ---
|
| 36 |
+
def generate_legal_doc(state: LegalAgentState):
    """Draft a simple informal-agreement document from the user's request."""
    drafting_prompt = (
        "Based on the user's request, generate a simple legal document text for an "
        "informal agreement in India. Keep it clear and simple."
        f"\n\nUser Request: {state['user_request']}"
    )
    return {"legal_doc": llm.invoke(drafting_prompt).content}
|
| 40 |
+
|
| 41 |
+
def get_legal_trivia(state: LegalAgentState):
    """Research the user's situation on the web and return structured trivia.

    Builds a prompt pre-filled with the parser's format instructions, runs
    a web search for the request, then pipes prompt -> LLM -> parser.
    """
    trivia_prompt = PromptTemplate(
        template="""
You are a specialized legal assistant for India's informal workforce...
User's situation: {user_request}
Web search results: {search_results}
{format_instructions}
""",
        input_variables=["user_request", "search_results"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    search_hits = legal_search.invoke(state["user_request"])
    trivia_chain = trivia_prompt | llm | parser
    structured = trivia_chain.invoke(
        {"user_request": state["user_request"], "search_results": search_hits}
    )
    return {"legal_trivia": structured}
|
| 56 |
+
|
| 57 |
+
# --- Build Graph (No Changes) ---
|
| 58 |
+
# Build the two-step agent graph: draft the document first, then fetch
# the related legal trivia (it depends on the same user_request).
workflow = StateGraph(LegalAgentState)
workflow.add_node("generate_legal_doc", generate_legal_doc)
workflow.add_node("get_legal_trivia", get_legal_trivia)
workflow.set_entry_point("generate_legal_doc")
workflow.add_edge("generate_legal_doc", "get_legal_trivia")
workflow.add_edge("get_legal_trivia", END)
# Compiled agent imported by main_fastapi.py and main_streamlit.py.
legal_agent = workflow.compile()
|
agents/scheme_chatbot.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\agents\scheme_chatbot.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from langchain.prompts import PromptTemplate
|
| 5 |
+
from langchain.schema.runnable import RunnablePassthrough
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
| 8 |
+
from typing import List
|
| 9 |
+
|
| 10 |
+
# --- Tool and NEW Core Model Loader Imports ---
|
| 11 |
+
from tools.scheme_tools import scheme_search
|
| 12 |
+
from core_utils.core_model_loaders import load_gemini_llm
|
| 13 |
+
|
| 14 |
+
# --- Pydantic Models (No Changes) ---
|
| 15 |
+
class GovernmentScheme(BaseModel):
    """A single government scheme matched to the user's profile."""
    scheme_name: str = Field(description="The official name of the government scheme.")
    description: str = Field(description="A concise summary of the scheme's objectives and benefits.")
    target_audience: str = Field(description="Who the scheme is intended for (e.g., Women, Farmers, PwD).")
    official_link: str = Field(description="The full, working URL to the official government scheme page or portal.")
|
| 20 |
+
|
| 21 |
+
class SchemeOutput(BaseModel):
    """Container parsed from the LLM's JSON reply by PydanticOutputParser."""
    schemes: List[GovernmentScheme] = Field(description="A list of relevant government schemes.")
|
| 23 |
+
|
| 24 |
+
# --- Setup Models and Parsers ---
|
| 25 |
+
# Turns the LLM's JSON reply into a SchemeOutput instance.
parser = PydanticOutputParser(pydantic_object=SchemeOutput)

# Backend-safe (no Streamlit caching) Gemini instance used by the chain below.
llm = load_gemini_llm()
|
| 29 |
+
|
| 30 |
+
# --- Prompt Template (No Changes) ---
|
| 31 |
+
# Prompt for the scheme-finder chain; format_instructions is pre-bound so
# the chain only needs to supply {user_profile} and {search_results}.
prompt = PromptTemplate(
    template="""
You are an expert assistant for Indian government schemes...
User Profile: {user_profile}
Web search results: {search_results}
{format_instructions}
""",
    input_variables=["user_profile", "search_results"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
|
| 41 |
+
|
| 42 |
+
# --- Build Chain (No Changes) ---
|
| 43 |
+
def get_search_results(query: dict):
    """Run the scheme web search for the chain; expects {'user_profile': str}."""
    profile_text = query["user_profile"]
    return scheme_search.invoke(profile_text)
|
| 45 |
+
|
| 46 |
+
# LCEL pipeline: fan the input dict into search results + profile, fill the
# prompt, call Gemini, and parse into a SchemeOutput.
# NOTE(review): RunnablePassthrough forwards the WHOLE input dict, so
# {user_profile} in the prompt renders as "{'user_profile': ...}" rather than
# the bare string — confirm this is intended.
scheme_chatbot = (
    {"search_results": get_search_results, "user_profile": RunnablePassthrough()}
    | prompt
    | llm
    | parser
)
|
components/__init__.py
ADDED
|
File without changes
|
components/video_recorder.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\components\video_recorder.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import datetime
|
| 6 |
+
import av
|
| 7 |
+
|
| 8 |
+
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
| 9 |
+
|
| 10 |
+
# Consent recordings are written here, relative to the app's working directory.
VIDEO_CONSENT_DIR = "video_consents"
os.makedirs(VIDEO_CONSENT_DIR, exist_ok=True)
|
| 12 |
+
|
| 13 |
+
def record_consent_video():
    """
    Encapsulates the video recording logic using the component's internal state.

    The video is automatically saved when the user clicks the "STOP" button
    on the webrtc component.

    Returns:
        str | None: The file path of the saved video, or None if not saved yet.
    """
    # Instructions for the new, more intuitive workflow
    st.info("Instructions: Click START, record your consent, then click STOP to finalize.")

    webrtc_ctx = webrtc_streamer(
        key="video-consent-recorder",
        mode=WebRtcMode.SENDRECV,  # SENDRECV mode is needed for the stop-button-triggered callback
        media_stream_constraints={"video": True, "audio": True},
        video_receiver_size=256,  # NOTE(review): frame queue size — confirm kwarg name against streamlit-webrtc version
        async_processing=True,
    )

    # This block executes ONLY when the component is running (after START is clicked)
    if webrtc_ctx.state.playing and webrtc_ctx.video_receiver:
        # Inform the user that recording is in progress
        st.success("🔴 Recording in progress...")

        # If the 'frames_buffer' is not in session state, initialize it
        if "frames_buffer" not in st.session_state:
            st.session_state.frames_buffer = []

        # Append each new frame to our session state buffer.
        # NOTE(review): this loop blocks the current Streamlit script run while
        # frames arrive and only exits on a 1s receive timeout — confirm the
        # receiver raises av.error.TimeoutError (not queue.Empty) here.
        while True:
            try:
                frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
                st.session_state.frames_buffer.append(frame)
            except av.error.TimeoutError:
                break  # Break the loop when the stream ends (user clicks STOP)

    # This block executes after the user clicks STOP
    # (frames_buffer truthiness also guarantees video_frames[0] exists below).
    if not webrtc_ctx.state.playing and st.session_state.get("frames_buffer"):
        with st.spinner("Saving your recording..."):
            try:
                video_frames = st.session_state.frames_buffer

                # Generate a unique filename
                timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                video_filename = os.path.join(VIDEO_CONSENT_DIR, f"consent_{timestamp}.mp4")

                # Use the av library to write the buffered frames to a video file
                with av.open(video_filename, mode="w") as container:
                    stream = container.add_stream("libx264", rate=24)
                    stream.width = video_frames[0].width
                    stream.height = video_frames[0].height
                    stream.pix_fmt = "yuv420p"

                    for frame in video_frames:
                        packet = stream.encode(frame)
                        container.mux(packet)

                    # Flush the stream: encode() with no frame drains buffered packets
                    packet = stream.encode()
                    container.mux(packet)

                # Clear the buffer from session state and return the path
                st.session_state.frames_buffer = []
                st.session_state.video_filename = video_filename
                return video_filename

            except Exception as e:
                st.error(f"An error occurred while saving the video: {e}")
                st.session_state.frames_buffer = []  # Clear buffer on error
                return None

    return None
|
core_utils/core_model_loaders.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\core_utils\core_model_loaders.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from langchain_community.embeddings import FastEmbedEmbeddings
|
| 5 |
+
from langchain_groq import ChatGroq
|
| 6 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 7 |
+
|
| 8 |
+
# --- Simple, non-caching functions for the backend ---
|
| 9 |
+
# These can be safely imported by FastAPI or any other backend script.
|
| 10 |
+
|
| 11 |
+
def load_embedding_model():
    """Return a FastEmbed embedding model.

    Backend-safe: no Streamlit caching, so FastAPI workers can import it.
    """
    embedder = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")
    return embedder
|
| 14 |
+
|
| 15 |
+
def load_groq_llm():
    """Return a deterministic (temperature=0) Groq chat model.

    Backend-safe: no Streamlit caching, so FastAPI workers can import it.

    Raises:
        RuntimeError: if GROQ_API_KEY is not set — fail fast with a clear
            message instead of passing api_key=None to ChatGroq and hitting
            an opaque authentication error on the first request.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set.")
    return ChatGroq(temperature=0, model="llama3-8b-8192", api_key=api_key)
|
| 18 |
+
|
| 19 |
+
def load_gemini_llm():
    """Return a deterministic (temperature=0) Gemini chat model.

    Backend-safe: no Streamlit caching, so FastAPI workers can import it.
    Credentials are resolved by the langchain-google-genai client itself.
    """
    gemini = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0,
    )
    return gemini
|
jan-contract
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit 2848b1d403225a405df97356f7f9e4c4a1a727b6
|
main_fastapi.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\main_fastapi.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import uuid
|
| 5 |
+
import tempfile
|
| 6 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 7 |
+
from fastapi.responses import StreamingResponse
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
import io
|
| 10 |
+
|
| 11 |
+
# --- Import all our backend logic and agents ---
|
| 12 |
+
from agents.legal_agent import legal_agent
|
| 13 |
+
from agents.scheme_chatbot import scheme_chatbot
|
| 14 |
+
from agents.demystifier_agent import process_document_for_demystification
|
| 15 |
+
from utils.pdf_generator import generate_formatted_pdf
|
| 16 |
+
|
| 17 |
+
# --- 1. Initialize FastAPI App ---
|
| 18 |
+
# Single FastAPI application instance; all endpoints below register on it.
app = FastAPI(
    title="Jan-Contract API",
    description="A comprehensive API for generating digital contracts, finding government schemes, and analyzing legal documents for India's informal workforce.",
    version="1.0.0",
)
|
| 23 |
+
|
| 24 |
+
# --- 2. Pydantic Models for Request Bodies ---
|
| 25 |
+
# These models provide automatic data validation and documentation for our API.
|
| 26 |
+
class ContractRequest(BaseModel):
    """Body for the /generate-contract endpoints."""
    user_request: str  # plain-language description of the agreement
|
| 28 |
+
|
| 29 |
+
class SchemeRequest(BaseModel):
    """Body for /find-schemes."""
    user_profile: str  # free-text description of the user's situation
|
| 31 |
+
|
| 32 |
+
class ChatRequest(BaseModel):
    """Body for /demystify/chat follow-up questions."""
    session_id: str  # id returned by /demystify/upload
    question: str  # follow-up question about the uploaded document
|
| 35 |
+
|
| 36 |
+
# --- 3. State Management for the Demystifier Chat ---
|
| 37 |
+
# This is a simple in-memory cache for a hackathon. For production, you would
|
| 38 |
+
# use a more robust cache like Redis.
|
| 39 |
+
SESSION_CACHE = {}  # session_id -> RAG chain; in-memory only, lost on restart
|
| 40 |
+
|
| 41 |
+
# --- 4. API Endpoints ---
|
| 42 |
+
|
| 43 |
+
@app.post("/generate-contract/json", tags=["Contract Generator"])
async def generate_contract_json(request: ContractRequest):
    """
    Takes a plain-text description and returns a structured JSON object
    containing the generated contract text (in Markdown) and relevant legal trivia.
    """
    try:
        return legal_agent.invoke({"user_request": request.user_request})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
| 54 |
+
|
| 55 |
+
@app.post("/generate-contract/pdf", tags=["Contract Generator"])
async def generate_contract_pdf(request: ContractRequest):
    """
    Takes a plain-text description, generates a contract, and returns it
    directly as a downloadable PDF file.
    """
    try:
        agent_output = legal_agent.invoke({"user_request": request.user_request})
        document_text = agent_output.get('legal_doc', "Error: Could not generate document text.")

        pdf_stream = io.BytesIO(generate_formatted_pdf(document_text))
        download_headers = {"Content-Disposition": "attachment;filename=digital_agreement.pdf"}
        return StreamingResponse(
            pdf_stream,
            media_type="application/pdf",
            headers=download_headers,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
| 74 |
+
|
| 75 |
+
@app.post("/find-schemes", tags=["Scheme Finder"])
async def find_schemes(request: SchemeRequest):
    """
    Takes a user profile description and returns a list of relevant
    government schemes with names, descriptions, and official links.
    """
    try:
        matched = scheme_chatbot.invoke({"user_profile": request.user_profile})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
    return matched
|
| 86 |
+
|
| 87 |
+
@app.post("/demystify/upload", tags=["Document Demystifier"])
async def demystify_upload(file: UploadFile = File(...)):
    """
    Upload a PDF document for analysis. This endpoint processes the document,
    creates a RAG chain for chatting, and returns the initial analysis report
    along with a unique `session_id` for follow-up questions.

    Raises:
        HTTPException 400: non-PDF upload.
        HTTPException 500: any failure while processing the document.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload a PDF.")

    # Our loader needs a real file path, so spool the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        analysis_result = process_document_for_demystification(tmp_path)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to process document: {e}")
    finally:
        # Always remove the temp file — previously it leaked whenever
        # processing raised before reaching the unlink.
        os.unlink(tmp_path)

    # Create a unique session ID and cache the RAG chain for /demystify/chat.
    session_id = str(uuid.uuid4())
    SESSION_CACHE[session_id] = analysis_result["rag_chain"]

    return {
        "session_id": session_id,
        "report": analysis_result["report"],
    }
|
| 118 |
+
|
| 119 |
+
@app.post("/demystify/chat", tags=["Document Demystifier"])
async def demystify_chat(request: ChatRequest):
    """
    Ask a follow-up question to a previously uploaded document.
    Requires the `session_id` returned by the /demystify/upload endpoint.
    """
    chain = SESSION_CACHE.get(request.session_id)
    if not chain:
        raise HTTPException(status_code=404, detail="Session not found. Please upload the document again.")

    try:
        answer = chain.invoke(request.question)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred during chat: {e}")
    return {"answer": answer}
|
main_streamlit.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\main_streamlit.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
from agents.demystifier_agent import process_document_for_demystification
|
| 8 |
+
from components.video_recorder import record_consent_video
|
| 9 |
+
from utils.pdf_generator import generate_formatted_pdf
|
| 10 |
+
|
| 11 |
+
# --- Initial Setup ---
|
| 12 |
+
# --- Initial Setup ---
load_dotenv()  # presumably loads the API keys the agents read — confirm .env contents
st.set_page_config(layout="wide", page_title="Jan-Contract Unified Assistant")
st.title("Jan-Contract: Your Digital Workforce Assistant")

# Uploaded PDFs for the demystifier tab are written here.
PDF_UPLOAD_DIR = "pdfs_demystify"
os.makedirs(PDF_UPLOAD_DIR, exist_ok=True)

# --- Tabs ---
tab1, tab2, tab3 = st.tabs([
    " **Contract Generator**",
    " **Scheme Finder**",
    " **Document Demystifier & Chat**"
])
|
| 25 |
+
|
| 26 |
+
# --- TAB 1: Contract Generator ---
|
| 27 |
+
# --- TAB 1: Contract Generator ---
with tab1:
    st.header("Create a Simple Digital Agreement")
    st.write("Turn your everyday language into a clear agreement, then provide video consent.")

    st.subheader("Step 1: Describe and Generate Your Agreement")
    user_request = st.text_area("Describe the agreement...", height=120, key="contract_request")

    if st.button("Generate Document & Get Legal Info", type="primary"):
        if user_request:
            with st.spinner("Generating document..."):
                # Lazy import so the heavy agent only loads when first used.
                from agents.legal_agent import legal_agent
                result = legal_agent.invoke({"user_request": user_request})
                st.session_state.legal_result = result
                # Reset video state for each new contract
                if 'video_path_from_component' in st.session_state:
                    del st.session_state['video_path_from_component']
                if 'frames_buffer' in st.session_state:
                    del st.session_state['frames_buffer']  # Clear old frames
        else:
            st.error("Please describe the agreement.")

    # Results persist across reruns via session state, so render outside the button.
    if 'legal_result' in st.session_state:
        result = st.session_state.legal_result
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Generated Digital Agreement")
            st.markdown(result['legal_doc'])
            pdf_bytes = generate_formatted_pdf(result['legal_doc'])
            st.download_button(label="⬇️ Download Formatted PDF", data=pdf_bytes, file_name="agreement.pdf")

        with col2:
            st.subheader("Relevant Legal Trivia")
            # ... [Trivia display logic] ...

        st.divider()

        st.subheader("Step 2: Record Video Consent for this Agreement")
        saved_video_path = record_consent_video()

        if saved_video_path:
            st.session_state.video_path_from_component = saved_video_path

        # Checked via session state so the confirmation survives reruns.
        if st.session_state.get("video_path_from_component"):
            st.success("✅ Your consent has been recorded and saved!")
            st.video(st.session_state.video_path_from_component)
            st.info("This video is now linked to your generated agreement.")
|
| 74 |
+
# --- TAB 2: Scheme Finder (Unchanged) ---
|
| 75 |
+
with tab2:
    st.header("Find Relevant Government Schemes")
    st.write("Describe yourself or your situation to find government schemes that might apply to you.")

    user_profile = st.text_input("Enter your profile...", key="scheme_profile")

    if st.button("Find Schemes", type="primary", key="b2"):
        if user_profile:
            with st.spinner("Initializing models and searching for schemes..."):
                # Lazy import the agent
                from agents.scheme_chatbot import scheme_chatbot
                response = scheme_chatbot.invoke({"user_profile": user_profile})
                st.session_state.scheme_response = response
        else:
            st.error("Please enter a profile.")

    # Cached response renders on every rerun until a new search replaces it.
    if 'scheme_response' in st.session_state:
        response = st.session_state.scheme_response
        st.subheader(f"Potential Schemes for: '{user_profile}'")
        if response and response.schemes:
            for scheme in response.schemes:
                with st.container(border=True):
                    st.markdown(f"#### {scheme.scheme_name}")
                    st.write(f"**Description:** {scheme.description}")
                    st.link_button("Go to Official Page ➡️", scheme.official_link)
|
| 100 |
+
|
| 101 |
+
# --- TAB 3: Demystifier & Chat (RESTORED to original functionality) ---
|
| 102 |
+
with tab3:
|
| 103 |
+
st.header("Simplify & Chat With Your Legal Document")
|
| 104 |
+
st.markdown("Get a plain-English summary of your document, then ask specific follow-up questions.")
|
| 105 |
+
|
| 106 |
+
uploaded_file = st.file_uploader("Choose a PDF document", type="pdf", key="demystify_uploader")
|
| 107 |
+
|
| 108 |
+
if uploaded_file and st.button("Analyze Document", type="primary"):
|
| 109 |
+
with st.spinner("Performing deep analysis and preparing for chat..."):
|
| 110 |
+
# Save the file to a persistent location
|
| 111 |
+
temp_file_path = os.path.join(PDF_UPLOAD_DIR, uploaded_file.name)
|
| 112 |
+
with open(temp_file_path, "wb") as f:
|
| 113 |
+
f.write(uploaded_file.getbuffer())
|
| 114 |
+
|
| 115 |
+
# Single call to the backend agent logic
|
| 116 |
+
analysis_result = process_document_for_demystification(temp_file_path)
|
| 117 |
+
|
| 118 |
+
# Store the results returned by the agent
|
| 119 |
+
st.session_state.demystify_report = analysis_result["report"]
|
| 120 |
+
st.session_state.rag_chain = analysis_result["rag_chain"]
|
| 121 |
+
st.session_state.messages = [] # Initialize chat history
|
| 122 |
+
|
| 123 |
+
# This part of the UI only displays after the analysis is complete
|
| 124 |
+
if 'demystify_report' in st.session_state:
|
| 125 |
+
# Step 1: Display Report
|
| 126 |
+
report = st.session_state.demystify_report
|
| 127 |
+
st.divider()
|
| 128 |
+
st.header("Step 1: Automated Document Analysis")
|
| 129 |
+
with st.container(border=True):
|
| 130 |
+
st.subheader("📄 Document Summary")
|
| 131 |
+
st.write(report.summary)
|
| 132 |
+
st.divider()
|
| 133 |
+
st.subheader("🔑 Key Terms Explained")
|
| 134 |
+
for term in report.key_terms:
|
| 135 |
+
with st.expander(f"**{term.term}**"):
|
| 136 |
+
st.write(term.explanation)
|
| 137 |
+
st.markdown(f"[Learn More Here]({term.resource_link})")
|
| 138 |
+
st.divider()
|
| 139 |
+
st.success(f"**Overall Advice:** {report.overall_advice}")
|
| 140 |
+
st.divider()
|
| 141 |
+
|
| 142 |
+
# Step 2: Display Chat
|
| 143 |
+
st.header("Step 2: Ask Follow-up Questions")
|
| 144 |
+
st.info("The document is now ready for your questions. Chat with it below.")
|
| 145 |
+
|
| 146 |
+
for message in st.session_state.get("messages", []):
|
| 147 |
+
with st.chat_message(message["role"]):
|
| 148 |
+
st.markdown(message["content"])
|
| 149 |
+
|
| 150 |
+
if prompt := st.chat_input("Ask a specific question about the document..."):
|
| 151 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 152 |
+
with st.chat_message("user"):
|
| 153 |
+
st.markdown(prompt)
|
| 154 |
+
|
| 155 |
+
with st.chat_message("assistant"):
|
| 156 |
+
with st.spinner("Searching the document..."):
|
| 157 |
+
rag_chain = st.session_state.rag_chain
|
| 158 |
+
response = rag_chain.invoke(prompt)
|
| 159 |
+
st.markdown(response)
|
| 160 |
+
|
| 161 |
+
st.session_state.messages.append({"role": "assistant", "content": response})
|
| 162 |
+
|
| 163 |
+
elif not uploaded_file:
|
| 164 |
+
st.info("Upload a PDF document to begin the analysis.")
|
requirements.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\requirements.txt
|
| 2 |
+
|
| 3 |
+
# Core LangChain libraries
|
| 4 |
+
langchain-core
|
| 5 |
+
langchain
|
| 6 |
+
langchain-community
|
| 7 |
+
langgraph
|
| 8 |
+
|
| 9 |
+
# LLM Integrations
|
| 10 |
+
langchain_google_genai
|
| 11 |
+
langchain-groq
|
| 12 |
+
|
| 13 |
+
# Tooling
|
| 14 |
+
tavily-python
|
| 15 |
+
pypdf
|
| 16 |
+
pymupdf
|
| 17 |
+
fastembed
|
| 18 |
+
faiss-cpu
|
| 19 |
+
python-multipart
|
| 20 |
+
# Web Frameworks
|
| 21 |
+
fastapi
|
| 22 |
+
uvicorn
|
| 23 |
+
streamlit
|
| 24 |
+
|
| 25 |
+
# Utilities
|
| 26 |
+
python-dotenv
|
| 27 |
+
pydantic
|
| 28 |
+
fpdf2
|
| 29 |
+
|
| 30 |
+
# --- NEW: For Video Recording ---
|
| 31 |
+
streamlit-webrtc
|
| 32 |
+
opencv-python-headless
|
| 33 |
+
av
|
tools/__init__.py
ADDED
|
File without changes
|
tools/legal_tools.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\tools\legal_tools.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain.tools import tool
|
| 6 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
# Tavily reads TAVILY_API_KEY from the environment. Only propagate the key
# when .env actually supplied one: os.environ values must be str, so
# assigning the None returned by a missing key raises TypeError and
# previously crashed this module at import time.
_tavily_key = os.getenv("TAVILY_API_KEY")
if _tavily_key is not None:
    os.environ["TAVILY_API_KEY"] = _tavily_key
|
| 10 |
+
|
| 11 |
+
@tool
def legal_search(query: str):
    """
    Searches for legal information and relevant sections for a given query in the Indian context.
    Use this tool to find legal trivia and sections related to agreements.
    """
    # Up to 5 hits so the downstream agent has richer legal context to cite.
    searcher = TavilySearchResults(max_results=5)
    return searcher.invoke(f"Indian law and sections for: {query}")
|
tools/scheme_tools.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\tools\scheme_tools.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain.tools import tool
|
| 6 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
# Tavily reads TAVILY_API_KEY from the environment. Only propagate the key
# when .env actually supplied one: os.environ values must be str, so
# assigning the None returned by a missing key raises TypeError and
# previously crashed this module at import time.
_tavily_key = os.getenv("TAVILY_API_KEY")
if _tavily_key is not None:
    os.environ["TAVILY_API_KEY"] = _tavily_key
|
| 10 |
+
|
| 11 |
+
@tool
def scheme_search(query: str):
    """
    Searches for government schemes based on a user's profile.
    Use this tool to find relevant government schemes for a user.
    """
    # 7 results (instead of the default) to pull content from more official
    # sources; the query is restricted to government domains.
    searcher = TavilySearchResults(max_results=7)
    return searcher.invoke(f"official government schemes for {query} in India site:gov.in OR site:nic.in")
|
utils/__init__.py
ADDED
|
File without changes
|
utils/model_loaders.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\utils\model_loaders.py
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
# Import from our new backend-safe loader
|
| 5 |
+
from core_utils.core_model_loaders import load_embedding_model, load_groq_llm, load_gemini_llm
|
| 6 |
+
|
| 7 |
+
@st.cache_resource
def get_embedding_model():
    """Load the embedding model once and cache it for the whole Streamlit session."""
    # The first call downloads model weights, so surface progress to the user.
    with st.spinner("Initializing embedding model (this is a one-time download)..."):
        return load_embedding_model()
|
| 13 |
+
|
| 14 |
+
@st.cache_resource
def get_groq_llm():
    """Return the Groq LLM, constructed once and cached by Streamlit."""
    llm = load_groq_llm()
    return llm
|
| 18 |
+
|
| 19 |
+
@st.cache_resource
def get_gemini_llm():
    """Return the Gemini LLM, constructed once and cached by Streamlit."""
    llm = load_gemini_llm()
    return llm
|
utils/pdf_generator.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# D:\jan-contract\utils\pdf_generator.py
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from fpdf import FPDF
|
| 5 |
+
|
| 6 |
+
def markdown_to_html_for_fpdf(md_text: str) -> str:
    """
    Convert the small Markdown subset we emit (bold spans and newlines)
    into the simple HTML dialect that FPDF's write_html() understands.
    """
    # Wrap each **...** span in <b> tags. This must happen before the
    # newline substitution: '.' never matches a newline, so bold markers
    # separated by a line break are deliberately left untouched.
    bold_pattern = re.compile(r"\*\*(.*?)\*\*")
    converted = bold_pattern.sub(lambda m: f"<b>{m.group(1)}</b>", md_text)

    # Hard line breaks become explicit HTML <br> tags.
    return converted.replace("\n", "<br>")
|
| 19 |
+
|
| 20 |
+
def generate_formatted_pdf(text: str) -> bytes:
    """
    Takes a string containing Markdown and converts it into a well-formatted PDF
    by first converting the Markdown to HTML and then rendering the HTML.

    Args:
        text (str): The content of the contract, with Markdown syntax.

    Returns:
        bytes: The content of the generated PDF file as a byte string.
    """
    pdf = FPDF()
    pdf.add_page()
    # "helvetica" is one of fpdf2's built-in core fonts. Passing "Arial"
    # emits a deprecation warning and is silently substituted by Helvetica
    # anyway, so name the core font directly.
    pdf.set_font("helvetica", size=12)

    # Convert our Markdown-style text into simple HTML (<b> and <br> tags).
    html_content = markdown_to_html_for_fpdf(text)

    # write_html() renders the markup. Core fonts only cover Latin-1, so
    # replace any unmappable characters instead of raising UnicodeEncodeError.
    pdf.write_html(html_content.encode('latin-1', 'replace').decode('latin-1'))

    # Return the PDF as a 'bytes' object, which Streamlit's download_button requires.
    return bytes(pdf.output())
|
video_consents/consent_20250823_162229.mp4
ADDED
|
Binary file (28.2 kB). View file
|
|
|