Amodit commited on
Commit
87296cd
·
0 Parent(s):

Initial commit with project files

Browse files
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+
7
+ # Environments
8
+ .env
9
+ venv/
10
+
11
+ # IDE
12
+ .vscode/
agents/__init__.py ADDED
File without changes
agents/demystifier_agent.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\agents\demystifier_agent.py
2
+
3
+ import os
4
+ from typing import TypedDict, List
5
+ from pydantic import BaseModel, Field
6
+
7
+ # --- Core LangChain & Document Processing Imports ---
8
+ from langchain_community.document_loaders import PyMuPDFLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain.prompts import PromptTemplate
12
+ from langchain.schema.runnable import RunnablePassthrough
13
+ from langchain.schema.output_parser import StrOutputParser
14
+
15
+ # LangGraph Imports
16
+ from langgraph.graph import StateGraph, END, START
17
+
18
+ # --- Tool and NEW Core Model Loader Imports ---
19
+ from tools.legal_tools import legal_search
20
+ from core_utils.core_model_loaders import load_groq_llm, load_embedding_model
21
+
22
+ # --- 1. Model and Parser Setup ---
23
+ # Initialize models by calling the backend-safe loader functions
24
+ groq_llm = load_groq_llm()
25
+ embedding_model = load_embedding_model()
26
+
27
+ # --- Pydantic Models (No Changes) ---
28
class ExplainedTerm(BaseModel):
    """One legal term found in the document, with a plain-English gloss."""

    term: str = Field(description="The legal term or jargon identified.")
    explanation: str = Field(description="A simple, plain-English explanation of the term.")
    resource_link: str = Field(description="A working URL for a resource explaining this term in India.")
32
+
33
class DemystifyReport(BaseModel):
    """Structured output of the demystifier graph returned to the frontends."""

    summary: str = Field(description="A concise summary of the legal document's purpose and key points.")
    key_terms: List[ExplainedTerm] = Field(description="A list of the most important explained legal terms.")
    overall_advice: str = Field(description="A concluding sentence of general advice.")
37
+
38
+ # --- 2. LangGraph for Document Analysis (No Changes) ---
39
class DemystifyState(TypedDict):
    """Shared state flowing through the demystifier LangGraph."""

    # Raw text of each document chunk (the graph's input).
    document_chunks: List[str]
    # High-level summary produced by summarize_node.
    summary: str
    # Key terms extracted by identify_terms_node.
    identified_terms: List[str]
    # Structured report assembled by generate_report_node.
    final_report: DemystifyReport
44
+
45
def summarize_node(state: DemystifyState):
    """Produce a high-level summary of the whole document.

    Joins every chunk into one context string and asks the Groq LLM for a
    paralegal-style summary; returns the partial state update for LangGraph.
    """
    print("---NODE (Demystify): Generating Summary---")
    full_text = "\n\n".join(state["document_chunks"])
    summary_text = groq_llm.invoke(
        f"You are a paralegal expert... Document Content:\n{full_text}"
    ).content
    return {"summary": summary_text}
52
+
53
def identify_terms_node(state: DemystifyState):
    """Extract the 3-5 most critical legal terms from the document."""
    print("---NODE (Demystify): Identifying Key Terms---")
    full_text = "\n\n".join(state["document_chunks"])
    raw_answer = groq_llm.invoke(
        f"Based on the following legal document, identify the 3-5 most critical legal terms... Document Content:\n{full_text}"
    ).content
    # The model answers with a comma-separated list; keep non-empty entries.
    terms = []
    for candidate in raw_answer.split(','):
        cleaned = candidate.strip()
        if cleaned:
            terms.append(cleaned)
    return {"identified_terms": terms}
61
+
62
def generate_report_node(state: DemystifyState):
    """Assemble the final DemystifyReport from the summary and researched terms.

    For every identified term, runs a web search, asks the LLM for a
    one-sentence explanation plus a resource URL in a fixed text format,
    and parses that format into an ExplainedTerm.
    """
    print("---NODE (Demystify): Generating Final Report---")
    doc_text = "\n\n".join(state["document_chunks"])
    enriched_terms = []
    for legal_term in state["identified_terms"]:
        print(f" - Researching term: {legal_term}")
        web_context = legal_search.invoke(f"simple explanation of legal term '{legal_term}' in Indian law")
        llm_answer = groq_llm.invoke(
            f"""A user is reading a legal document that contains the term "{legal_term}".
Overall document context is: {doc_text[:2000]}
Web search results for "{legal_term}" are: {web_context}
Format your response strictly as:
Explanation: [Your simple, one-sentence explanation here]
URL: [The best, full, working URL from the search results]"""
        ).content
        try:
            term_explanation = llm_answer.split("Explanation:")[1].split("URL:")[0].strip()
            term_link = llm_answer.split("URL:")[-1].strip()
        except IndexError:
            # The LLM did not follow the requested format; degrade gracefully.
            term_explanation = "Could not generate a simple explanation for this term."
            term_link = "No link found."
        enriched_terms.append(
            ExplainedTerm(term=legal_term, explanation=term_explanation, resource_link=term_link)
        )
    report = DemystifyReport(
        summary=state["summary"],
        key_terms=enriched_terms,
        overall_advice="This is an automated analysis. For critical matters, please consult with a qualified legal professional.",
    )
    return {"final_report": report}
86
+
87
# Compile the linear analysis graph:
# START -> summarize -> identify_terms -> generate_report -> END
graph_builder = StateGraph(DemystifyState)
for node_name, node_fn in (
    ("summarize", summarize_node),
    ("identify_terms", identify_terms_node),
    ("generate_report", generate_report_node),
):
    graph_builder.add_node(node_name, node_fn)
for edge_src, edge_dst in (
    (START, "summarize"),
    ("summarize", "identify_terms"),
    ("identify_terms", "generate_report"),
    ("generate_report", END),
):
    graph_builder.add_edge(edge_src, edge_dst)
demystifier_agent_graph = graph_builder.compile()
97
+
98
+ # --- 3. Helper Function to Create the RAG Chain (No Changes) ---
99
def create_rag_chain(retriever):
    """Build the retrieval-augmented Q&A chain used by the interactive chat.

    The retriever fills {context}; the user's raw question passes through
    unchanged into {question}.
    """
    qa_prompt = PromptTemplate.from_template(
        """You are a helpful assistant... CONTEXT: {context} QUESTION: {question} ANSWER:"""
    )
    return (
        {"context": retriever, "question": RunnablePassthrough()}
        | qa_prompt
        | groq_llm
        | StrOutputParser()
    )
105
+
106
+ # --- 4. The Master "Controller" Function (No Changes) ---
107
def process_document_for_demystification(file_path: str):
    """Run the full demystification pipeline on one PDF.

    Loads and chunks the file, builds a FAISS-backed RAG chain for follow-up
    chat, runs the analysis graph for the structured report, and returns
    ``{"report": DemystifyReport | None, "rag_chain": Runnable}``.
    """
    print(f"--- Processing document: {file_path} ---")
    pages = PyMuPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)

    print("--- Creating FAISS vector store for Q&A ---")
    vector_store = FAISS.from_documents(chunks, embedding=embedding_model)
    qa_chain = create_rag_chain(vector_store.as_retriever(search_kwargs={"k": 3}))

    print("--- Running analysis graph for the report ---")
    graph_result = demystifier_agent_graph.invoke(
        {"document_chunks": [chunk.page_content for chunk in chunks]}
    )
    return {"report": graph_result.get("final_report"), "rag_chain": qa_chain}
agents/legal_agent.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\agents\legal_agent.py
2
+
3
+ import os
4
+ from langchain.prompts import PromptTemplate
5
+ from langgraph.graph import StateGraph, END
6
+ from typing import List, TypedDict
7
+ from pydantic import BaseModel, Field
8
+ from langchain_core.output_parsers import PydanticOutputParser
9
+
10
+ # --- Tool and NEW Core Model Loader Imports ---
11
+ from tools.legal_tools import legal_search
12
+ from core_utils.core_model_loaders import load_gemini_llm
13
+
14
+ # --- Pydantic Models (No Changes) ---
15
class LegalTriviaItem(BaseModel):
    """A single legal right/point relevant to the user's situation."""

    point: str = Field(description="A concise summary of the legal point or right.")
    explanation: str = Field(description="A brief explanation of what the point means for the user.")
    source_url: str = Field(description="The full, working URL to the official source or a highly reputable article explaining the law.")
19
+
20
class LegalTriviaOutput(BaseModel):
    """Container parsed from the LLM by PydanticOutputParser."""

    trivia: List[LegalTriviaItem] = Field(description="A list of structured legal trivia items.")
22
+
23
+ # --- Setup Models and Parsers ---
24
+ parser = PydanticOutputParser(pydantic_object=LegalTriviaOutput)
25
+
26
+ # --- Initialize the LLM by calling the backend-safe loader function ---
27
+ llm = load_gemini_llm()
28
+
29
+ # --- LangGraph State (No Changes) ---
30
class LegalAgentState(TypedDict):
    """State shared by the two-step legal-agent LangGraph."""

    # Plain-language description of the agreement (graph input).
    user_request: str
    # Drafted agreement text produced by generate_legal_doc.
    legal_doc: str
    # Structured trivia produced by get_legal_trivia.
    legal_trivia: LegalTriviaOutput
34
+
35
+ # --- LangGraph Nodes (No Changes) ---
36
def generate_legal_doc(state: LegalAgentState):
    """Draft a simple informal-agreement text from the user's request."""
    drafted_text = llm.invoke(
        f"Based on the user's request, generate a simple legal document text for an informal agreement in India. Keep it clear and simple.\n\nUser Request: {state['user_request']}"
    ).content
    return {"legal_doc": drafted_text}
40
+
41
def get_legal_trivia(state: LegalAgentState):
    """Search the web for the user's situation and return structured trivia.

    Builds a prompt with the parser's format instructions baked in, pipes it
    through the Gemini LLM and the Pydantic parser, and stores the result.
    """
    trivia_prompt = PromptTemplate(
        template="""
You are a specialized legal assistant for India's informal workforce...
User's situation: {user_request}
Web search results: {search_results}
{format_instructions}
""",
        input_variables=["user_request", "search_results"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    search_results = legal_search.invoke(state["user_request"])
    structured = (trivia_prompt | llm | parser).invoke(
        {"user_request": state["user_request"], "search_results": search_results}
    )
    return {"legal_trivia": structured}
56
+
57
# --- Build Graph (No Changes) ---
# Linear flow: generate_legal_doc -> get_legal_trivia -> END.
workflow = StateGraph(LegalAgentState)
workflow.add_node("generate_legal_doc", generate_legal_doc)
workflow.add_node("get_legal_trivia", get_legal_trivia)
workflow.set_entry_point("generate_legal_doc")
workflow.add_edge("generate_legal_doc", "get_legal_trivia")
workflow.add_edge("get_legal_trivia", END)
# Compiled agent imported by the FastAPI and Streamlit frontends.
legal_agent = workflow.compile()
agents/scheme_chatbot.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\agents\scheme_chatbot.py
2
+
3
+ import os
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.schema.runnable import RunnablePassthrough
6
+ from pydantic import BaseModel, Field
7
+ from langchain_core.output_parsers import PydanticOutputParser
8
+ from typing import List
9
+
10
+ # --- Tool and NEW Core Model Loader Imports ---
11
+ from tools.scheme_tools import scheme_search
12
+ from core_utils.core_model_loaders import load_gemini_llm
13
+
14
+ # --- Pydantic Models (No Changes) ---
15
class GovernmentScheme(BaseModel):
    """One government scheme matched to the user's profile."""

    scheme_name: str = Field(description="The official name of the government scheme.")
    description: str = Field(description="A concise summary of the scheme's objectives and benefits.")
    target_audience: str = Field(description="Who the scheme is intended for (e.g., Women, Farmers, PwD).")
    official_link: str = Field(description="The full, working URL to the official government scheme page or portal.")
20
+
21
class SchemeOutput(BaseModel):
    """Container parsed from the LLM by PydanticOutputParser."""

    schemes: List[GovernmentScheme] = Field(description="A list of relevant government schemes.")
23
+
24
+ # --- Setup Models and Parsers ---
25
+ parser = PydanticOutputParser(pydantic_object=SchemeOutput)
26
+
27
+ # --- Initialize the LLM by calling the backend-safe loader function ---
28
+ llm = load_gemini_llm()
29
+
30
# --- Prompt Template (No Changes) ---
# {user_profile} and {search_results} are filled at invoke time; the parser's
# JSON format instructions are baked in as a partial variable.
prompt = PromptTemplate(
    template="""
You are an expert assistant for Indian government schemes...
User Profile: {user_profile}
Web search results: {search_results}
{format_instructions}
""",
    input_variables=["user_profile", "search_results"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
41
+
42
+ # --- Build Chain (No Changes) ---
43
def get_search_results(query: dict):
    """Run the scheme web search for the profile contained in the chain input."""
    user_profile = query["user_profile"]
    return scheme_search.invoke(user_profile)
45
+
46
# Pipeline: gather web results for the profile -> fill the prompt ->
# Gemini LLM -> parse into a SchemeOutput object.
scheme_chatbot = (
    {"search_results": get_search_results, "user_profile": RunnablePassthrough()}
    | prompt
    | llm
    | parser
)
components/__init__.py ADDED
File without changes
components/video_recorder.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\components\video_recorder.py
2
+
3
+ import os
4
+ import streamlit as st
5
+ import datetime
6
+ import av
7
+
8
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode
9
+
10
+ VIDEO_CONSENT_DIR = "video_consents"
11
+ os.makedirs(VIDEO_CONSENT_DIR, exist_ok=True)
12
+
13
def record_consent_video():
    """
    Encapsulates the video recording logic using the component's internal state.

    The video is automatically saved when the user clicks the "STOP" button
    on the webrtc component.

    Returns:
        str | None: The file path of the saved video, or None if not saved yet.
    """
    # Instructions for the new, more intuitive workflow
    st.info("Instructions: Click START, record your consent, then click STOP to finalize.")

    webrtc_ctx = webrtc_streamer(
        key="video-consent-recorder",
        mode=WebRtcMode.SENDRECV,  # SENDRECV mode is needed for the stop-button-triggered callback
        media_stream_constraints={"video": True, "audio": True},
        video_receiver_size=256,
        async_processing=True,
    )

    # This block executes ONLY when the component is running (after START is clicked)
    if webrtc_ctx.state.playing and webrtc_ctx.video_receiver:
        # Inform the user that recording is in progress
        st.success("🔴 Recording in progress...")

        # If the 'frames_buffer' is not in session state, initialize it
        if "frames_buffer" not in st.session_state:
            st.session_state.frames_buffer = []

        # Append each new frame to our session state buffer.
        # NOTE(review): this loop blocks the Streamlit script run until the
        # receiver stops yielding frames — confirm it does not freeze the UI
        # during long recordings.
        while True:
            try:
                frame = webrtc_ctx.video_receiver.get_frame(timeout=1)
                st.session_state.frames_buffer.append(frame)
            except av.error.TimeoutError:
                # NOTE(review): verify get_frame() raises av.error.TimeoutError
                # in the installed streamlit-webrtc version — some versions
                # signal an empty frame queue with queue.Empty instead.
                break  # Break the loop when the stream ends (user clicks STOP)

    # This block executes after the user clicks STOP
    if not webrtc_ctx.state.playing and st.session_state.get("frames_buffer"):
        with st.spinner("Saving your recording..."):
            try:
                video_frames = st.session_state.frames_buffer

                # Generate a unique filename
                timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                video_filename = os.path.join(VIDEO_CONSENT_DIR, f"consent_{timestamp}.mp4")

                # Use the av library to write the buffered frames to a video file
                with av.open(video_filename, mode="w") as container:
                    stream = container.add_stream("libx264", rate=24)
                    stream.width = video_frames[0].width
                    stream.height = video_frames[0].height
                    stream.pix_fmt = "yuv420p"

                    for frame in video_frames:
                        packet = stream.encode(frame)
                        container.mux(packet)

                    # Flush the stream (encode with no frame drains the encoder)
                    packet = stream.encode()
                    container.mux(packet)

                # Clear the buffer from session state and return the path
                st.session_state.frames_buffer = []
                st.session_state.video_filename = video_filename
                return video_filename

            except Exception as e:
                st.error(f"An error occurred while saving the video: {e}")
                st.session_state.frames_buffer = []  # Clear buffer on error
                return None

    return None
core_utils/core_model_loaders.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\core_utils\core_model_loaders.py
2
+
3
+ import os
4
+ from langchain_community.embeddings import FastEmbedEmbeddings
5
+ from langchain_groq import ChatGroq
6
+ from langchain_google_genai import ChatGoogleGenerativeAI
7
+
8
+ # --- Simple, non-caching functions for the backend ---
9
+ # These can be safely imported by FastAPI or any other backend script.
10
+
11
def load_embedding_model():
    """Return the FastEmbed embedding model (no Streamlit dependencies)."""
    model_id = "BAAI/bge-base-en-v1.5"
    return FastEmbedEmbeddings(model_name=model_id)
14
+
15
def load_groq_llm():
    """Return a deterministic (temperature=0) Groq chat model, Streamlit-free."""
    api_key = os.getenv("GROQ_API_KEY")
    return ChatGroq(temperature=0, model="llama3-8b-8192", api_key=api_key)
18
+
19
def load_gemini_llm():
    """Return a deterministic (temperature=0) Gemini chat model, Streamlit-free."""
    model_name = "gemini-1.5-flash"
    return ChatGoogleGenerativeAI(model=model_name, temperature=0)
jan-contract ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 2848b1d403225a405df97356f7f9e4c4a1a727b6
main_fastapi.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\main_fastapi.py
2
+
3
+ import os
4
+ import uuid
5
+ import tempfile
6
+ from fastapi import FastAPI, UploadFile, File, HTTPException
7
+ from fastapi.responses import StreamingResponse
8
+ from pydantic import BaseModel
9
+ import io
10
+
11
+ # --- Import all our backend logic and agents ---
12
+ from agents.legal_agent import legal_agent
13
+ from agents.scheme_chatbot import scheme_chatbot
14
+ from agents.demystifier_agent import process_document_for_demystification
15
+ from utils.pdf_generator import generate_formatted_pdf
16
+
17
+ # --- 1. Initialize FastAPI App ---
18
+ app = FastAPI(
19
+ title="Jan-Contract API",
20
+ description="A comprehensive API for generating digital contracts, finding government schemes, and analyzing legal documents for India's informal workforce.",
21
+ version="1.0.0",
22
+ )
23
+
24
+ # --- 2. Pydantic Models for Request Bodies ---
25
+ # These models provide automatic data validation and documentation for our API.
26
class ContractRequest(BaseModel):
    """Body for the contract-generation endpoints."""

    # Plain-language description of the agreement the user wants.
    user_request: str

class SchemeRequest(BaseModel):
    """Body for the scheme-finder endpoint."""

    # Free-text description of the user's profile/situation.
    user_profile: str

class ChatRequest(BaseModel):
    """Body for the demystifier follow-up chat endpoint."""

    # Session id returned by /demystify/upload.
    session_id: str
    # Follow-up question about the uploaded document.
    question: str
35
+
36
+ # --- 3. State Management for the Demystifier Chat ---
37
+ # This is a simple in-memory cache for a hackathon. For production, you would
38
+ # use a more robust cache like Redis.
39
+ SESSION_CACHE = {}
40
+
41
+ # --- 4. API Endpoints ---
42
+
43
@app.post("/generate-contract/json", tags=["Contract Generator"])
async def generate_contract_json(request: ContractRequest):
    """
    Takes a plain-text description and returns a structured JSON object
    containing the generated contract text (in Markdown) and relevant legal trivia.
    """
    try:
        # The agent returns the full graph state (doc text + trivia).
        return legal_agent.invoke({"user_request": request.user_request})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
54
+
55
@app.post("/generate-contract/pdf", tags=["Contract Generator"])
async def generate_contract_pdf(request: ContractRequest):
    """
    Takes a plain-text description, generates a contract, and returns it
    directly as a downloadable PDF file.
    """
    try:
        agent_output = legal_agent.invoke({"user_request": request.user_request})
        markdown_text = agent_output.get('legal_doc', "Error: Could not generate document text.")
        pdf_bytes = generate_formatted_pdf(markdown_text)
        # Stream the in-memory PDF back as a file download.
        return StreamingResponse(
            io.BytesIO(pdf_bytes),
            media_type="application/pdf",
            headers={"Content-Disposition": "attachment;filename=digital_agreement.pdf"},
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
74
+
75
@app.post("/find-schemes", tags=["Scheme Finder"])
async def find_schemes(request: SchemeRequest):
    """
    Takes a user profile description and returns a list of relevant
    government schemes with names, descriptions, and official links.
    """
    try:
        # The chain returns a SchemeOutput pydantic object (FastAPI serializes it).
        return scheme_chatbot.invoke({"user_profile": request.user_profile})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
86
+
87
@app.post("/demystify/upload", tags=["Document Demystifier"])
async def demystify_upload(file: UploadFile = File(...)):
    """
    Upload a PDF document for analysis. This endpoint processes the document,
    creates a RAG chain for chatting, and returns the initial analysis report
    along with a unique `session_id` for follow-up questions.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload a PDF.")

    try:
        # Use a temporary file to save the upload, as our loader needs a file path
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        try:
            analysis_result = process_document_for_demystification(tmp_path)
        finally:
            # BUGFIX: always remove the temp file. Previously os.unlink ran only
            # on the success path, so a failed analysis leaked the file on disk.
            os.unlink(tmp_path)

        # Create a unique session ID and cache the RAG chain
        session_id = str(uuid.uuid4())
        SESSION_CACHE[session_id] = analysis_result["rag_chain"]

        return {
            "session_id": session_id,
            "report": analysis_result["report"]
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to process document: {e}")
118
+
119
@app.post("/demystify/chat", tags=["Document Demystifier"])
async def demystify_chat(request: ChatRequest):
    """
    Ask a follow-up question to a previously uploaded document.
    Requires the `session_id` returned by the /demystify/upload endpoint.
    """
    rag_chain = SESSION_CACHE.get(request.session_id)
    if not rag_chain:
        raise HTTPException(status_code=404, detail="Session not found. Please upload the document again.")

    try:
        answer_text = rag_chain.invoke(request.question)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred during chat: {e}")
    return {"answer": answer_text}
main_streamlit.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\main_streamlit.py
2
+
3
+ import os
4
+ import streamlit as st
5
+ from dotenv import load_dotenv
6
+
7
+ from agents.demystifier_agent import process_document_for_demystification
8
+ from components.video_recorder import record_consent_video
9
+ from utils.pdf_generator import generate_formatted_pdf
10
+
11
+ # --- Initial Setup ---
12
+ load_dotenv()
13
+ st.set_page_config(layout="wide", page_title="Jan-Contract Unified Assistant")
14
+ st.title("Jan-Contract: Your Digital Workforce Assistant")
15
+
16
+ PDF_UPLOAD_DIR = "pdfs_demystify"
17
+ os.makedirs(PDF_UPLOAD_DIR, exist_ok=True)
18
+
19
+ # --- Tabs ---
20
+ tab1, tab2, tab3 = st.tabs([
21
+ " **Contract Generator**",
22
+ " **Scheme Finder**",
23
+ " **Document Demystifier & Chat**"
24
+ ])
25
+
26
# --- TAB 1: Contract Generator ---
# Flow: user describes the agreement -> legal_agent drafts it + finds trivia
# -> user downloads a PDF and records a consent video.
with tab1:
    st.header("Create a Simple Digital Agreement")
    st.write("Turn your everyday language into a clear agreement, then provide video consent.")

    st.subheader("Step 1: Describe and Generate Your Agreement")
    user_request = st.text_area("Describe the agreement...", height=120, key="contract_request")

    if st.button("Generate Document & Get Legal Info", type="primary"):
        if user_request:
            with st.spinner("Generating document..."):
                # Lazy import so the heavy agent stack loads only when used.
                from agents.legal_agent import legal_agent
                result = legal_agent.invoke({"user_request": user_request})
                st.session_state.legal_result = result
                # Reset video state for each new contract
                if 'video_path_from_component' in st.session_state:
                    del st.session_state['video_path_from_component']
                if 'frames_buffer' in st.session_state:
                    del st.session_state['frames_buffer']  # Clear old frames
        else:
            st.error("Please describe the agreement.")

    # Results persist across Streamlit reruns via session_state.
    if 'legal_result' in st.session_state:
        result = st.session_state.legal_result
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Generated Digital Agreement")
            st.markdown(result['legal_doc'])
            pdf_bytes = generate_formatted_pdf(result['legal_doc'])
            st.download_button(label="⬇️ Download Formatted PDF", data=pdf_bytes, file_name="agreement.pdf")

        with col2:
            st.subheader("Relevant Legal Trivia")
            # ... [Trivia display logic] ...

        st.divider()

        st.subheader("Step 2: Record Video Consent for this Agreement")
        saved_video_path = record_consent_video()

        if saved_video_path:
            st.session_state.video_path_from_component = saved_video_path

        if st.session_state.get("video_path_from_component"):
            st.success("✅ Your consent has been recorded and saved!")
            st.video(st.session_state.video_path_from_component)
            st.info("This video is now linked to your generated agreement.")
74
# --- TAB 2: Scheme Finder (Unchanged) ---
# Flow: free-text profile -> scheme_chatbot -> cards with official links.
with tab2:
    st.header("Find Relevant Government Schemes")
    st.write("Describe yourself or your situation to find government schemes that might apply to you.")

    user_profile = st.text_input("Enter your profile...", key="scheme_profile")

    if st.button("Find Schemes", type="primary", key="b2"):
        if user_profile:
            with st.spinner("Initializing models and searching for schemes..."):
                # Lazy import the agent
                from agents.scheme_chatbot import scheme_chatbot
                response = scheme_chatbot.invoke({"user_profile": user_profile})
                st.session_state.scheme_response = response
        else:
            st.error("Please enter a profile.")

    # Render the last response (survives reruns via session_state).
    if 'scheme_response' in st.session_state:
        response = st.session_state.scheme_response
        st.subheader(f"Potential Schemes for: '{user_profile}'")
        if response and response.schemes:
            for scheme in response.schemes:
                with st.container(border=True):
                    st.markdown(f"#### {scheme.scheme_name}")
                    st.write(f"**Description:** {scheme.description}")
                    st.link_button("Go to Official Page ➡️", scheme.official_link)
100
+
101
# --- TAB 3: Demystifier & Chat (RESTORED to original functionality) ---
# Flow: upload PDF -> single backend call builds both the report and the RAG
# chain -> report rendered -> chat loop reuses the cached chain.
with tab3:
    st.header("Simplify & Chat With Your Legal Document")
    st.markdown("Get a plain-English summary of your document, then ask specific follow-up questions.")

    uploaded_file = st.file_uploader("Choose a PDF document", type="pdf", key="demystify_uploader")

    if uploaded_file and st.button("Analyze Document", type="primary"):
        with st.spinner("Performing deep analysis and preparing for chat..."):
            # Save the file to a persistent location
            temp_file_path = os.path.join(PDF_UPLOAD_DIR, uploaded_file.name)
            with open(temp_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            # Single call to the backend agent logic
            analysis_result = process_document_for_demystification(temp_file_path)

            # Store the results returned by the agent
            st.session_state.demystify_report = analysis_result["report"]
            st.session_state.rag_chain = analysis_result["rag_chain"]
            st.session_state.messages = []  # Initialize chat history

    # This part of the UI only displays after the analysis is complete
    if 'demystify_report' in st.session_state:
        # Step 1: Display Report
        report = st.session_state.demystify_report
        st.divider()
        st.header("Step 1: Automated Document Analysis")
        with st.container(border=True):
            st.subheader("📄 Document Summary")
            st.write(report.summary)
            st.divider()
            st.subheader("🔑 Key Terms Explained")
            for term in report.key_terms:
                with st.expander(f"**{term.term}**"):
                    st.write(term.explanation)
                    st.markdown(f"[Learn More Here]({term.resource_link})")
            st.divider()
            st.success(f"**Overall Advice:** {report.overall_advice}")
        st.divider()

        # Step 2: Display Chat
        st.header("Step 2: Ask Follow-up Questions")
        st.info("The document is now ready for your questions. Chat with it below.")

        # Replay prior turns so the transcript survives reruns.
        for message in st.session_state.get("messages", []):
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if prompt := st.chat_input("Ask a specific question about the document..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                with st.spinner("Searching the document..."):
                    rag_chain = st.session_state.rag_chain
                    response = rag_chain.invoke(prompt)
                    st.markdown(response)

            st.session_state.messages.append({"role": "assistant", "content": response})

    elif not uploaded_file:
        st.info("Upload a PDF document to begin the analysis.")
requirements.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\requirements.txt
2
+
3
+ # Core LangChain libraries
4
+ langchain-core
5
+ langchain
6
+ langchain-community
7
+ langgraph
8
+
9
+ # LLM Integrations
10
+ langchain_google_genai
11
+ langchain-groq
12
+
13
+ # Tooling
14
+ tavily-python
15
+ pypdf
16
+ pymupdf
17
+ fastembed
18
+ faiss-cpu
19
+ python-multipart
20
+ # Web Frameworks
21
+ fastapi
22
+ uvicorn
23
+ streamlit
24
+
25
+ # Utilities
26
+ python-dotenv
27
+ pydantic
28
+ fpdf2
29
+
30
+ # --- NEW: For Video Recording ---
31
+ streamlit-webrtc
32
+ opencv-python-headless
33
+ av
tools/__init__.py ADDED
File without changes
tools/legal_tools.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\tools\legal_tools.py
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from langchain.tools import tool
6
+ from langchain_community.tools.tavily_search import TavilySearchResults
7
+
8
# Load variables from .env, then mirror the Tavily key into os.environ so
# TavilySearchResults can find it.
load_dotenv()
# BUGFIX: guard against a missing key — assigning None to os.environ raises
# "TypeError: str expected, not NoneType" at import time.
_tavily_api_key = os.getenv("TAVILY_API_KEY")
if _tavily_api_key:
    os.environ["TAVILY_API_KEY"] = _tavily_api_key
10
+
11
@tool
def legal_search(query: str):
    """
    Searches for legal information and relevant sections for a given query in the Indian context.
    Use this tool to find legal trivia and sections related to agreements.
    """
    # Five results gives the downstream LLM enough context without flooding it.
    searcher = TavilySearchResults(max_results=5)
    return searcher.invoke(f"Indian law and sections for: {query}")
tools/scheme_tools.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\tools\scheme_tools.py
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from langchain.tools import tool
6
+ from langchain_community.tools.tavily_search import TavilySearchResults
7
+
8
# Load variables from .env, then mirror the Tavily key into os.environ so
# TavilySearchResults can find it.
load_dotenv()
# BUGFIX: guard against a missing key — assigning None to os.environ raises
# "TypeError: str expected, not NoneType" at import time.
_tavily_api_key = os.getenv("TAVILY_API_KEY")
if _tavily_api_key:
    os.environ["TAVILY_API_KEY"] = _tavily_api_key
10
+
11
@tool
def scheme_search(query: str):
    """
    Searches for government schemes based on a user's profile.
    Use this tool to find relevant government schemes for a user.
    """
    # Restrict hits to official portals; seven results for broader coverage.
    searcher = TavilySearchResults(max_results=7)
    return searcher.invoke(f"official government schemes for {query} in India site:gov.in OR site:nic.in")
utils/__init__.py ADDED
File without changes
utils/model_loaders.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\utils\model_loaders.py
2
+
3
+ import streamlit as st
4
+ # Import from our new backend-safe loader
5
+ from core_utils.core_model_loaders import load_embedding_model, load_groq_llm, load_gemini_llm
6
+
7
@st.cache_resource
def get_embedding_model():
    """Loads and caches the embedding model for the Streamlit app.

    @st.cache_resource creates the model once per process and shares it
    across sessions and reruns.
    """
    with st.spinner("Initializing embedding model (this is a one-time download)..."):
        model = load_embedding_model()
    return model
13
+
14
@st.cache_resource
def get_groq_llm():
    """Loads and caches the Groq LLM for the Streamlit app."""
    # Delegates to the backend-safe loader; caching avoids recreating the
    # client object on every rerun.
    return load_groq_llm()
18
+
19
@st.cache_resource
def get_gemini_llm():
    """Loads and caches the Gemini LLM for the Streamlit app."""
    # Delegates to the backend-safe loader; caching avoids recreating the
    # client object on every rerun.
    return load_gemini_llm()
utils/pdf_generator.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # D:\jan-contract\utils\pdf_generator.py
2
+
3
+ import re
4
+ from fpdf import FPDF
5
+
6
def markdown_to_html_for_fpdf(md_text: str) -> str:
    """Translate minimal Markdown (bold + newlines) into FPDF-friendly HTML.

    Only two constructs are handled: ``**bold**`` spans become ``<b>…</b>``
    tags, and newline characters become ``<br>`` tags.
    """
    # Bold first, so the substituted <b> tags are untouched by the newline
    # replacement below. The non-greedy group keeps separate **spans** apart.
    html = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', md_text)
    return html.replace('\n', '<br>')
19
+
20
def generate_formatted_pdf(text: str) -> bytes:
    """
    Render Markdown-style contract text into a PDF and return it as bytes.

    The Markdown is first converted to minimal HTML, then rendered through
    FPDF's write_html so bold spans and line breaks survive.

    Args:
        text (str): The content of the contract, with Markdown syntax.

    Returns:
        bytes: The content of the generated PDF file as a byte string.
    """
    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)

    html_content = markdown_to_html_for_fpdf(text)
    # FPDF's core fonts are latin-1 only: replace unmappable characters
    # instead of raising on them.
    safe_html = html_content.encode('latin-1', 'replace').decode('latin-1')
    document.write_html(safe_html)

    # Streamlit/FastAPI both want raw bytes.
    return bytes(document.output())
video_consents/consent_20250823_162229.mp4 ADDED
Binary file (28.2 kB). View file