Rivalcoder committed on
Commit
f9d767c
·
1 Parent(s): d2adcac
Files changed (5) hide show
  1. kanon_api.py +92 -0
  2. main.py +33 -0
  3. predictor.py +112 -0
  4. requirements.txt +8 -0
  5. vectorstore.py +47 -0
kanon_api.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
+
5
+ BASE_URL = "https://indiankanoon.org"
6
+
7
def search_cases(query, max_results=10, timeout=10):
    """
    Scrape search results from the Indian Kanoon website.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to return.
        timeout: Per-request timeout in seconds (new, backward-compatible
            parameter; prevents the request from hanging indefinitely).

    Returns:
        A list of dicts, each with "title" and "url" keys.

    Raises:
        requests.HTTPError: If the search page responds with an error status.
    """
    from urllib.parse import quote_plus

    # URL-encode the query: the original interpolated it raw, so queries
    # containing '&', '#', or '+' silently corrupted the request URL.
    search_url = f"{BASE_URL}/search/?formInput={quote_plus(query)}"
    response = requests.get(search_url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    for result in soup.select(".result_title")[:max_results]:
        title_tag = result.find("a")
        # Skip malformed entries that lack an anchor or an href.
        if title_tag and title_tag.get("href"):
            results.append({
                "title": title_tag.get_text(strip=True),
                "url": BASE_URL + title_tag["href"],
            })
    return results
27
+
28
+
29
def get_case_content(case_url, timeout=15):
    """
    Scrape the full text of a case from its URL.

    Tries a list of known judgment-body containers first, then falls back
    to concatenating every <p> tag on the page.

    Args:
        case_url: Absolute URL of the case page.
        timeout: Per-request timeout in seconds (new, backward-compatible
            parameter; prevents a stalled server from hanging the scraper).

    Returns:
        The case text as a string, or None when the page could not be
        fetched or contained no recognizable content.  (The original
        returned the truthy string "No content found." on the no-content
        path while returning None on errors; callers filtering on
        truthiness then treated the sentinel as real judgment text.)
    """
    try:
        response = requests.get(case_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Known containers for the judgment body, in order of preference.
        selectors = [
            "div#maincontent",
            "div.content",
            "pre",
            "div.article_text",
            "div.judgement-text",
        ]

        for sel in selectors:
            content_div = soup.select_one(sel)
            if content_div:
                text = content_div.get_text(separator="\n", strip=True)
                if text:
                    return text

        # Fallback: join every paragraph on the page.
        paragraphs = soup.find_all("p")
        if paragraphs:
            return "\n".join(p.get_text(strip=True) for p in paragraphs)

    except Exception:
        # Deliberate best-effort scraping: any fetch/parse failure for a
        # single case is treated the same as "no content".
        return None

    # Consistent falsy sentinel so callers can filter with `if text:`.
    return None
61
+
62
+
63
+ # =========================
64
+ # Parallel Case Fetching
65
+ # =========================
66
def fetch_case_text(case):
    """Populate ``case['text']`` with the scraped judgment and return the dict.

    Safe to call from worker threads: it only touches the single dict it
    was given.
    """
    text = get_case_content(case["url"])
    case["text"] = text
    return case
72
+
73
def fetch_cases_parallel(cases, max_workers=5):
    """
    Fetch the full text of several cases concurrently.

    Each case dict gains a 'text' key.  Results are gathered in completion
    order (not submission order), as with the original implementation.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(fetch_case_text, case) for case in cases]
        return [done.result() for done in as_completed(pending)]
83
+
84
+
85
+ # # Example usage
86
+ # query = "Cheat in Neet exam"
87
+ # cases = search_cases(query, max_results=5)
88
+ # # Fetch content in parallel
89
+ # cases = fetch_cases_parallel(cases, max_workers=5)
90
+ # for case in cases:
91
+ # print(f"Title: {case['title']}")
92
+ # print(f"Content snippet: {case['text'][:1000]}...\n")
main.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from .predictor import predict_outcome
4
+ import datetime
5
+
6
+
7
+ app = FastAPI()
8
+
9
class CaseRequest(BaseModel):
    # Request schema for POST /predict.
    # case: free-text description of the user's case facts.
    case: str
11
+
12
@app.post("/predict")
async def predict(case_request: CaseRequest):
    """Run the outcome predictor on the submitted case facts."""
    prediction = predict_outcome(case_request.case)
    return {"prediction": prediction}
17
+
18
@app.get("/health")
async def health_check():
    """
    Basic health check endpoint.

    Returns service status, the current UTC server time (ISO-8601 with a
    trailing "Z"), and placeholder health flags for external dependencies.
    """
    # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    # timestamp; use an aware UTC time and normalize the offset to the
    # conventional "Z" suffix so the payload shape stays the same.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return {
        "status": "ok",
        "server_time": now_utc.isoformat().replace("+00:00", "Z"),
        "dependencies": {
            # TODO: replace these static placeholders with real probes of
            # the Google GenAI API and the vector store.
            "google_genai_api": "ok",
            "vectorstore": "ok",
        },
    }
predictor.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .kanon_api import search_cases, get_case_content
2
+ from .vectorstore import create_vector_store
3
+ from google import genai
4
+ import os
5
+ import re
6
+ import json
7
+
8
+
9
+ client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
10
+
11
def _generate_search_query(user_case: str) -> str:
    """Ask the LLM to turn raw case facts into a one-line legal search query."""
    # NOTE: the original prompt contained a garbled requirement line
    # ('DOnt Give Output This " Some " ...'); it is rewritten here into an
    # intelligible instruction with the same intent.
    search_prompt = f"""
You are an expert Indian legal AI assistant.
Given these case facts, generate a precise **search query** suitable for finding relevant Indian legal cases on a legal database like Indian Kanoon.

Case facts:
{user_case}

Requirements:
- Output **only one line** in natural language.
- Include **relevant Indian laws, sections, or keywords** if applicable.
- Make it precise for legal search; do **not** use generic phrases.
- Return **only the query**, nothing else, no explanation.
- Do not answer with filler such as "Some" or ".."; respond with only the single best line that matches the case.

Example output:
"Liability for defective vehicles and accident compensation."
"About compensation for deaths and injuries due to a road accident caused by a vehicle defect"
"""
    search_chat = client.chats.create(model="gemini-2.5-flash-lite")
    query_response = search_chat.send_message(search_prompt)
    # Normalize to a single line without wrapping quotes.
    return query_response.text.strip().replace("\n", " ").strip('"').strip("'")


def _ask_for_prediction(user_case: str, combined_text: str) -> str:
    """Ask the LLM for the final JSON verdict given the retrieved case context."""
    prompt = f"""
You are an expert Indian legal AI assistant.
User case facts:
{user_case}

Consider these previous cases:
{combined_text}

Return the output strictly as JSON with the following keys:
- "probability": estimated percentage chance of winning the case (number between 0-100)
- "timeline": approximate duration or end period of the case based on similar past cases
- "feature_points": list of key points favoring win/loss and any major influencing factors

Example JSON:
{{
"probability": 75,
"timeline": "6-12 months",
"feature_points": [
"Plaintiff has strong documentary evidence",
"Defendant has prior similar case loss",
"Possible delay due to procedural issues"
]
}}
Do **not** include any explanation outside the JSON.
"""
    chat = client.chats.create(model="gemini-2.0-flash-exp")
    return chat.send_message(prompt).text.strip()


def _parse_prediction_json(raw_text: str):
    """Best-effort parse of the model's JSON reply.

    Strips markdown code fences and one layer of wrapping quotes before
    calling json.loads; on failure, returns an error dict carrying the raw
    model output instead of raising.
    """
    # Remove a leading ```json / ``` fence and a trailing ``` fence.
    raw_text = re.sub(
        r"^```json\s*|^```|```$", "", raw_text.strip(), flags=re.IGNORECASE
    ).strip()

    # Remove one layer of wrapping quotes, un-escaping the interior.
    if (raw_text.startswith('"') and raw_text.endswith('"')) or (
        raw_text.startswith("'") and raw_text.endswith("'")
    ):
        raw_text = raw_text[1:-1].strip()
        raw_text = raw_text.replace('\\"', '"').replace("\\'", "'")

    try:
        return json.loads(raw_text)
    except json.JSONDecodeError:
        return {"error": "AI did not return valid JSON", "raw_response": raw_text}


def predict_outcome(user_case: str):
    """
    Predict the likely outcome of *user_case* using related past cases.

    Pipeline:
      1. Ask the LLM for a one-line legal search query.
      2. Search Indian Kanoon and scrape the matching judgments.
      3. Index the judgment texts in a FAISS vector store.
      4. Retrieve the passages most relevant to the user's facts.
      5. Ask the LLM for a JSON verdict (probability / timeline / points).

    Returns the parsed JSON dict, an error dict when the model output is
    not valid JSON, or a plain error string on the early failure paths —
    matching the original behaviour.
    """
    query = _generate_search_query(user_case)
    print("Generated legal search query:", query)

    # Search related cases and fetch each judgment's full text.
    related_cases_data = search_cases(query, max_results=10)
    for case in related_cases_data:
        case['text'] = get_case_content(case['url'])

    related_cases_texts = [
        case["text"] for case in related_cases_data if case.get("text")
    ]
    if not related_cases_texts:
        return "No relevant cases found to analyze."

    vectorstore = create_vector_store(related_cases_texts)
    if not vectorstore:
        return "Vector store creation failed."

    # Retrieve the passages most relevant to the user's facts.
    retriever = vectorstore.as_retriever()
    relevant_docs = retriever.invoke(user_case)
    combined_text = "\n".join(d.page_content for d in relevant_docs)

    if not combined_text.strip():
        return "No relevant context could be found from retrieved cases."

    raw_text = _ask_for_prediction(user_case, combined_text)
    return _parse_prediction_json(raw_text)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ requests
4
+ beautifulsoup4
5
+ pydantic
6
+ langchain
7
+ faiss-cpu
8
+ google-genai
vectorstore.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import FAISS
2
+ from langchain.docstore.document import Document
3
+ from langchain.embeddings.base import Embeddings
4
+ from google import genai
5
+ import os
6
+
7
# Make sure your environment variable GOOGLE_API_KEY is set
# NOTE: this check runs at import time, so importing the module without the
# key configured fails immediately instead of mid-request.
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    raise ValueError("Missing GOOGLE_API_KEY environment variable!")

# Initialize client with API key
# Shared module-level Gemini client used by the embedding wrapper below.
client = genai.Client(api_key=API_KEY)
14
+
15
+
16
class GeminiEmbeddings(Embeddings):
    """LangChain Embeddings adapter backed by the Google Gemini embedding API."""

    def embed_documents(self, texts):
        """Embed a batch of texts; returns one float vector per input text."""
        if not texts:
            return []
        response = client.models.embed_content(
            model="gemini-embedding-001",
            contents=texts,
        )
        # response.embeddings[i].values is the raw float vector for texts[i].
        return [embedding.values for embedding in response.embeddings]

    def embed_query(self, text):
        """Embed a single query string; returns its float vector."""
        # A query is just a one-element batch: same model, same request
        # shape (contents=[text]) and same result as the original.
        return self.embed_documents([text])[0]
35
+
36
+
37
def create_vector_store(texts):
    """
    Build a FAISS vector store over *texts* using Gemini embeddings.

    Blank or whitespace-only entries are dropped; None entries are now
    tolerated as well (the original called ``t.strip()`` unconditionally
    and crashed on None).

    Args:
        texts: Iterable of case-text strings.

    Returns:
        A FAISS vector store, or None when no usable text remains.
    """
    # Filter the strings directly: the original wrapped each text in a
    # langchain Document only to immediately unwrap page_content again
    # for FAISS.from_texts — a pointless round trip.
    usable_texts = [t for t in texts if t and t.strip()]
    if not usable_texts:
        return None

    return FAISS.from_texts(texts=usable_texts, embedding=GeminiEmbeddings())