Spaces:

fin-jack
/

AILegalAssistantTejpal.app

Build error

App Files Files Community

AILegalAssistantTejpal.app / app.py

fin-jack

Create app.py

090d043 verified about 2 years ago

raw

history blame contribute delete

3.99 kB

	import os
	import spacy
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	from fastapi import FastAPI

	# Application object for this module; the @app.post route further down registers on it.
	app = FastAPI()

	def load_legal_data(directory):
	    """
	    Load legal text data from a directory.

	    Only regular files directly inside ``directory`` are read; sub-directories
	    and other non-file entries are skipped. Files are visited in sorted name
	    order so the returned list is reproducible across runs and platforms.

	    Args:
	    - directory (str): Path to the directory containing legal text files.

	    Returns:
	    - texts (list): List of text content loaded from files in the directory.

	    Raises:
	    - OSError: If the directory cannot be listed or a file cannot be opened.
	    - UnicodeDecodeError: If a file is not valid UTF-8.
	    """
	    texts = []
	    # os.listdir() returns entries in arbitrary order; sort for determinism.
	    for file_name in sorted(os.listdir(directory)):
	        file_path = os.path.join(directory, file_name)
	        # open() would raise on a directory, so skip anything that is not a file.
	        if not os.path.isfile(file_path):
	            continue
	        # Explicit encoding so decoding does not depend on the platform locale.
	        with open(file_path, 'r', encoding='utf-8') as file:
	            texts.append(file.read())
	    return texts

	def preprocess_text(texts, nlp):
	    """
	    Lemmatize documents with SpaCy, drop stop words, and fit a TF-IDF model.

	    Args:
	    - texts (list): Raw text documents.
	    - nlp (spacy.Language): SpaCy pipeline used to tokenize and lemmatize.

	    Returns:
	    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of the processed documents.
	    - vectorizer (sklearn.feature_extraction.text.TfidfVectorizer): The fitted
	      vectorizer, returned so callers can transform further text with it.
	    """
	    lemmatized_docs = []
	    for raw_text in texts:
	        # Keep only the lemmas of tokens that are not stop words.
	        kept_lemmas = [tok.lemma_ for tok in nlp(raw_text) if not tok.is_stop]
	        lemmatized_docs.append(" ".join(kept_lemmas))

	    vectorizer = TfidfVectorizer()
	    tfidf_matrix = vectorizer.fit_transform(lemmatized_docs)
	    return tfidf_matrix, vectorizer

	def get_most_relevant_text(query_vector, tfidf_matrix, texts):
	    """
	    Pick the document whose TF-IDF vector is most similar to the query.

	    Args:
	    - query_vector (scipy.sparse.csr_matrix): Vectorized query.
	    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of the documents.
	    - texts (list): Documents, in the same order as the rows of ``tfidf_matrix``.

	    Returns:
	    - most_relevant_text (str): The document with the highest cosine similarity.
	    """
	    # Flatten the similarity result into a 1-D array of per-document scores.
	    scores = cosine_similarity(query_vector, tfidf_matrix).flatten()
	    best_match = scores.argmax()
	    return texts[best_match]

	# Load legal data - Cases
	# Defaults to the original Kaggle dataset path; set LEGAL_CASES_DIR to point
	# the app at a different location without editing the code.
	cases_directory = os.environ.get('LEGAL_CASES_DIR', '/kaggle/input/legalai/Object_casedocs/')
	cases_texts = load_legal_data(cases_directory)

	# Load legal data - Statutes
	# Same override mechanism as above, via LEGAL_STATUTES_DIR.
	statutes_directory = os.environ.get('LEGAL_STATUTES_DIR', '/kaggle/input/legalai/Object_statutes/')
	statutes_texts = load_legal_data(statutes_directory)

	# Load SpaCy language model
	nlp = spacy.load("en_core_web_sm")

	# Preprocess and vectorize text for cases
	tfidf_matrix_cases, vectorizer_cases = preprocess_text(cases_texts, nlp)

	# Preprocess and vectorize text for statutes
	tfidf_matrix_statutes, vectorizer_statutes = preprocess_text(statutes_texts, nlp)

	# API Endpoint to handle legal queries
	@app.post("/analyze/")
	async def analyze_legal_query(query: str):
	    """
	    Find the case and statute most similar to a query and format them as a report.

	    Args:
	    - query (str): Free-text legal question from the user.

	    Returns:
	    - dict: ``{"query": <the original query>, "legal_document": <formatted text>}``.
	    """
	    # Normalize the query the same way preprocess_text normalized the corpus
	    # (lemmas of non-stop-word tokens). Without this, inflected query words
	    # do not line up with the lemma-based TF-IDF vocabulary.
	    normalized_query = " ".join(
	        token.lemma_ for token in nlp(query) if not token.is_stop
	    )

	    # Vectorize user query
	    query_vector_cases = vectorizer_cases.transform([normalized_query])
	    query_vector_statutes = vectorizer_statutes.transform([normalized_query])

	    # Retrieve the most relevant case and statute
	    relevant_case = get_most_relevant_text(query_vector_cases, tfidf_matrix_cases, cases_texts)
	    relevant_statute = get_most_relevant_text(query_vector_statutes, tfidf_matrix_statutes, statutes_texts)

	    # "Summary" here is the full case text re-emitted one sentence per line.
	    doc = nlp(relevant_case)
	    case_summary = "\n".join(sent.text for sent in doc.sents)

	    # Generate Legal Document
	    # NOTE: the guidance section is fixed text; it is not derived from the query.
	    legal_document = "".join([
	        f"Legal Document - User Query: {query}\n\n",
	        f"Case Summary:\n{case_summary}\n\n",
	        "Relevant Statute:\n",
	        f"{relevant_statute}\n",
	        "\nGuidance for the User:\n",
	        "To defend your friend in court, focus on presenting evidence that supports their actions were in self-defense.\n",
	        "Emphasize any mitigating circumstances and demonstrate their lack of intent to harm.\n",
	        "Consult with a qualified legal professional to build a strong defense strategy.",
	    ])

	    return {"query": query, "legal_document": legal_document}