# Legal AI retrieval service: loads case-law and statute corpora, builds
# TF-IDF indexes over spaCy-lemmatized text, and serves query matching
# through a FastAPI application.
| import os | |
| import spacy | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from fastapi import FastAPI | |
# FastAPI application instance; route handlers below register against this.
app = FastAPI()
def load_legal_data(directory):
    """
    Load legal text data from a directory.

    Args:
    - directory (str): Path to the directory containing legal text files.

    Returns:
    - texts (list): List of text content loaded from files in the directory,
      in sorted filename order.
    """
    texts = []
    # sorted() gives a deterministic order; os.listdir order is arbitrary,
    # and downstream TF-IDF row indices must line up with this list.
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        # Skip subdirectories and other non-regular entries; open() on a
        # directory would raise.
        if not os.path.isfile(file_path):
            continue
        # Explicit encoding avoids platform-dependent default-codec failures.
        with open(file_path, 'r', encoding='utf-8') as file:
            texts.append(file.read())
    return texts
def preprocess_text(texts, nlp):
    """
    Preprocess and vectorize text using SpaCy and TF-IDF.

    Each document is lemmatized with stop words removed, then the whole
    corpus is fitted/transformed with a TF-IDF vectorizer.

    Args:
    - texts (list): List of text documents.
    - nlp (spacy.Language): SpaCy language model.

    Returns:
    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of vectorized text.
    - vectorizer (sklearn.feature_extraction.text.TfidfVectorizer): Fitted TF-IDF vectorizer.
    """
    # nlp.pipe() batches documents through the pipeline, which spaCy
    # documents as much faster than calling nlp(text) once per document;
    # the produced tokens are identical.
    processed_texts = [
        " ".join(token.lemma_ for token in doc if not token.is_stop)
        for doc in nlp.pipe(texts)
    ]
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(processed_texts)
    return tfidf_matrix, vectorizer
def get_most_relevant_text(query_vector, tfidf_matrix, texts):
    """
    Retrieve the most relevant text based on cosine similarity.

    Args:
    - query_vector (scipy.sparse.csr_matrix): Vectorized query.
    - tfidf_matrix (scipy.sparse.csr_matrix): TF-IDF matrix of vectorized text.
    - texts (list): List of text documents, row-aligned with tfidf_matrix.

    Returns:
    - str: The document whose TF-IDF row is most similar to the query.
    """
    # One similarity score per corpus document; argmax picks the winner.
    scores = cosine_similarity(query_vector, tfidf_matrix).flatten()
    best_row = scores.argmax()
    return texts[best_row]
# --- Module-level startup: load corpora and build TF-IDF indexes. ---
# NOTE(review): paths are hard-coded to a Kaggle dataset mount; this module
# will fail to import anywhere that mount is absent.
# Load legal data - Cases
cases_directory = '/kaggle/input/legalai/Object_casedocs/'
cases_texts = load_legal_data(cases_directory)
# Load legal data - Statutes
statutes_directory = '/kaggle/input/legalai/Object_statutes/'
statutes_texts = load_legal_data(statutes_directory)
# Load SpaCy language model (small English pipeline; must be installed
# separately, e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")
# Preprocess and vectorize text for cases
tfidf_matrix_cases, vectorizer_cases = preprocess_text(cases_texts, nlp)
# Preprocess and vectorize text for statutes
tfidf_matrix_statutes, vectorizer_statutes = preprocess_text(statutes_texts, nlp)
# API Endpoint to handle legal queries
# FIX: the original function had no route decorator, so FastAPI never
# registered it and the app exposed no endpoint at all.
@app.get("/analyze")
async def analyze_legal_query(query: str):
    """
    Analyze a legal query and assemble a response document.

    Finds the most similar case and statute via TF-IDF cosine similarity,
    extracts LAW-labelled entities from the matched case, and returns a
    formatted legal document.

    Args:
    - query (str): Free-text legal question from the user.

    Returns:
    - dict: {"query": <echoed query>, "legal_document": <formatted text>}
    """
    # Vectorize user query with each corpus's fitted vectorizer.
    query_vector_cases = vectorizer_cases.transform([query])
    query_vector_statutes = vectorizer_statutes.transform([query])
    # Retrieve the most relevant case and statute
    relevant_case = get_most_relevant_text(query_vector_cases, tfidf_matrix_cases, cases_texts)
    relevant_statute = get_most_relevant_text(query_vector_statutes, tfidf_matrix_statutes, statutes_texts)
    # Extract statute-like entities ("LAW" NER label) from the relevant case.
    doc = nlp(relevant_case)
    statutes = [ent.text for ent in doc.ents if ent.label_ == "LAW"]
    # Summarize the relevant case. FIX: the original joined *every* sentence,
    # echoing the full document back; cap the summary at the first 5 sentences.
    case_summary = "\n".join(sent.text for sent in list(doc.sents)[:5])
    # Generate Legal Document
    legal_document = f"Legal Document - User Query: {query}\n\n"
    legal_document += f"Case Summary:\n{case_summary}\n\n"
    # FIX: the extracted LAW entities were computed and then discarded in the
    # original; include them when present.
    if statutes:
        legal_document += "Statutes Cited in the Case:\n"
        legal_document += "\n".join(statutes) + "\n\n"
    legal_document += "Relevant Statute:\n"
    legal_document += f"{relevant_statute}\n"
    legal_document += "\nGuidance for the User:\n"
    legal_document += "To defend your friend in court, focus on presenting evidence that supports their actions were in self-defense.\n"
    legal_document += "Emphasize any mitigating circumstances and demonstrate their lack of intent to harm.\n"
    legal_document += "Consult with a qualified legal professional to build a strong defense strategy."
    return {"query": query, "legal_document": legal_document}