# Spaces:
# Build error
# Build error
import os

import spacy
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Corpus locations: every regular file inside a directory is one document.
cases_directory = 'Object_casedocs'
statutes_directory = 'Object_statutes'


def load_documents(directory):
    """Read every regular file in *directory* into a ``{file_name: text}`` dict.

    Names are visited in sorted order so document indices are stable between
    runs (``os.listdir`` makes no ordering guarantee). Sub-directories are
    skipped. Files are decoded as UTF-8; undecodable bytes are replaced rather
    than aborting the whole load.

    Raises:
        OSError: if *directory* does not exist or a file cannot be read.
    """
    documents = {}
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            documents[file_name] = file.read()
    return documents


def preprocess(nlp, text):
    """Return *text* as space-joined lemmas with stop words removed.

    *nlp* is the loaded spaCy pipeline. The same function is applied to the
    corpus and to user queries so both share one vocabulary.
    """
    return " ".join(token.lemma_ for token in nlp(text) if not token.is_stop)


def build_search_index(cases_dir, statutes_dir):
    """Load the spaCy model and the corpus, and fit one TF-IDF index per set.

    Returns:
        A dict with keys ``nlp``, ``cases_texts`` (list of case texts),
        ``statutes_texts`` (``{file_name: text}``), ``vectorizer_cases``,
        ``tfidf_matrix_cases``, ``vectorizer_statutes`` and
        ``tfidf_matrix_statutes``.

    Raises:
        OSError: if a directory is missing (spaCy also raises this when the
            model is not installed).
        ValueError: if either directory contains no documents.
    """
    nlp = spacy.load("en_core_web_sm")

    cases_texts = list(load_documents(cases_dir).values())
    statutes_texts = load_documents(statutes_dir)
    if not cases_texts:
        raise ValueError(f"No case documents found in {cases_dir!r}")
    if not statutes_texts:
        raise ValueError(f"No statute documents found in {statutes_dir!r}")

    vectorizer_cases = TfidfVectorizer()
    tfidf_matrix_cases = vectorizer_cases.fit_transform(
        [preprocess(nlp, text) for text in cases_texts]
    )

    vectorizer_statutes = TfidfVectorizer()
    tfidf_matrix_statutes = vectorizer_statutes.fit_transform(
        [preprocess(nlp, text) for text in statutes_texts.values()]
    )

    return {
        "nlp": nlp,
        "cases_texts": cases_texts,
        "statutes_texts": statutes_texts,
        "vectorizer_cases": vectorizer_cases,
        "tfidf_matrix_cases": tfidf_matrix_cases,
        "vectorizer_statutes": vectorizer_statutes,
        "tfidf_matrix_statutes": tfidf_matrix_statutes,
    }


def best_match(vectorizer, tfidf_matrix, processed_query):
    """Return ``(row_index, score)`` of the document most similar to the query.

    *processed_query* must already have gone through :func:`preprocess`.
    The score is the cosine similarity; it is 0.0 when the query shares no
    vocabulary term with any document.
    """
    query_vector = vectorizer.transform([processed_query])
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    top_idx = int(similarities.argmax())
    return top_idx, float(similarities[top_idx])


def summarize_case(nlp, case_text):
    """Return the case text with one sentence per line.

    No content is dropped: the "summary" is the entire case, re-segmented by
    spaCy's sentence splitter.
    """
    return "\n".join(sent.text for sent in nlp(case_text).sents)


def build_legal_document(user_query, case_summary, statute_name, statute_content):
    """Assemble the text shown to the user from the query and the best matches."""
    statute_explanation = f"Statute: {statute_name}\n{statute_content}"
    # NOTE(review): the guidance paragraph is fixed boilerplate; it is not
    # derived from the query or from the retrieved documents.
    return "".join([
        f"Legal Document - User Query: {user_query}\n\n",
        f"Case Summary:\n{case_summary}\n\n",
        "Statute Explanation:\n",
        f"{statute_explanation}\n",
        "\nGuidance for the User:\n",
        "To defend your friend in court, focus on presenting evidence that supports their actions were in self-defense.\n",
        "Emphasize any mitigating circumstances and demonstrate their lack of intent to harm.\n",
        "Consult with a qualified legal professional to build a strong defense strategy.",
    ])


def main():
    """Render the page: one question box and, once answered, the document.

    Streamlit re-executes the whole script on every interaction, so there is
    no input loop here: a blocking ``while True`` around ``st.text_input``
    would never wait for the user and would re-create the same widget on its
    second pass.
    """
    # The expensive model load and corpus vectorization are cached across
    # reruns. The function is wrapped here instead of decorated at definition
    # time so the helpers above stay importable without Streamlit.
    load_index = st.cache_resource(build_search_index)
    try:
        index = load_index(cases_directory, statutes_directory)
    except (OSError, ValueError) as exc:
        st.error(f"Could not load the legal corpus: {exc}")
        return

    user_query = st.text_input("Ask a legal-related question (type 'exit' to quit): ")
    query = user_query.strip()
    if not query:
        # Nothing has been asked yet (the widget's initial value is "").
        return
    if query.lower() == 'exit':
        st.write("Exiting the program. Goodbye!")
        return

    nlp = index["nlp"]
    # Apply the corpus preprocessing to the query so its terms line up with
    # the lemma vocabulary the vectorizers were fitted on.
    processed_query = preprocess(nlp, query)

    top_case_idx, case_score = best_match(
        index["vectorizer_cases"], index["tfidf_matrix_cases"], processed_query
    )
    top_statute_idx, statute_score = best_match(
        index["vectorizer_statutes"], index["tfidf_matrix_statutes"], processed_query
    )
    if case_score <= 0 and statute_score <= 0:
        # argmax of an all-zero row is 0, which would present the first
        # document as "most relevant" although nothing matched.
        st.warning("No case or statute matches that question. Try rephrasing it.")
        return

    statutes_texts = index["statutes_texts"]
    relevant_case = index["cases_texts"][top_case_idx]
    relevant_statute_name = list(statutes_texts)[top_statute_idx]
    relevant_statute_content = statutes_texts[relevant_statute_name]

    document = build_legal_document(
        user_query,
        summarize_case(nlp, relevant_case),
        relevant_statute_name,
        relevant_statute_content,
    )
    st.text_area("Legal Document", document)


# `streamlit run` executes the script as `__main__`, so the app still starts.
if __name__ == "__main__":
    main()