fin-jack committed on
Commit
e94296c
·
verified ·
1 Parent(s): 1062e27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -1,23 +1,31 @@
1
- import requests
2
  import os
 
3
  import spacy
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
 
7
- # URL for the legal cases data
8
- cases_url = '/kaggle/input/legalai/' # Replace with the actual URL of your legal cases data
 
9
 
10
- # Fetch legal cases data from URL
11
- response = requests.get(cases_url)
12
- if response.status_code == 200:
13
- cases_content = response.text.splitlines()
14
- else:
15
- print("Failed to fetch legal cases data. Status code:", response.status_code)
16
- cases_content = []
 
 
 
 
 
 
 
17
 
18
  # Preprocess and vectorize text for cases
19
  nlp = spacy.load("en_core_web_sm")
20
- processed_cases_texts = [" ".join([token.lemma_ for token in nlp(text) if not token.is_stop]) for text in cases_content]
21
  vectorizer_cases = TfidfVectorizer()
22
  tfidf_matrix_cases = vectorizer_cases.fit_transform(processed_cases_texts)
23
 
 
 
1
  import os
2
+ import streamlit as st
3
  import spacy
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
 
7
+ # Load legal data - Cases
8
+ cases_directory = '/kaggle/input/legalai/Object_casedocs/'
9
+ cases_texts = []
10
 
11
+ for file_name in os.listdir(cases_directory):
12
+ file_path = os.path.join(cases_directory, file_name)
13
+ with open(file_path, 'r') as file:
14
+ content = file.read()
15
+ cases_texts.append(content)
16
+
17
+ # Load legal data - Statutes
18
+ statutes_directory = '/kaggle/input/legalai/Object_statutes/'
19
+ statutes_texts = {}
20
+ for file_name in os.listdir(statutes_directory):
21
+ file_path = os.path.join(statutes_directory, file_name)
22
+ with open(file_path, 'r') as file:
23
+ statute_content = file.read()
24
+ statutes_texts[file_name] = statute_content
25
 
26
  # Preprocess and vectorize text for cases
27
  nlp = spacy.load("en_core_web_sm")
28
+ processed_cases_texts = [" ".join([token.lemma_ for token in nlp(text) if not token.is_stop]) for text in cases_texts]
29
  vectorizer_cases = TfidfVectorizer()
30
  tfidf_matrix_cases = vectorizer_cases.fit_transform(processed_cases_texts)
31