Spaces:

lenawilli
/

GDPR

Build error

App Files Files Community

lenawilli commited on Jun 25, 2025

Commit

c73a79b

verified ·

1 Parent(s): 2d7abbc

Upload 9 files

Browse files

Files changed (9) hide show

src/KnowledgeGraphView.py +143 -0
src/app-st.py +442 -0
src/gdpr_articles_baseline.json +0 -0
src/logistic_regression_model.joblib +3 -0
src/logistic_regression_vectorizer.joblib +3 -0
src/multinomialNB_model.joblib +3 -0
src/multinomialNB_vectorizer.joblib +3 -0
src/requirements.txt +17 -0
src/sentence_transformer_model.joblib +3 -0

src/KnowledgeGraphView.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import streamlit as st
+from rdflib import Graph, RDFS, Namespace, URIRef, RDF, Literal
+from pyvis.network import Network
+import streamlit.components.v1 as components
+# --- Load RDF ---
+g = Graph()
+g.parse("../KnowledgeGraph/gdpr_policy_graph.ttl", format="ttl")
+BASE_URI = "http://example.org/gdpr#"
+EX = Namespace(BASE_URI)
+# --- UI Layout ---
+st.set_page_config(layout="wide")
+st.title("🕸️ GDPR Knowledge Graph Visualizer")
+# --- Get Filter Options ---
+articles = sorted({str(o).split(" ")[1] for s, p, o in g.triples((None, RDFS.label, None)) if "Article" in str(o)})
+sections = sorted({
+    str(label)
+    for sec in g.subjects(RDF.type, EX.PolicySection)
+    for label in g.objects(sec, RDFS.label)
+})
+col1, col2 = st.columns([1, 3])
+selected_article = col1.selectbox("🔍 Filter by Article Number", ["All"] + articles)
+selected_section = col2.selectbox("📄 Filter by Policy Section", ["All"] + sections)
+# --- Determine nodes to show based on filters ---
+def get_related_nodes_for_section(section_label):
+    # Find the PolicySection node
+    matching_sections = [
+        s for s, p, o in g.triples((None, RDFS.label, Literal(section_label)))
+        if (s, RDF.type, EX.PolicySection) in g
+    ]
+    if not matching_sections:
+        return set()
+    sec_node = matching_sections[0]
+    clause_nodes = set(o for _, _, o in g.triples((sec_node, EX.relatesToClause, None)))
+    article_nodes = set(o for c in clause_nodes for _, _, o in g.triples((c, EX.partOf, None)))
+    return {sec_node} | clause_nodes | article_nodes
+def get_related_nodes_for_article(article_number):
+    # Find Article node(s)
+    article_nodes = set(
+        s for s, p, o in g.triples((None, RDFS.label, None))
+        if (s, RDF.type, EX.Article) in g and f"Article {article_number}" in str(o)
+    )
+    if not article_nodes:
+        return set()
+    article_node = list(article_nodes)[0]
+    # Clauses of the article
+    clause_nodes = set(o for _, _, o in g.triples((None, EX.partOf, article_node)))
+    # Find all policy sections relating to these clauses
+    policy_sections = set(
+        s for s, p, o in g.triples((None, EX.relatesToClause, None)) if o in clause_nodes
+    )
+    return {article_node} | clause_nodes | policy_sections
+def expand_with_neighbors(graph, nodes):
+    expanded = set(nodes)
+    for node in nodes:
+        # Outgoing neighbors
+        for _, _, o in graph.triples((node, None, None)):
+            expanded.add(o)
+        # Incoming neighbors
+        for s, _, _ in graph.triples((None, None, node)):
+            expanded.add(s)
+    return expanded
+if selected_section != "All" and selected_article != "All":
+    # Filter by both: intersection of sets
+    nodes_for_section = get_related_nodes_for_section(selected_section)
+    nodes_for_article = get_related_nodes_for_article(selected_article)
+    base_nodes = nodes_for_section & nodes_for_article
+elif selected_section != "All":
+    base_nodes = get_related_nodes_for_section(selected_section)
+elif selected_article != "All":
+    base_nodes = get_related_nodes_for_article(selected_article)
+else:
+    # Show everything
+    base_nodes = set()
+    for s, p, o in g:
+        base_nodes.add(s)
+        base_nodes.add(o)
+nodes_to_show = expand_with_neighbors(g, base_nodes)
+# --- Initialize network ---
+net = Network(height="750px", width="100%", bgcolor="#ffffff", font_color="black")
+net.force_atlas_2based(gravity=-30)
+added_nodes = set()
+def get_type(uri):
+    for s, p, o in g.triples((uri, RDFS.label, None)):
+        label = str(o)
+        if label.startswith("Article"):
+            return "article"
+        elif label.startswith("Art."):
+            return "clause"
+        elif label.startswith("Section"):
+            return "section"
+    return "unknown"
+def get_label(g, node):
+    for _, _, label in g.triples((node, RDFS.label, None)):
+        return str(label)
+    return None
+def color_by_type(node_type):
+    return {
+        "article": "#77B5FE",   # blue
+        "clause": "#81C784",    # green
+        "section": "#FFB74D",   # orange
+    }.get(node_type, "#D3D3D3")
+# --- Add nodes ---
+for node in nodes_to_show:
+    label = get_label(g, node) or (str(node).split("#")[-1] if isinstance(node, URIRef) else str(node))
+    n_type = get_type(node)
+    tooltip = label
+    for val in g.objects(node, EX.similarityScore):
+        tooltip += f"\nSimilarity: {val}"
+    for val in g.objects(node, RDFS.comment):
+        tooltip += f"\n{val}"
+    net.add_node(str(node), label=label, title=tooltip, color=color_by_type(n_type))
+    added_nodes.add(str(node))
+# --- Add edges only between shown nodes ---
+for s, p, o in g:
+    if s in nodes_to_show and o in nodes_to_show:
+        pred_label = p.split("#")[-1] if isinstance(p, URIRef) else str(p)
+        net.add_edge(str(s), str(o), label=pred_label)
+# --- Render ---
+net.save_graph("graph.html")
+with open("graph.html", "r", encoding="utf-8") as f:
+    html = f.read()
+components.html(html, height=780, scrolling=True)

src/app-st.py ADDED Viewed

	@@ -0,0 +1,442 @@

+import streamlit as st
+import json
+import numpy as np
+import joblib
+from collections import defaultdict
+from transformers import AutoTokenizer, AutoModel
+from sklearn.metrics.pairwise import cosine_similarity
+import torch
+import re
+from typing import List, Dict, Any
+from openai import OpenAI
+from dotenv import load_dotenv
+import os
+from sentence_transformers import SentenceTransformer
+from rdflib import Graph, Namespace, URIRef, Literal, RDF, RDFS, XSD
+import os
+import networkx as nx
+from pyvis.network import Network
+import streamlit.components.v1 as components
+# ---------------------------
+# LegalBERT-based compliance checker
+# ---------------------------
+class GDPRComplianceChecker:
+    def __init__(self, model_name="nlpaueb/bert-base-uncased-eurlex"):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device).eval()
+    def get_embeddings(self, texts):
+        embeddings = []
+        for text in texts:
+            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                output = self.model(**inputs)
+                embedding = output.last_hidden_state[:, 0, :].cpu().numpy()
+                embeddings.append(embedding[0])
+        return np.array(embeddings)
+    def chunk_policy_text(self, text, chunk_size=500):
+        paragraphs = re.split(r'\n{2,}|\.\s+', text)
+        chunks, current = [], ""
+        for para in paragraphs:
+            if len(current) + len(para) < chunk_size:
+                current += " " + para
+            else:
+                chunks.append(current.strip())
+                current = para
+        if current:
+            chunks.append(current.strip())
+        return [chunk for chunk in chunks if len(chunk) > 50]
+    def load_gdpr_articles(self, gdpr_json):
+        gdpr_map, texts = {}, []
+        for article in gdpr_json:
+            number, title = article["article_number"], article["article_title"]
+            body = " ".join([f"{k} {v}" for sec in article["sections"] for k, v in sec.items()])
+            full_text = f"Article {number}: {title}. {body}"
+            gdpr_map[number] = {"title": title, "text": full_text}
+            texts.append(full_text)
+        embeddings = self.get_embeddings(texts)
+        return gdpr_map, embeddings
+    def calculate_compliance_score(self, policy_text, gdpr_map, gdpr_embeddings):
+        chunks = self.chunk_policy_text(policy_text)
+        if not chunks:
+            return {"error": "Policy has no meaningful chunks."}
+        chunk_embeddings = self.get_embeddings(chunks)
+        sim_matrix = cosine_similarity(gdpr_embeddings, chunk_embeddings)
+        article_scores = {}
+        presence_threshold = 0.35
+        total_score, counted_articles = 0, 0
+        for i, (art_num, art_data) in enumerate(gdpr_map.items()):
+            max_sim = np.max(sim_matrix[i])
+            best_idx = np.argmax(sim_matrix[i])
+            if max_sim < presence_threshold:
+                continue
+            score_pct = min(100, max(0, (max_sim - presence_threshold) / (1 - presence_threshold) * 100))
+            article_scores[art_num] = {
+                "article_title": art_data["title"],
+                "compliance_percentage": round(score_pct, 2),
+                "similarity_score": round(max_sim, 4),
+                "matched_text_snippet": chunks[best_idx][:300] + "..."
+            }
+            total_score += score_pct
+            counted_articles += 1
+        overall = round(total_score / counted_articles, 2) if counted_articles else 0
+        return {
+            "overall_compliance_percentage": overall,
+            "relevant_articles_analyzed": counted_articles,
+            "total_policy_chunks": len(chunks),
+            "article_scores": article_scores
+        }
+def chunk_policy_text(text, chunk_size=500):
+    import re
+    paragraphs = re.split(r'\n{2,}|\.\s+', text)
+    chunks, current = [], ""
+    for para in paragraphs:
+        if len(current) + len(para) < chunk_size:
+            current += " " + para
+        else:
+            chunks.append(current.strip())
+            current = para
+    if current:
+        chunks.append(current.strip())
+    return [chunk for chunk in chunks if len(chunk) > 50]
+def prepare_article_text(article: Dict[str, Any]) -> str:
+    body = " ".join(
+        " ".join(sec.values()) if isinstance(sec, dict) else str(sec)
+        for sec in article.get("sections", [])
+    )
+    return f"Art. {article['article_number']} – {article['article_title']} {body}"
+def get_embedding(text: str) -> List[float]:
+    # If input is a list of strings, clean each string
+    if isinstance(text, list):
+        cleaned_text = [t.replace("\n", " ") for t in text]
+    else:  # single string
+        cleaned_text = text.replace("\n", " ")
+    resp = client.embeddings.create(model=EMBED_MODEL, input=cleaned_text)
+    if isinstance(cleaned_text, list):
+        return [item.embedding for item in resp.data]
+    else:
+        return resp.data[0].embedding
+def rdflib_to_networkx(rdflib_graph):
+    nx_graph = nx.MultiDiGraph()
+    for s, p, o in rdflib_graph:
+        nx_graph.add_edge(str(s), str(o), label=str(p))
+    return nx_graph
+def draw_pyvis_graph(nx_graph):
+    net = Network(height="600px", width="100%", directed=True, notebook=False)
+    net.from_nx(nx_graph)
+    net.repulsion(node_distance=200, central_gravity=0.33, spring_length=100, spring_strength=0.10, damping=0.95)
+    return net
+# ---------------------------
+# Streamlit interface
+# ---------------------------
+st.set_page_config(page_title="GDPR Compliance Checker", layout="wide")
+st.title("🛡️ GDPR Compliance Checker")
+with st.sidebar:
+    st.header("Upload Files")
+    gdpr_file = st.file_uploader("GDPR JSON File", type=["json"])
+    policy_file = st.file_uploader("Company Policy (.txt)", type=["txt"])
+if gdpr_file and policy_file:
+    model_choice = st.selectbox(
+        "Choose the model to use:",
+        ["Logistic Regression", "MultinomialNB", "LegalBERT (Eurlex)", "SentenceTransformer", "LLM Model", "Knowledge Graphs"]
+    )
+    gdpr_data = json.load(gdpr_file)
+    article_title_map = {f"Article {a['article_number']}": a['article_title'] for a in gdpr_data}
+    policy_text = policy_file.read().decode("utf-8")
+    with st.spinner("Analyzing..."):
+        if model_choice == "LegalBERT (Eurlex)":
+            checker = GDPRComplianceChecker()
+            gdpr_map, gdpr_embeddings = checker.load_gdpr_articles(gdpr_data)
+            result = checker.calculate_compliance_score(policy_text, gdpr_map, gdpr_embeddings)
+        elif model_choice in ["Logistic Regression", "MultinomialNB"]:
+            if model_choice == "Logistic Regression":
+                model = joblib.load("logistic_regression_model.joblib")
+                vectorizer = joblib.load("logistic_regression_vectorizer.joblib")
+            else:
+                model = joblib.load("multinomialNB_model.joblib")
+                vectorizer = joblib.load("multinomialNB_vectorizer.joblib")
+            chunks = chunk_policy_text(policy_text)
+            chunks = [c.strip() for c in chunks if len(c.strip()) > 40]
+            X_tfidf = vectorizer.transform(chunks)
+            y_pred = model.predict(X_tfidf)
+            y_proba = model.predict_proba(X_tfidf)
+            article_scores = defaultdict(lambda: {
+                "article_title": "",
+                "compliance_percentage": 0.0,
+                "similarity_score": 0.0,
+                "matched_text_snippet": ""
+            })
+            total_score = 0
+            counted_chunks = 0
+            for i, (label, prob_vector) in enumerate(zip(y_pred, y_proba)):
+                max_prob = max(prob_vector)
+                if max_prob >= 0.35:
+                    score_pct = min(100.0, max(0.0, (max_prob - 0.35) / (1 - 0.35) * 100))
+                    if score_pct > article_scores[label]["compliance_percentage"]:
+                        article_scores[label]["compliance_percentage"] = score_pct
+                        article_scores[label]["similarity_score"] = round(max_prob, 4)
+                        article_scores[label]["matched_text_snippet"] = chunks[i][:300] + "..."
+                    article_scores[label]["article_title"] = article_title_map.get(label, label)
+                    total_score += score_pct
+                    counted_chunks += 1
+            overall = round(total_score / counted_chunks, 2) if counted_chunks else 0
+            result = {
+                "overall_compliance_percentage": overall,
+                "relevant_articles_analyzed": len(article_scores),
+                "total_policy_chunks": len(chunks),
+                "article_scores": dict(article_scores)
+            }
+        elif model_choice == "SentenceTransformer":
+            model = joblib.load("sentence_transformer_model.joblib")
+            gdpr_texts = []
+            gdpr_map = {}
+            for article in gdpr_data:
+                number, title = article["article_number"], article["article_title"]
+                body = " ".join([f"{k} {v}" for sec in article["sections"] for k, v in sec.items()])
+                full_text = f"Article {number}: {title}. {body}"
+                gdpr_map[number] = {
+                    "title": title,
+                    "text": full_text
+                }
+                gdpr_texts.append(full_text)
+            gdpr_embeddings = model.encode(gdpr_texts, convert_to_numpy=True)
+            chunks = chunk_policy_text(policy_text)
+            chunk_embeddings = model.encode(chunks, convert_to_numpy=True)
+            sim_matrix = cosine_similarity(gdpr_embeddings, chunk_embeddings)
+            article_scores = {}
+            presence_threshold = 0.35
+            total_score, counted_articles = 0, 0
+            for i, (art_num, art_data) in enumerate(gdpr_map.items()):
+                max_sim = np.max(sim_matrix[i])
+                best_idx = np.argmax(sim_matrix[i])
+                if max_sim < presence_threshold:
+                    continue
+                score_pct = min(100, max(0, (max_sim - presence_threshold) / (1 - presence_threshold) * 100))
+                article_scores[art_num] = {
+                    "article_title": art_data["title"],
+                    "compliance_percentage": round(score_pct, 2),
+                    "similarity_score": round(max_sim, 4),
+                    "matched_text_snippet": chunks[best_idx][:300] + "..."
+                }
+                total_score += score_pct
+                counted_articles += 1
+            overall = round(total_score / counted_articles, 2) if counted_articles else 0
+            result = {
+                "overall_compliance_percentage": overall,
+                "relevant_articles_analyzed": counted_articles,
+                "total_policy_chunks": len(chunks),
+                "article_scores": article_scores
+            }
+        elif model_choice == "LLM Model":
+            load_dotenv()
+            api_key = os.getenv("OPENAI_API_KEY")
+            client = OpenAI(api_key=api_key)
+            EMBED_MODEL = "text-embedding-3-small"
+            gdpr_embeddings = {}
+            gdpr_map = {}
+            for art in gdpr_data:
+                number, title = art["article_number"], art["article_title"]
+                art_text = prepare_article_text(art)
+                gdpr_embeddings[art["article_number"]] = {
+                    "embedding": get_embedding(art_text),
+                    "title": art["article_title"]
+                }
+                gdpr_map[number] = {"title": title, "text": art_text}
+            chunks = chunk_policy_text(policy_text)
+            chunk_embeddings = get_embedding(chunks)
+            gdpr_embedding_vectors = [v["embedding"] for v in gdpr_embeddings.values()]
+            sim_matrix = cosine_similarity(gdpr_embedding_vectors, chunk_embeddings)
+            article_scores = {}
+            presence_threshold = 0.35
+            total_score, counted_articles = 0, 0
+            for i, (art_num, art_data) in enumerate(gdpr_map.items()):
+                max_sim = np.max(sim_matrix[i])
+                best_idx = np.argmax(sim_matrix[i])
+                if max_sim < presence_threshold:
+                    continue
+                score_pct = min(100, max(0, (max_sim - presence_threshold) / (1 - presence_threshold) * 100))
+                article_scores[art_num] = {
+                    "article_title": art_data["title"],
+                    "compliance_percentage": round(score_pct, 2),
+                    "similarity_score": round(max_sim, 4),
+                    "matched_text_snippet": chunks[best_idx][:300] + "..."
+                }
+                total_score += score_pct
+                counted_articles += 1
+            overall = round(total_score / counted_articles, 2) if counted_articles else 0
+            result = {
+                "overall_compliance_percentage": overall,
+                "relevant_articles_analyzed": counted_articles,
+                "total_policy_chunks": len(chunks),
+                "article_scores": article_scores
+            }
+        elif model_choice == "Knowledge Graphs":
+            EMBED_MODEL = "all-MiniLM-L6-v2"
+            model = SentenceTransformer(EMBED_MODEL)
+            TOP_N = 1
+            BASE_URI = "http://example.org/gdpr#"
+            gdpr_embeddings = {}
+            gdpr_map = {}
+            for art in gdpr_data:
+                number, title = art["article_number"], art["article_title"]
+                art_text = prepare_article_text(art)
+                gdpr_embeddings[art["article_number"]] = {
+                    "embedding": model.encode(art_text),
+                    "title": art["article_title"],
+                    "uri": URIRef(f"{BASE_URI}Article{art['article_number']}")
+                }
+                gdpr_map[number] = {"title": title, "text": art_text}
+            g = Graph()
+            EX = Namespace(BASE_URI)
+            g.bind("ex", EX)
+            # Add article nodes
+            for num, info in gdpr_embeddings.items():
+                g.add((info["uri"], RDF.type, EX.Article))
+                g.add((info["uri"], RDFS.label, Literal(f"Article {num}: {info['title']}")))
+            # Extract GDPR article vectors
+            article_nums = list(gdpr_embeddings.keys())
+            article_vectors = np.array([gdpr_embeddings[num]["embedding"] for num in article_nums])
+            # Score tracking
+            total_score = 0
+            counted_sections = 0
+            chunks = chunk_policy_text(policy_text)
+            report = []
+            presence_threshold = 0.35
+            # Process each policy chunk
+            for idx, text in enumerate(chunks, start=1):
+                if not text.strip():
+                    continue
+                # RDF section node
+                sec_uri = URIRef(f"{BASE_URI}PolicySection{idx}")
+                g.add((sec_uri, RDF.type, EX.PolicySection))
+                g.add((sec_uri, RDFS.label, Literal(f"Section {idx}")))
+                # Embed section
+                sec_emb = model.encode(text)
+                # Similarities to all articles
+                sims = []
+                for i, art_num in enumerate(article_nums):
+                    art_emb = article_vectors[i]
+                    sim = cosine_similarity([sec_emb], [art_emb])[0][0]
+                    sims.append({
+                        "article": art_num,
+                        "title": gdpr_embeddings[art_num]["title"],
+                        "similarity": round(sim, 4),
+                        "uri": gdpr_embeddings[art_num]["uri"],
+                        "text": gdpr_map[art_num]["text"]
+                    })
+                # Sort and pick best match
+                sims.sort(key=lambda x: x["similarity"], reverse=True)
+                top_match = sims[0]
+                # Threshold filtering
+                if top_match["similarity"] < presence_threshold:
+                    continue
+                # Compliance score
+                score_pct = min(100, max(0, (top_match["similarity"] - presence_threshold) / (1 - presence_threshold) * 100))
+                # Add RDF triples
+                g.add((sec_uri, EX.relatesTo, top_match["uri"]))
+                g.add((sec_uri, EX.similarityScore, Literal(top_match["similarity"], datatype=XSD.float)))
+                g.serialize(destination="gdpr_policy_graph.ttl", format="turtle")
+                total_score += score_pct
+                counted_sections += 1
+            # Final summary
+            overall = round(total_score / counted_sections, 2) if counted_sections else 0
+            result = {
+                "overall_compliance_percentage": overall,
+                "relevant_sections_analyzed": counted_sections,
+                "total_policy_sections": len(chunks),
+                "ttl": True
+            }
+        else:
+            result = {}
+    if result:
+        st.subheader(f"✅ Overall Compliance Score: {result['overall_compliance_percentage']}%")
+        st.markdown("---")
+        st.subheader("📋 Detailed Article Breakdown")
+        ttl_file_path = "gdpr_policy_graph.ttl"
+        if result.get('article_scores'):
+            for art_num, data in sorted(result['article_scores'].items(), key=lambda x: -x[1]['compliance_percentage']):
+                with st.expander(f"Article {art_num} - {data['article_title']} ({data['compliance_percentage']}%)"):
+                    st.write(f"**Similarity Score**: {data['similarity_score']}")
+                    st.write(f"**Matched Text**:\n\n{data['matched_text_snippet']}")
+        elif result.get("ttl") and os.path.exists(ttl_file_path):
+            st.markdown("---")
+            st.subheader("🧠 Interactive RDF Graph Visualization")
+            g = Graph()
+            g.parse(ttl_file_path, format="ttl")
+            nx_graph = rdflib_to_networkx(g)
+            net = draw_pyvis_graph(nx_graph)
+            # Save the interactive graph temporarily
+            net.save_graph("rdf_graph.html")
+            HtmlFile = open("rdf_graph.html", "r", encoding="utf-8").read()
+            # Display interactive graph inside Streamlit
+            components.html(HtmlFile, height=650, scrolling=True)
+        else:
+            st.info("No article scores or RDF graph to display.")
+else:
+    st.info("Please upload both a GDPR JSON file and a company policy text file to begin.")

src/gdpr_articles_baseline.json ADDED Viewed

The diff for this file is too large to render. See raw diff

src/logistic_regression_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091220aebae9dc7e864e5dea7c53cee4257342d759fe88bae43e247e4f75c2dd
+size 8558559

src/logistic_regression_vectorizer.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:300da132d26d2172ca64ce66dfb522048fc3b0238e93c850c849744c69c7c46c
+size 255317

src/multinomialNB_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:023df78556de03380ff5a50a0c53c9c2f10bb330f84d4ead95a465aec8c5c84e
+size 17115775

src/multinomialNB_vectorizer.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:300da132d26d2172ca64ce66dfb522048fc3b0238e93c850c849744c69c7c46c
+size 255317

src/requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+pandas
+scikit-learn
+nltk
+joblib
+streamlit
+torch
+transformers
+numpy
+matplotlib
+seaborn
+sentence_transformers
+rdflib
+openai
+python-dotenv
+rdflib
+networkx
+pyvis

src/sentence_transformer_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:517b22c8e93043f88e3fcc73d567e88d3ac9da66babd9d061ed0ca30ed58c6fc
+size 91394608