File size: 3,762 Bytes
6d7c62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339fe7c
 
9b72ecc
6d7c62b
 
 
 
 
903d923
 
6d7c62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
903d923
6d7c62b
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import asyncio
import os

# Streamlit executes the script in a worker thread where no asyncio event
# loop is attached by default; make sure one exists before the async-capable
# clients below are imported/used.
try:
    asyncio.get_running_loop()
except RuntimeError:
    asyncio.set_event_loop(asyncio.new_event_loop())

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM
from langchain.docstore.document import Document
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import InferenceClient

# Disable Streamlit's file/module watcher for this process
# (presumably to stop it from introspecting torch/transformers lazy modules
# on rerun — TODO confirm this is still needed).
os.environ["STREAMLIT_WATCHER_TYPE"] = "none"

@st.cache_resource
def load_llm():
    """Build the hosted-inference client once and reuse it across reruns."""
    return InferenceClient(model="microsoft/phi-3-mini-4k-instruct")

# 🧠 Embedder is cached as a resource so the model weights load only once.
@st.cache_resource
def load_embedder():
    """Return the shared sentence-transformer embedding model."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)

# Materialize the cached resources up front; later sections use these handles.
llm = load_llm()
embedder = load_embedder()

# Sidebar Upload — the user either pastes raw text or uploads a
# .txt/.pdf/.docx file; either way the extracted text lands in `uploaded_text`.
st.sidebar.title("πŸ“„ Upload Terms & Conditions")
input_mode = st.sidebar.radio("Choose Input Method", ["πŸ“‹ Paste Text", "πŸ“ Upload File"])


def _extract_upload(file_obj, suffix, loader_cls):
    """Spill the uploaded bytes to a unique temp file, run the given
    langchain loader on it, and always delete the file afterwards.

    Fixes the previous behavior of writing to a fixed name
    ("temp.pdf"/"temp.docx") in the working directory and never removing it,
    which leaked files and could collide between concurrent sessions.
    """
    import tempfile

    # delete=False so the loader can reopen the path on all platforms.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(file_obj.read())
        path = tmp.name
    try:
        docs = loader_cls(path).load()
        return "\n".join(d.page_content for d in docs)
    finally:
        os.remove(path)


uploaded_text = ""
if input_mode == "πŸ“‹ Paste Text":
    uploaded_text = st.sidebar.text_area("Paste your T&C text here")
elif input_mode == "πŸ“ Upload File":
    uploaded_file = st.sidebar.file_uploader("Upload a .txt, .pdf, or .docx file", type=["txt", "pdf", "docx"])
    if uploaded_file:
        if uploaded_file.type == "text/plain":
            # errors="replace" keeps a mis-encoded .txt from crashing the app.
            uploaded_text = uploaded_file.read().decode("utf-8", errors="replace")
        elif uploaded_file.type == "application/pdf":
            uploaded_text = _extract_upload(uploaded_file, ".pdf", PyMuPDFLoader)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            uploaded_text = _extract_upload(uploaded_file, ".docx", Docx2txtLoader)

# βœ… Vectorstore setup — (re)build the FAISS index whenever the document text
# changes. The previous gate (`"db" not in st.session_state`) built the index
# only once per session, so uploading a *different* document silently kept
# answering from the stale index.
if uploaded_text:
    st.success("βœ… Document loaded and processed!")

    # hash() is stable within one process, which is all a session needs.
    doc_fingerprint = hash(uploaded_text)
    if st.session_state.get("doc_fingerprint") != doc_fingerprint:
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        documents = text_splitter.create_documents([uploaded_text])
        st.session_state.db = FAISS.from_documents(documents, embedder)
        st.session_state.chat_history = []  # new document → fresh conversation
        st.session_state.doc_fingerprint = doc_fingerprint

# πŸ’¬ Chat section — reachable only once a document has been indexed.
if "db" in st.session_state:
    st.title("🧾 Legal Assistant Chat")
    st.markdown("Ask anything about the uploaded document.")

    user_input = st.chat_input("Type your question here...")

    if user_input:
        with st.spinner("πŸ€– Thinking..."):
            # Pull the 3 most relevant chunks and stitch them into one context.
            retriever = st.session_state.db.as_retriever(search_kwargs={"k": 3})
            context = "\n\n".join(
                hit.page_content for hit in retriever.invoke(user_input)
            )

            prompt = f"""You are a helpful legal assistant.
Based on the following contract, answer the user's question, This application built by Vighnesh.

Context:
{context}

Question:
{user_input}

Answer:"""

            answer = llm.text_generation(prompt, max_new_tokens=200)

            # Record both turns; the replay loop below renders them.
            st.session_state.chat_history.extend(
                [("user", user_input), ("assistant", answer)]
            )

    # Replay the whole conversation on every rerun.
    for role, message in st.session_state.chat_history:
        slot = "user" if role == "user" else "assistant"
        st.chat_message(slot).write(message)