import asyncio
import os
import tempfile

# Some dependencies (torch / streamlit internals) probe for a running event
# loop at import time; make sure the current thread has one.
try:
    asyncio.get_running_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

# FIX: this must be set BEFORE importing streamlit — the watcher is
# configured during import, so the original (set after import) had no effect.
os.environ["STREAMLIT_WATCHER_TYPE"] = "none"

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM
from langchain.docstore.document import Document
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import InferenceClient


@st.cache_resource
def load_llm():
    """Return a cached Hugging Face InferenceClient for the chat model."""
    return InferenceClient(model="microsoft/phi-3-mini-4k-instruct")


# 🧠 Cache embedder
@st.cache_resource
def load_embedder():
    """Return a cached sentence-transformer embedder for chunk vectorization."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


llm = load_llm()
embedder = load_embedder()


def _extract_via_loader(loader_cls, payload: bytes, suffix: str) -> str:
    """Write *payload* to a temp file, parse it with *loader_cls*, and return
    the concatenated page text.

    The temp file is always removed afterwards — the original wrote fixed
    ``temp.pdf``/``temp.docx`` files into the working directory and leaked
    them (and would collide between concurrent sessions).
    """
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        tmp.write(payload)
        tmp.close()
        docs = loader_cls(tmp.name).load()
        return "\n".join(d.page_content for d in docs)
    finally:
        os.remove(tmp.name)


# Sidebar Upload
st.sidebar.title("📄 Upload Terms & Conditions")
input_mode = st.sidebar.radio("Choose Input Method", ["📋 Paste Text", "📁 Upload File"])

uploaded_text = ""
if input_mode == "📋 Paste Text":
    uploaded_text = st.sidebar.text_area("Paste your T&C text here")
elif input_mode == "📁 Upload File":
    uploaded_file = st.sidebar.file_uploader(
        "Upload a .txt, .pdf, or .docx file", type=["txt", "pdf", "docx"]
    )
    if uploaded_file:
        if uploaded_file.type == "text/plain":
            # errors="replace" keeps a non-UTF-8 text file from crashing the
            # app with UnicodeDecodeError (original decoded strictly).
            uploaded_text = uploaded_file.read().decode("utf-8", errors="replace")
        elif uploaded_file.type == "application/pdf":
            uploaded_text = _extract_via_loader(PyMuPDFLoader, uploaded_file.read(), ".pdf")
        elif uploaded_file.type == (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        ):
            uploaded_text = _extract_via_loader(Docx2txtLoader, uploaded_file.read(), ".docx")
# ✅ Vectorstore setup
if uploaded_text:
    st.success("✅ Document loaded and processed!")
    # FIX: rebuild the index whenever a *different* document is supplied.
    # The original only built it when "db" was absent, so uploading a new
    # file later in the same session was silently ignored.
    doc_key = hash(uploaded_text)
    if st.session_state.get("doc_key") != doc_key:
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.create_documents([uploaded_text])
        st.session_state.db = FAISS.from_documents(chunks, embedder)
        st.session_state.doc_key = doc_key

# 💬 Chat section
if "db" in st.session_state:
    # FIX: initialize chat_history unconditionally — the original only set it
    # on the index-build path, so a surviving db with no history would raise
    # on append/iterate below.
    st.session_state.setdefault("chat_history", [])

    st.title("🧾 Legal Assistant Chat")
    st.markdown("Ask anything about the uploaded document.")

    user_input = st.chat_input("Type your question here...")
    if user_input:
        with st.spinner("🤖 Thinking..."):
            # Retrieve the 3 most relevant chunks for the question.
            retriever = st.session_state.db.as_retriever(search_kwargs={"k": 3})
            docs = retriever.invoke(user_input)
            context = "\n\n".join(doc.page_content for doc in docs)

            prompt = f"""You are a helpful legal assistant. Based on the following contract, answer the user's question, This application built by Vighnesh.

Context:
{context}

Question: {user_input}

Answer:"""
            answer = llm.text_generation(prompt, max_new_tokens=200)

        # Save chat history
        st.session_state.chat_history.append(("user", user_input))
        st.session_state.chat_history.append(("assistant", answer))

    # Display chat history — role is always "user" or "assistant", which are
    # exactly the names st.chat_message accepts.
    for role, message in st.session_state.chat_history:
        st.chat_message(role).write(message)