# Streamlit legal-assistant chat app (Hugging Face Space).
# NOTE(review): replaced page-scrape residue ("Spaces: Sleeping") that was
# not valid Python with this comment header.
# --- Environment & event-loop bootstrap ----------------------------------
import asyncio
import os

# Streamlit reads configuration environment variables at import time, so this
# must be set BEFORE `import streamlit` to disable the file watcher (the
# original set it after the import, where it had no effect).
os.environ["STREAMLIT_WATCHER_TYPE"] = "none"

# Streamlit script threads may not have a default asyncio event loop; create
# one so async-backed libraries (HTTP clients, etc.) can run without errors.
try:
    asyncio.get_running_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM  # NOTE(review): unused in visible code
from langchain.docstore.document import Document  # NOTE(review): unused in visible code
from transformers import AutoModelForCausalLM, AutoTokenizer  # NOTE(review): unused
import torch  # NOTE(review): unused in visible code
from huggingface_hub import InferenceClient
@st.cache_resource
def load_llm():
    """Return a Hugging Face InferenceClient for the phi-3-mini chat model.

    Decorated with ``st.cache_resource`` so the client is constructed once
    per session instead of on every Streamlit script rerun.
    """
    return InferenceClient(model="microsoft/phi-3-mini-4k-instruct")
# 🔧 Cache the embedder: loading the sentence-transformer weights is slow, so
# do it once per session rather than on every Streamlit rerun.
@st.cache_resource
def load_embedder():
    """Return the MiniLM sentence-embedding model used to index documents."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
import tempfile

llm = load_llm()
embedder = load_embedder()

# --- Sidebar: document input ---------------------------------------------
st.sidebar.title("📄 Upload Terms & Conditions")
input_mode = st.sidebar.radio("Choose Input Method", ["📋 Paste Text", "📄 Upload File"])

# MIME type -> (temp-file suffix, LangChain loader class) for binary formats
# that must be materialized on disk before their loader can read them.
_BINARY_LOADERS = {
    "application/pdf": (".pdf", PyMuPDFLoader),
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
        ".docx",
        Docx2txtLoader,
    ),
}

uploaded_text = ""
if input_mode == "📋 Paste Text":
    uploaded_text = st.sidebar.text_area("Paste your T&C text here")
elif input_mode == "📄 Upload File":
    uploaded_file = st.sidebar.file_uploader(
        "Upload a .txt, .pdf, or .docx file", type=["txt", "pdf", "docx"]
    )
    if uploaded_file:
        if uploaded_file.type == "text/plain":
            uploaded_text = uploaded_file.read().decode("utf-8")
        elif uploaded_file.type in _BINARY_LOADERS:
            suffix, loader_cls = _BINARY_LOADERS[uploaded_file.type]
            # Write to a proper temp file and always remove it afterwards —
            # the original wrote temp.pdf/temp.docx into the working
            # directory and never cleaned them up.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                tmp.write(uploaded_file.read())
                tmp_path = tmp.name
            try:
                docs = loader_cls(tmp_path).load()
                uploaded_text = "\n".join(d.page_content for d in docs)
            finally:
                os.remove(tmp_path)
# --- ✅ Vectorstore setup -------------------------------------------------
import hashlib

if uploaded_text:
    st.success("✅ Document loaded and processed!")
    # Key the index on a digest of the document so uploading a *different*
    # document rebuilds the FAISS index. The original guarded on
    # `"db" not in st.session_state`, so a second upload silently kept
    # answering questions from the first document.
    doc_key = hashlib.sha256(uploaded_text.encode("utf-8")).hexdigest()
    if st.session_state.get("doc_key") != doc_key:
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        documents = splitter.create_documents([uploaded_text])
        st.session_state.db = FAISS.from_documents(documents, embedder)
        st.session_state.chat_history = []  # fresh conversation per document
        st.session_state.doc_key = doc_key
# --- 💬 Chat section ------------------------------------------------------
if "db" in st.session_state:
    st.title("🧾 Legal Assistant Chat")
    st.markdown("Ask anything about the uploaded document.")

    user_input = st.chat_input("Type your question here...")
    if user_input:
        with st.spinner("🤔 Thinking..."):
            # Ground the answer on the 3 most relevant chunks of the document.
            retriever = st.session_state.db.as_retriever(search_kwargs={"k": 3})
            docs = retriever.invoke(user_input)
            context = "\n\n".join(doc.page_content for doc in docs)
            prompt = f"""You are a helpful legal assistant.
Based on the following contract, answer the user's question, This application built by Vighnesh.
Context:
{context}
Question:
{user_input}
Answer:"""
            # The inference endpoint is a remote HTTP call: report failures
            # to the user instead of crashing the whole script run.
            try:
                answer = llm.text_generation(prompt, max_new_tokens=200)
            except Exception as exc:  # boundary with a remote service
                st.error(f"LLM request failed: {exc}")
                answer = None

        if answer is not None:
            # Persist the turn so it survives Streamlit reruns.
            st.session_state.chat_history.append(("user", user_input))
            st.session_state.chat_history.append(("assistant", answer))

    # Re-render the full conversation on every rerun.
    for role, message in st.session_state.chat_history:
        st.chat_message(role).write(message)