# Docu_Analyzer / app.py
# Author: VGreatVig07 — commit 9b72ecc (verified)
import asyncio
import os

# --- Event-loop bootstrap ---------------------------------------------------
# Some hosts (e.g. Hugging Face Spaces) run Streamlit in a thread that has no
# default asyncio event loop; async-backed libraries below expect one, so
# create and install a loop if none is running.
try:
    asyncio.get_running_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

# Disable Streamlit's file watcher. NOTE: this must be set BEFORE importing
# streamlit — the variable is read at import time, so the original placement
# (after the import) had no effect.
os.environ["STREAMLIT_WATCHER_TYPE"] = "none"

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader, Docx2txtLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM
from langchain.docstore.document import Document
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import InferenceClient
@st.cache_resource
def load_llm():
    """Build (once per process, via Streamlit's resource cache) the
    Hugging Face Inference API client used to generate answers."""
    return InferenceClient(model="microsoft/phi-3-mini-4k-instruct")
# 🧠 Cache embedder
@st.cache_resource
def load_embedder():
    """Build (once per process) the MiniLM sentence-embedding model used
    to index document chunks for retrieval."""
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
llm = load_llm()
embedder = load_embedder()

# Sidebar Upload — the user supplies the document either as pasted text or as
# an uploaded .txt / .pdf / .docx file; both paths end in `uploaded_text`.
st.sidebar.title("πŸ“„ Upload Terms & Conditions")
input_mode = st.sidebar.radio("Choose Input Method", ["πŸ“‹ Paste Text", "πŸ“ Upload File"])
uploaded_text = ""


def _extract_via_tempfile(data, suffix, loader_cls):
    """Write *data* (bytes) to a unique temporary file, run the given
    LangChain loader on it, and return the concatenated page text.

    The temp file is always removed. (The original code wrote fixed names
    like "temp.pdf" that were never cleaned up and would collide between
    concurrent user sessions.)
    """
    import tempfile
    fd, path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        docs = loader_cls(path).load()
        return "\n".join(d.page_content for d in docs)
    finally:
        os.remove(path)


if input_mode == "πŸ“‹ Paste Text":
    uploaded_text = st.sidebar.text_area("Paste your T&C text here")
elif input_mode == "πŸ“ Upload File":
    uploaded_file = st.sidebar.file_uploader("Upload a .txt, .pdf, or .docx file", type=["txt", "pdf", "docx"])
    if uploaded_file:
        # Dispatch on the browser-reported MIME type of the upload.
        if uploaded_file.type == "text/plain":
            uploaded_text = uploaded_file.read().decode("utf-8")
        elif uploaded_file.type == "application/pdf":
            uploaded_text = _extract_via_tempfile(uploaded_file.read(), ".pdf", PyMuPDFLoader)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            uploaded_text = _extract_via_tempfile(uploaded_file.read(), ".docx", Docx2txtLoader)
# βœ… Vectorstore setup — (re)build the FAISS index whenever the document changes.
if uploaded_text:
    st.success("βœ… Document loaded and processed!")
    # Key the index on a hash of the text. The original guard
    # (`if "db" not in st.session_state`) built the index only once per
    # session, so uploading a *second* document silently kept answering
    # questions from the first one.
    import hashlib
    doc_key = hashlib.sha256(uploaded_text.encode("utf-8")).hexdigest()
    if st.session_state.get("doc_key") != doc_key:
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        documents = text_splitter.create_documents([uploaded_text])
        st.session_state.db = FAISS.from_documents(documents, embedder)
        st.session_state.doc_key = doc_key
        # Start a fresh conversation for the new document.
        st.session_state.chat_history = []
# πŸ’¬ Chat section — RAG question-answering over the indexed document.
if "db" in st.session_state:
    st.title("🧾 Legal Assistant Chat")
    st.markdown("Ask anything about the uploaded document.")

    user_input = st.chat_input("Type your question here...")
    if user_input:
        with st.spinner("πŸ€– Thinking..."):
            # Pull the 3 chunks most similar to the question and stitch them
            # into the prompt context.
            retriever = st.session_state.db.as_retriever(search_kwargs={"k": 3})
            hits = retriever.invoke(user_input)
            context = "\n\n".join(doc.page_content for doc in hits)
            prompt = (
                "You are a helpful legal assistant.\n"
                "Based on the following contract, answer the user's question, This application built by Vighnesh.\n"
                "Context:\n"
                f"{context}\n"
                "Question:\n"
                f"{user_input}\n"
                "Answer:"
            )
            answer = llm.text_generation(prompt, max_new_tokens=200)

        # Save chat history
        st.session_state.chat_history.append(("user", user_input))
        st.session_state.chat_history.append(("assistant", answer))

    # Display chat history — `role` is always "user" or "assistant", which are
    # exactly the names st.chat_message accepts.
    for role, message in st.session_state.chat_history:
        st.chat_message(role).write(message)