Spaces:

mo-mazen
/

iiii

Sleeping

App Files Files Community

mo-mazen committed on Nov 18, 2025

Commit

9c2b411

verified ·

1 Parent(s): 70703b9

Upload insightX.py

Browse files

Files changed (1) hide show

insightX.py +95 -0

insightX.py ADDED Viewed

	@@ -0,0 +1,95 @@

+# Run this app with:
+# python -m streamlit run "d:/Code/project 1/insightX.py"
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import pandas as pd
+import docx2txt
+import PyPDF2
+# Load model and tokenizer
+@st.cache_resource(show_spinner=False)
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("google/long-t5-tglobal-base")
+    model = AutoModelForSeq2SeqLM.from_pretrained("google/long-t5-tglobal-base")
+    return tokenizer, model
+tokenizer, model = load_model()
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+st.title("🧠 InsightX Chat")
+st.write("Chat with Long-T5 to summarize, rewrite, or explore long-form text. You can also upload a file.")
+# Summary length slider
+max_output_length = st.slider("Summary length (tokens)", min_value=128, max_value=1024, value=512)
+# Chunking function
+def chunk_text(text, chunk_size=16384):
+    tokens = tokenizer.encode(text)
+    return [tokens[i:i+chunk_size] for i in range(0, len(tokens), chunk_size)]
+# Summarization function
+def summarize_long_text(text):
+    chunks = chunk_text(text)
+    summaries = []
+    for chunk in chunks:
+        input_ids = torch.tensor([chunk])
+        with torch.no_grad():
+            output_ids = model.generate(input_ids, max_length=max_output_length)
+        summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        summaries.append(summary)
+    return "\n\n".join(summaries)
+# File uploader
+uploaded_file = st.file_uploader("Upload a file (PDF, Word, Excel, CSV)", type=["pdf", "docx", "xlsx", "csv"])
+file_text = ""
+if uploaded_file:
+    file_type = uploaded_file.name.split(".")[-1].lower()
+    try:
+        if file_type == "pdf":
+            reader = PyPDF2.PdfReader(uploaded_file)
+            file_text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
+        elif file_type == "docx":
+            file_text = docx2txt.process(uploaded_file)
+        elif file_type == "xlsx":
+            df = pd.read_excel(uploaded_file)
+            file_text = df.to_string(index=False)
+        elif file_type == "csv":
+            df = pd.read_csv(uploaded_file)
+            file_text = df.to_string(index=False)
+    except Exception as e:
+        st.error(f"Error reading file: {e}")
+    if file_text:
+        st.session_state.messages.append({"role": "user", "content": f"(Uploaded file)\n{file_text}"})
+        with st.chat_message("user"):
+            with st.expander("View Uploaded Text"):
+                st.text_area("File Content", file_text, height=300)
+        output_text = summarize_long_text(file_text)
+        st.session_state.messages.append({"role": "assistant", "content": output_text})
+        with st.chat_message("assistant"):
+            st.markdown(output_text)
+# Chat input
+user_input = st.chat_input("Type your message or paste long text here...")
+if user_input:
+    st.session_state.messages.append({"role": "user", "content": user_input})
+    with st.chat_message("user"):
+        st.markdown(user_input)
+    # Custom response for "hello"
+    if user_input.strip().lower() == "hello":
+        output_text = "How can I help you?"
+    else:
+        output_text = summarize_long_text(user_input)
+    st.session_state.messages.append({"role": "assistant", "content": output_text})
+    with st.chat_message("assistant"):
+        st.markdown(output_text)