Spaces:

harshithasudhakar
/

text-simplification

Sleeping

App Files Community

harshithasudhakar committed on Apr 4, 2025

Commit

941e747

verified ·

1 Parent(s): 877c158

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -38

app.py CHANGED Viewed

@@ -3,61 +3,59 @@ from transformers import pipeline
 import streamlit as st
 import fitz  # PyMuPDF for PDF text extraction
-st.set_page_config(page_title="Text Simplifier", layout="centered")
-st.title("📚 Jargon Simplifier")
-st.write("This tool simplifies complex or academic text into easier, plain language.")
-# ---------------------------- Available Models ----------------------------
 MODEL_OPTIONS = {
-    "PEGASUS (Simplification - pszemraj)": "pszemraj/pegasus-xsum-simplify",
-    "T5 Small (Prompted Simplify)": "t5-small",
-    "T5 Base (Prompted Simplify)": "t5-base"
 }
-# ---------------------------- Model Selection ----------------------------
-selected_model = st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()))
-model_name = MODEL_OPTIONS[selected_model]
-@st.cache_resource(show_spinner=True)
-def load_model(name):
-    return pipeline("text2text-generation", model=name)
-simplifier = load_model(model_name)
-# ---------------------------- Simplification Function ----------------------------
-def simplify_text(text, model_name):
-    if "t5" in model_name:
-        text = "simplify: " + text  # T5 needs task prefix
-    output = simplifier(text, max_length=256, min_length=30, do_sample=False)
-    return output[0]['generated_text']
-# ---------------------------- PDF Extraction ----------------------------
 def extract_text_from_pdf(uploaded_file):
     with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
-        text = "\n".join(page.get_text("text") for page in doc)
     return text
-# ---------------------------- UI ----------------------------
 option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
 if option == "Text Input":
-    user_text = st.text_area("✍️ Enter complex text here:")
     if st.button("Simplify") and user_text.strip():
-        simplified_text = simplify_text(user_text.strip(), model_name)
-        st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
 elif option == "Upload PDF":
-    uploaded_file = st.file_uploader("📄 Upload a PDF file", type=["pdf"])
     if uploaded_file:
-        try:
             extracted_text = extract_text_from_pdf(uploaded_file)
-            preview = st.text_area("📄 Extracted Text Preview (first 1000 chars):", value=extracted_text[:1000], height=200)
-            if st.button("Simplify Extracted Text"):
-                simplified_text = simplify_text(extracted_text[:1000], model_name)
-                st.text_area("✅ Simplified Output:", value=simplified_text, height=200)
-        except Exception as e:
-            st.error(f"❌ Error reading PDF: {e}")
 st.markdown("---")
-st.caption("Made with ❤️ using HuggingFace Transformers and Streamlit.")

 import streamlit as st
 import fitz  # PyMuPDF for PDF text extraction
+# ------------------------------
+# Supported models
 MODEL_OPTIONS = {
+    "Long T5 (Scientific Simplifier)": "pszemraj/long-t5-tglobal-base-sci-simplify",
+    "T5 Base (General Simplifier)": "t5-base"
 }
+@st.cache_resource
+def load_model(model_name):
+    return pipeline("summarization", model=model_name)
+def simplify_text(text, simplifier, model_name):
+    try:
+        # T5 expects a "summarize: " prefix
+        if "t5" in model_name.lower():
+            text = "summarize: " + text
+        simplified = simplifier(text, max_length=256, min_length=30, do_sample=False)
+        return simplified[0]['summary_text']
+    except Exception as e:
+        return f"Error simplifying text: {e}"
 def extract_text_from_pdf(uploaded_file):
     with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
+        text = "\n".join(page.get_text() for page in doc)
     return text
+# ------------------------------
+# Streamlit UI
+st.set_page_config(page_title="Jargon Simplifier", layout="centered")
+st.title("🧠 Jargon to Simple: Academic Text Simplifier")
+selected_model_name = st.selectbox("Choose a simplification model:", list(MODEL_OPTIONS.keys()))
+model_id = MODEL_OPTIONS[selected_model_name]
+simplifier = load_model(model_id)
 option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))
 if option == "Text Input":
+    user_text = st.text_area("Enter complex academic text:")
     if st.button("Simplify") and user_text.strip():
+        with st.spinner("Simplifying..."):
+            simplified_output = simplify_text(user_text, simplifier, model_id)
+        st.text_area("Simplified Output:", value=simplified_output, height=200)
 elif option == "Upload PDF":
+    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     if uploaded_file:
+        with st.spinner("Extracting and simplifying text..."):
             extracted_text = extract_text_from_pdf(uploaded_file)
+            truncated_text = extracted_text[:2000]  # Trim for model input
+            simplified_output = simplify_text(truncated_text, simplifier, model_id)
+        st.text_area("Simplified Output:", value=simplified_output, height=200)
 st.markdown("---")
+st.markdown("Made with ❤️ by Harshitha")