AI-Solutions-KK committed on
Commit
6416ff6
·
unverified ·
1 Parent(s): f89b8b7

Testing app

Browse files

Old_paraphraser_app.py_single_long_code_streamlit_test

Files changed (2) hide show
  1. app.py +841 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,841 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ Merged Rephraser app
4
+ - GUI from original (first) file
5
+ - Models/logic from later big file (kept unchanged)
6
+ - Grammar highlight (red for issues; green underline for corrected words)
7
+ - File upload/download for .docx/.pdf/.txt with best-effort format preservation
8
+ - Tools independent (no automatic chaining)
9
+ - Prev/Next browsing for multi-version outputs
10
+ """
11
+
12
+ import streamlit as st
13
+ import io, os, random, re, difflib, html, tempfile
14
+ from pathlib import Path
15
+
16
# --- "🏠 Home" button rendered at the very top of the page ---
# NOTE(review): st.rerun() restarts the script but does not clear
# st.session_state, so this is a soft refresh rather than a true reset.
if st.button("🏠 Home"):
    st.rerun()
21
+
22
+
23
# Optional heavy libs (lazy imports used where needed).
# Each dependency is probed individually; a module-level flag/None sentinel
# records availability so the rest of the app can degrade gracefully.
try:
    import docx  # python-docx, for .docx read/write
except Exception:
    docx = None

try:
    import fitz  # PyMuPDF, for PDF read/write
except Exception:
    fitz = None

try:
    import language_tool_python  # grammar checking (needs Java at runtime)
except Exception:
    language_tool_python = None

try:
    from textblob import TextBlob  # fallback grammar/spelling corrector
except Exception:
    TextBlob = None

# NLTK / WordNet — used for synonym substitution and POS tagging
try:
    import nltk
    from nltk.corpus import wordnet as wn
    nltk_available = True
except Exception:
    nltk_available = False

# spaCy — used for noun-chunk based paraphrase transforms
try:
    import spacy
    nlp = spacy.load("en_core_web_sm")
    SPACY_AVAILABLE = True
except Exception:
    nlp = None
    SPACY_AVAILABLE = False

# transformers check — heavyweight HF paraphrase models are optional
try:
    import transformers
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False

# SpellChecker (pyspellchecker) — `spell` is only defined when available;
# all call sites are guarded by SPELLCHECKER_AVAILABLE.
try:
    from spellchecker import SpellChecker
    SPELLCHECKER_AVAILABLE = True
    spell = SpellChecker()
except Exception:
    SPELLCHECKER_AVAILABLE = False

# pyperclip optional — clipboard copy; falls back to a temp file when missing
try:
    import pyperclip
    PYPERCLIP = True
except Exception:
    PYPERCLIP = False
82
+
83
# -----------------------
# Session state init (preserve old behavior)
# -----------------------
# Every key the app relies on, declared in one table; each key is seeded
# only when absent so values survive Streamlit reruns.
_SESSION_DEFAULTS = {
    # core editing state
    "versions": [],
    "version_index": 0,
    "last_input": "",
    "current_text": "",
    "history": [],
    # bookkeeping for file uploads & grammar
    "_uploaded_bytes": None,
    "_uploaded_name": None,
    "_last_grammar_issues": None,
    "_last_output_file": None,
    "_last_output_name": None,
    "_last_tool": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
109
+
110
+ # -----------------------
111
+ # Helpers: highlights & diffs
112
+ # -----------------------
113
def mark_grammar_issues(text, issues):
    """Render *text* as HTML with each grammar issue underlined in red.

    Parameters:
        text: the original plain text.
        issues: list of dicts with "offset", "length" and "message" keys
            (as produced by grammar_and_spelling_check).

    Returns:
        HTML-escaped string; each issue span is wrapped in a <span> whose
        title attribute carries the message. Overlapping spans are clipped
        so no text is duplicated.
    """
    if not issues:
        return html.escape(text)
    spans = []
    for it in issues:
        off = it.get("offset", 0)
        length = it.get("length", 0)
        msg = it.get("message", "")
        spans.append((off, off + length, msg))
    spans.sort()
    parts = []
    idx = 0
    for s, e, msg in spans:
        # BUGFIX: clip spans that overlap an already-emitted region — the
        # original duplicated text and could move the cursor backwards.
        s = max(s, idx)
        if e <= s:
            continue
        if s > idx:
            parts.append(html.escape(text[idx:s]))
        problem = html.escape(text[s:e])
        parts.append(f'<span title="{html.escape(msg)}" style="border-bottom:2px solid #c0392b;">{problem}</span>')
        idx = e
    if idx < len(text):
        parts.append(html.escape(text[idx:]))
    return "".join(parts)
135
+
136
def underline_changes_in_output(orig, corrected):
    """Render *corrected* as HTML, green-underlining every token run that
    differs from *orig* (token-level diff; deleted tokens are dropped)."""
    GREEN = '<span style="text-decoration: underline; text-decoration-color: #27ae60;">{}</span>'
    src_tokens = orig.split()
    dst_tokens = corrected.split()
    matcher = difflib.SequenceMatcher(a=src_tokens, b=dst_tokens)
    rendered = []
    for op, _a0, _a1, b0, b1 in matcher.get_opcodes():
        segment = " ".join(dst_tokens[b0:b1])
        if op == "equal":
            rendered.append(segment)
        elif op in ("replace", "insert"):
            rendered.append(GREEN.format(html.escape(segment)))
        # "delete": the tokens exist only in orig — nothing to render.
    return " ".join(rendered) if rendered else html.escape(corrected)
153
+
154
+ ## Green line
155
+ import html
156
+ import difflib
157
+
158
def text_to_html_with_highlights(orig, new):
    """Compare *orig* and *new* word-by-word and return *new* as HTML with
    added/changed words underlined in green; removed words are dropped."""
    CHANGED = "<span style='color:black;text-decoration:underline;text-decoration-color:green'>{}</span>"
    pieces = []
    for entry in difflib.ndiff(orig.split(), new.split()):
        code, token = entry[:2], entry[2:]
        if code == "+ ":
            # Token added or changed in the new text.
            pieces.append(CHANGED.format(html.escape(token)))
        elif code == "  ":
            # Token unchanged.
            pieces.append(html.escape(token))
        # "- " (removed) and "? " (ndiff hint) entries are skipped.
    return " ".join(pieces)
178
+
179
+ # -----------------------
180
+ # Paraphraser functions (kept from your big code)
181
+ # -----------------------
182
def paraphrase_variants_fast(text, n_variants=3):
    """Produce up to *n_variants* lightweight paraphrases of *text*.

    Splits the text into sentences, then per variant applies cheap random
    transforms: with spaCy, occasional noun-chunk swaps / comma-clause
    shuffles / synonym substitution; without spaCy, occasional adjacent-word
    swaps or synonym substitution. Duplicate/empty variants are dropped.

    Returns:
        list of distinct variant strings (possibly shorter than n_variants;
        empty for blank input).
    """
    text = text.strip()
    if not text:
        return []
    sents = re.split(r'(?<=[.!?])\s+', text)
    variants = []
    for v in range(n_variants):
        outs = []
        for s in sents:
            sent = s.strip()
            if not sent:
                continue
            if SPACY_AVAILABLE:
                doc = nlp(sent)
                # Small structural transform: swap the first two noun chunks
                # via a placeholder.
                if random.random() < 0.3 and len(list(doc.noun_chunks)) >= 2:
                    chunks = list(doc.noun_chunks)
                    text_chunks = [c.text for c in chunks]
                    s2 = sent
                    try:
                        s2 = s2.replace(text_chunks[0], "<<<A>>>").replace(text_chunks[1], text_chunks[0]).replace("<<<A>>>", text_chunks[1])
                    except Exception:
                        s2 = sent
                    outs.append(s2)
                    continue
                # Occasionally shuffle comma-separated clauses.
                if ',' in sent and random.random() < 0.4:
                    parts = [p.strip() for p in sent.split(',')]
                    random.shuffle(parts)
                    outs.append(", ".join(parts))
                    continue
                outs.append(_synonym_replace(sent, prob=0.15 + 0.05 * v))
            else:
                if random.random() < 0.2:
                    # Swap one adjacent word pair.
                    words = sent.split()
                    if len(words) > 3:
                        i = random.randint(0, len(words) - 3)
                        words[i], words[i+1] = words[i+1], words[i]
                    outs.append(" ".join(words))
                else:
                    outs.append(_synonym_replace(sent, prob=0.12 + 0.04 * v))
        # Occasionally reorder this variant's sentences.
        # BUGFIX: the original shuffled `sents` (mutating the shared list for
        # all later variants) and then re-joined the *unshuffled* `outs`, so
        # the shuffle never affected the emitted variant.
        if random.random() < 0.3 and len(sents) > 1:
            shuffled = outs[:]
            random.shuffle(shuffled)
            final = " ".join(shuffled)
        else:
            final = " ".join(outs)
        variants.append(final)
    uniq = []
    for x in variants:
        if x not in uniq and x.strip():
            uniq.append(x)
    return uniq[:n_variants]
232
+
233
def _synonym_replace(sentence, prob=0.12, max_replacements=2):
    """Randomly substitute up to *max_replacements* words with WordNet
    synonyms (probability *prob* per word). Without NLTK, falls back to
    randomly swapping word positions instead."""
    if not nltk_available:
        # No WordNet: shuffle word positions with the same per-word chance.
        toks = sentence.split()
        for idx in range(len(toks)):
            if random.random() < prob:
                other = random.randrange(len(toks))
                toks[idx], toks[other] = toks[other], toks[idx]
        return " ".join(toks)

    def _first_single_word_synonym(word_lc):
        # First WordNet lemma that differs from the word and is one token.
        for synset in wn.synsets(word_lc):
            for lemma in synset.lemmas():
                candidate = lemma.name().replace('_', ' ')
                if candidate.lower() != word_lc and ' ' not in candidate:
                    return candidate
        return None

    # Tokenize while keeping separators so the sentence can be rebuilt
    # with "".join().
    pieces = re.findall(r"\w+|\W+", sentence)
    swapped = 0
    for pos, piece in enumerate(pieces):
        if not re.match(r'\w+', piece):
            continue  # punctuation / whitespace token
        lowered = piece.lower()
        if random.random() > prob:
            continue
        if not wn.synsets(lowered):
            continue
        replacement = _first_single_word_synonym(lowered)
        if replacement:
            if piece[0].isupper():
                replacement = replacement.capitalize()
            pieces[pos] = replacement
            swapped += 1
        if swapped >= max_replacements:
            break
    return "".join(pieces)
270
+
271
def simple_mix_versions(versions_list):
    """Blend several candidate texts into one by sampling up to three
    sentences from each and shuffling the combined pool."""
    if not versions_list:
        return ""
    pool = []
    for candidate in versions_list:
        stripped = candidate.strip()
        if not stripped:
            continue
        sentences = re.split(r'(?<=[.!?])\s+', stripped)
        quota = max(1, min(3, len(sentences)))
        if len(sentences) > quota:
            pool.extend(random.sample(sentences, quota))
        else:
            pool.extend(sentences)
    random.shuffle(pool)
    return " ".join(pool)
285
+
286
# -----------------------
# Plagiarism remover (kept)
# -----------------------
@st.cache_resource(show_spinner=False)
def load_small_model(model_name="t5-small"):
    """Load and cache a HF seq2seq model + tokenizer + CPU pipeline.

    Cached via st.cache_resource so each model is loaded once per server
    process. Raises ImportError when transformers is not installed.
    Returns (tokenizer, model, text2text pipeline).
    """
    if not TRANSFORMERS_AVAILABLE:
        raise ImportError("transformers not installed")
    # Imported lazily so the app starts even without transformers installed.
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
    tok = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # device=-1 forces CPU inference.
    pipe = pipeline("text2text-generation", model=model, tokenizer=tok, device=-1)
    return tok, model, pipe
298
+
299
def hf_paraphrase_with_pipe(pipe, text, max_len=256):
    """Run *text* through a HF text2text pipeline; on any failure fall back
    to returning the input unchanged."""
    try:
        result = pipe(
            text,
            max_length=max_len,
            do_sample=True,
            top_p=0.95,
            temperature=0.8,
            num_return_sequences=1,
        )
        if not (isinstance(result, list) and result):
            return str(result)
        first = result[0]
        return first.get("generated_text") or first.get("summary_text") or str(first)
    except Exception:
        # Best-effort: a broken pipeline degrades to the original text.
        return text
307
+
308
def plagiarism_remover_pipeline(text, aggressive=1, light_only=False):
    """Generate up to five rewrites of *text* intended to reduce textual overlap.

    Always includes a fast lexical paraphrase; when transformers are
    available and *light_only* is False, also tries T5 and Pegasus rewrites;
    finally appends a sentence-level remix of everything produced so far.

    Parameters:
        text: input passage.
        aggressive: currently unused (kept for interface compatibility).
        light_only: skip the heavyweight HF models when True.

    Returns:
        de-duplicated list of non-empty variants (at most 5).
    """
    versions = []
    # BUGFIX: call the fast paraphraser once instead of twice — the original
    # invoked it a second time just to test emptiness, doing the work twice
    # and indexing into a *different* random result than the one it tested.
    light_variants = paraphrase_variants_fast(text, n_variants=1)
    versions.append(light_variants[0] if light_variants else text)
    if TRANSFORMERS_AVAILABLE and not light_only:
        try:
            _, _, t5_pipe = load_small_model("t5-small")
            versions.append(hf_paraphrase_with_pipe(t5_pipe, "paraphrase: " + text))
        except Exception:
            pass  # best-effort: missing weights / download failure is fine
        try:
            _, _, p_pipe = load_small_model("google/pegasus-xsum")
            versions.append(hf_paraphrase_with_pipe(p_pipe, text))
        except Exception:
            pass
    versions.append(simple_mix_versions(versions))
    uniq = []
    for v in versions:
        if v and v.strip() and v not in uniq:
            uniq.append(v)
        if len(uniq) >= 5:
            break
    return uniq
334
+
335
+ # -----------------------
336
+ # Grammar & Spelling (kept)
337
+ # -----------------------
338
def grammar_and_spelling_check(text):
    """Correct grammar/spelling in *text*.

    Prefers LanguageTool (detailed issue list), falls back to TextBlob
    (no issue details), and finally returns the input unchanged.

    Returns:
        (corrected_text, issues) where issues is a list of dicts with
        "message", "replacements", "offset", "length" and "context" keys.
    """
    if language_tool_python is not None:
        try:
            tool = language_tool_python.LanguageTool('en-US')
            matches = tool.check(text)
            corrected = language_tool_python.utils.correct(text, matches)
            issues = [
                {
                    "message": m.message,
                    "replacements": m.replacements,
                    "offset": m.offset,
                    "length": m.errorLength,
                    # ±30 chars of surrounding text for display purposes.
                    "context": text[max(0, m.offset - 30): m.offset + 30],
                }
                for m in matches
            ]
            return corrected, issues
        except Exception:
            pass  # e.g. Java missing — fall through to TextBlob
    if TextBlob is not None:
        try:
            return str(TextBlob(text).correct()), []
        except Exception:
            pass
    # No checker available: return the text untouched.
    return text, []
364
+
365
def spelling_suggestions(word, top_n=5):
    """Return up to *top_n* spelling candidates for *word*; empty when
    pyspellchecker is unavailable.

    NOTE(review): this definition is shadowed by a richer, context-aware
    version defined later in this file.
    """
    if not SPELLCHECKER_AVAILABLE:
        return []
    return list(spell.candidates(word))[:top_n]
370
+
371
+ # -----------------------
372
+ # File extract & write helpers (kept & added best-effort replace)
373
+ # -----------------------
374
def extract_text_from_docx_bytes(b):
    """Extract plain text from .docx bytes; paragraphs are joined with
    blank lines. Raises RuntimeError when python-docx is unavailable."""
    if docx is None:
        raise RuntimeError("python-docx not installed")
    document = docx.Document(io.BytesIO(b))
    return "\n\n".join(paragraph.text for paragraph in document.paragraphs)
381
+
382
def extract_text_from_pdf_bytes(b):
    """Extract plain text from PDF bytes via PyMuPDF; each page's text is
    followed by a blank line. Raises RuntimeError when PyMuPDF is missing."""
    if fitz is None:
        raise RuntimeError("PyMuPDF not installed")
    document = fitz.open(stream=b, filetype="pdf")
    chunks = [page.get_text() + "\n\n" for page in document]
    return "".join(chunks)
390
+
391
def extract_text_from_txt_bytes(b):
    """Decode raw text-file bytes to str.

    Tries UTF-8 first, then Latin-1 (which maps every byte, so it cannot
    fail on bytes input). The str() fallback only triggers when *b* is not
    a bytes-like object at all.
    """
    try:
        return b.decode("utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8 — Latin-1 always succeeds on bytes.
        return b.decode("latin-1")
    except AttributeError:
        # BUGFIX(narrowed): the original caught bare Exception twice; only
        # a non-bytes input can reach this point, so coerce it explicitly.
        return str(b)
399
+
400
def make_docx_bytes_from_text(text):
    """Build a minimal .docx from plain text; blank-line-separated chunks
    become paragraphs. Returns the document as bytes."""
    if docx is None:
        raise RuntimeError("python-docx not installed")
    document = docx.Document()
    for paragraph in text.split("\n\n"):
        document.add_paragraph(paragraph)
    buffer = io.BytesIO()
    document.save(buffer)
    return buffer.getvalue()
410
+
411
def make_pdf_bytes_from_text(text):
    """Render plain text into a simple PDF: one input line per text row,
    14pt line spacing, new page once y passes 720. Returns PDF bytes."""
    if fitz is None:
        raise RuntimeError("PyMuPDF not installed")
    document = fitz.open()
    page = document.new_page()
    y = 72  # top margin, in points
    for line in text.split("\n"):
        if y > 720:
            # Past the usable area — start a fresh page.
            page = document.new_page()
            y = 72
        page.insert_text((72, y), line)
        y += 14
    data = document.write()
    document.close()
    return data
427
+
428
+ def _build_replacement_spans(orig_text, corrected_text):
429
+ a = orig_text.split()
430
+ b = corrected_text.split()
431
+ sm = difflib.SequenceMatcher(a=a, b=b)
432
+ spans = []
433
+ for tag, i1, i2, j1, j2 in sm.get_opcodes():
434
+ if tag == "equal":
435
+ continue
436
+ orig_span = " ".join(a[i1:i2]).strip()
437
+ corr_span = " ".join(b[j1:j2]).strip()
438
+ if orig_span:
439
+ spans.append((orig_span, corr_span))
440
+ spans.sort(key=lambda x: -len(x[0]))
441
+ return spans
442
+
443
def apply_replacements_to_docx_bytes(original_bytes, orig_text, corrected_text):
    """Replace occurrences of orig spans with corrected spans inside docx runs and table cells (best-effort).

    Works on the original document bytes so run-level formatting is kept.
    Limitation: a span that is split across multiple runs will not match,
    so only replacements fully contained in one run are applied.
    Returns the (possibly unmodified) document as bytes.
    """
    if docx is None:
        raise RuntimeError("python-docx not installed")
    from io import BytesIO
    document = docx.Document(BytesIO(original_bytes))
    spans = _build_replacement_spans(orig_text, corrected_text)
    if not spans:
        # Nothing changed — round-trip the document untouched.
        out = BytesIO()
        document.save(out)
        out.seek(0)
        return out.read()
    def replace_in_paragraph_runs(par):
        # Longest original spans come first (see _build_replacement_spans),
        # so shorter spans cannot clobber a longer replacement.
        for orig_span, corr_span in spans:
            for run in par.runs:
                if orig_span in run.text:
                    run.text = run.text.replace(orig_span, corr_span)
    # Body paragraphs...
    for p in document.paragraphs:
        replace_in_paragraph_runs(p)
    # ...and every paragraph inside every table cell.
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                for p in cell.paragraphs:
                    replace_in_paragraph_runs(p)
    out = io.BytesIO()
    document.save(out)
    out.seek(0)
    return out.read()
471
+
472
def apply_replacements_to_pdf_bytes(original_bytes, orig_text, corrected_text):
    """Best-effort PDF replacement: redact original token bbox and write corrected text in place using PyMuPDF.

    Approach: token-diff orig vs corrected, map orig token indices onto the
    PDF's word boxes by position, white-out the first affected word of each
    changed region, and draw the corrected span into that box.
    NOTE(review): the token->word mapping assumes orig_text tokenizes in the
    same order PyMuPDF reports words; extraction differences will misplace
    replacements — verify on real documents.
    Returns new PDF bytes (or the input bytes when nothing changed).
    """
    if fitz is None:
        raise RuntimeError("PyMuPDF not installed")
    orig_tokens = orig_text.split()
    corr_tokens = corrected_text.split()
    sm = difflib.SequenceMatcher(a=orig_tokens, b=corr_tokens)
    ops = []
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag == "equal":
            continue
        ops.append((tag, i1, i2, j1, j2))
    if not ops:
        # No differences — return the original bytes unchanged.
        return original_bytes
    pdf = fitz.open(stream=original_bytes, filetype="pdf")
    # Flatten all pages' words into one list in reading order.
    global_words = []
    for pno in range(len(pdf)):
        page = pdf[pno]
        words = page.get_text("words")  # x0,y0,x1,y1, word, block_no, line_no, word_no
        # Sort by baseline (y1) then x0 to approximate reading order.
        words_sorted = sorted(words, key=lambda w: (round(w[3],1), round(w[0],1)))
        for w in words_sorted:
            global_words.append((pno, w))
    # Positional mapping: i-th orig token -> i-th extracted word.
    N = len(global_words)
    M = len(orig_tokens)
    map_len = min(N, M)
    token_to_global = {}
    for i in range(map_len):
        token_to_global[i] = global_words[i]
    redactions_per_page = {}
    inserts_per_page = {}
    for op in ops:
        tag, i1, i2, j1, j2 = op
        corr_span = " ".join(corr_tokens[j1:j2])
        # Anchor the whole corrected span on the FIRST mappable original
        # token of the changed region (hence the break below).
        for ti in range(i1, i2):
            if ti in token_to_global:
                pno, wtuple = token_to_global[ti]
                x0, y0, x1, y1 = wtuple[0], wtuple[1], wtuple[2], wtuple[3]
                bbox = fitz.Rect(x0, y0, x1, y1)
                redactions_per_page.setdefault(pno, []).append(bbox)
                inserts_per_page.setdefault(pno, []).append((bbox, corr_span))
                break
    for pno, rects in redactions_per_page.items():
        page = pdf[pno]
        # White-fill redactions remove the old word images...
        for r in rects:
            page.add_redact_annot(r, fill=(1,1,1))
        page.apply_redactions()
        # ...then the corrected text is written into the cleared boxes.
        for bbox, corr_span in inserts_per_page.get(pno, []):
            # Font size scaled to the original word height, floor of 6pt.
            fontsize = max(6, round(bbox.height * 0.8))
            try:
                page.insert_textbox(bbox, corr_span, fontsize=fontsize, fontname="helv", align=0)
            except Exception:
                # insert_textbox fails when the span doesn't fit the bbox;
                # fall back to unclipped insertion at the box origin.
                page.insert_text((bbox.x0, bbox.y0), corr_span, fontsize=fontsize, fontname="helv")
    out = pdf.write()
    pdf.close()
    return out
527
+
528
# -----------------------
# UI (first file's GUI style) with Prev/Next variants and independent tools
# -----------------------
st.set_page_config(page_title="Rephraser", layout="wide")
st.title("Rephraser — Paraphrase · Plagiarism Remover · Grammar & Spelling")
st.markdown("Paste text or upload DOCX/PDF/TXT. Tools are independent and chainable (use output as input manually).")

col_left, col_right = st.columns([2,1])
with col_left:
    # --- Input source: pasted text or an uploaded document ---
    input_mode = st.radio("Input:", ("Paste text", "Upload file (.docx/.pdf/.txt)"))
    uploaded_bytes = None
    uploaded_name = None
    input_text = ""
    if input_mode == "Paste text":
        input_text = st.text_area("Paste your paragraph(s) here:", height=200, value=st.session_state.current_text or "")
        # Clear upload memory so a stale file can't shadow pasted text.
        st.session_state._uploaded_bytes = None
        st.session_state._uploaded_name = None
    else:
        uploaded = st.file_uploader("Upload .docx, .pdf or .txt", type=["docx","pdf","txt"])
        if uploaded is not None:
            # Remember the raw bytes so grammar check can later rewrite the
            # file in its original format.
            uploaded_bytes = uploaded.read()
            uploaded_name = uploaded.name
            st.session_state._uploaded_bytes = uploaded_bytes
            st.session_state._uploaded_name = uploaded_name
            try:
                # Dispatch on extension; anything unknown is treated as text.
                if uploaded.name.lower().endswith(".docx"):
                    input_text = extract_text_from_docx_bytes(uploaded_bytes)
                elif uploaded.name.lower().endswith(".pdf"):
                    input_text = extract_text_from_pdf_bytes(uploaded_bytes)
                else:
                    input_text = extract_text_from_txt_bytes(uploaded_bytes)
                st.success(f"Loaded {uploaded.name} (approx {len(input_text.split())} words)")
            except Exception as e:
                st.error(f"Could not extract text from file: {e}")
    st.markdown("**Tools (choose one)**")
    st.markdown("- **Para-phraser (fast):** Focused on rephrase sentence, regardless of Plagiarism ")
    st.markdown("- **Plagiarism Remover (deep):** Focused on Plagiarism, Convert text to human like ")
    st.markdown("- **Grammar & Spelling:** Spelling And Grammar Check")

with col_right:
    st.header("Actions")
    variants_to_generate = st.slider("Max variants (deep)", 1, 5, 3)
    use_light_only = st.checkbox("Force light-only (no HF models)", value=True)
    # --- Tool 1: fast lexical paraphrase ---
    if st.button("1) Para-phraser (fast)"):
        st.session_state._last_tool = "paraphrase"
        # Fresh input wins; otherwise re-process the current working text.
        source = input_text.strip() or st.session_state.current_text.strip()
        if not source:
            st.warning("Provide text or upload a file first.")
        else:
            # Push current state so Undo can restore it.
            st.session_state.history.append(st.session_state.current_text or source)
            variants = paraphrase_variants_fast(source, n_variants=variants_to_generate)
            if not variants:
                st.error("No paraphrase produced.")
            else:
                st.session_state.versions = variants
                st.session_state.version_index = 0
                st.session_state.current_text = variants[0]
                st.session_state.last_input = source
                # Reset grammar/file artifacts from any previous run.
                st.session_state._last_grammar_issues = None
                st.session_state._last_output_file = None
                st.success("Para-phraser done. Use Prev/Next to browse.")

    # --- Tool 2: deep plagiarism-removal rewrite (optionally HF-backed) ---
    if st.button("2) Plagiarism Remover (deep)"):
        st.session_state._last_tool = "plagiarism"
        source = input_text.strip() or st.session_state.current_text.strip()
        if not source:
            st.warning("Provide text or upload a file first.")
        else:
            st.session_state.history.append(st.session_state.current_text or source)
            st.info("Running plagiarism remover pipeline...")
            try:
                variants = plagiarism_remover_pipeline(source, aggressive=1, light_only=use_light_only)
            except Exception as e:
                # Fall back to the fast paraphraser rather than failing.
                st.error(f"Pipeline failed: {e}")
                variants = paraphrase_variants_fast(source, n_variants=variants_to_generate)
            if not variants:
                st.error("No variants produced.")
            else:
                st.session_state.versions = variants
                st.session_state.version_index = 0
                st.session_state.current_text = variants[0]
                st.session_state.last_input = source
                st.session_state._last_grammar_issues = None
                st.session_state._last_output_file = None
                st.success(f"Produced {len(variants)} variants.")

    # --- Tool 3: grammar & spelling (note: prefers current_text over fresh
    # input, the opposite priority of tools 1 and 2) ---
    if st.button("3) Grammar & Spelling (check)"):
        st.session_state._last_tool = "grammar"
        source = st.session_state.current_text.strip() or input_text.strip()
        if not source:
            st.warning("Provide text or upload a file first.")
        else:
            st.session_state.history.append(st.session_state.current_text or source)
            try:
                corrected, issues = grammar_and_spelling_check(source)
                st.session_state.current_text = corrected
                st.session_state.versions = [corrected]
                st.session_state.version_index = 0
                st.session_state._last_grammar_issues = issues or []
                st.success(f"Grammar check applied ({len(issues)} issues).")

                # File-level output if uploaded: rebuild a corrected file in
                # the same format as the upload (best-effort).
                uploaded_bytes = st.session_state.get("_uploaded_bytes")
                uploaded_name = st.session_state.get("_uploaded_name")
                if uploaded_bytes and uploaded_name:
                    suffix = Path(uploaded_name).suffix.lower()
                    try:
                        if suffix == ".docx" and docx is not None:
                            out_bytes = apply_replacements_to_docx_bytes(uploaded_bytes, source, corrected)
                            st.session_state._last_output_file = out_bytes
                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
                        elif suffix == ".pdf" and fitz is not None:
                            out_bytes = apply_replacements_to_pdf_bytes(uploaded_bytes, source, corrected)
                            st.session_state._last_output_file = out_bytes
                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
                        elif suffix == ".txt":
                            st.session_state._last_output_file = corrected.encode("utf-8")
                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
                        else:
                            # Unknown extension: fall back to a fresh DOCX.
                            st.session_state._last_output_file = make_docx_bytes_from_text(corrected)
                            st.session_state._last_output_name = "corrected_output.docx"
                    except Exception as e:
                        st.warning(f"Could not create corrected file preserving format: {e}")
                        st.session_state._last_output_file = None
                        st.session_state._last_output_name = None

                if issues:
                    st.subheader("Detected issues (sample):")
                    # Cap the listing at 30 issues to keep the page usable.
                    for i, it in enumerate(issues[:30]):
                        st.write(f"- {it.get('message')} → suggestions: {it.get('replacements')}")
            except Exception as e:
                st.error(f"Grammar check failed: {e}")

# Navigation: browse the variant list produced by tools 1/2.
st.markdown("---")
st.subheader("Preview / Versions")
colv1, colv2, colv3 = st.columns([1,1,2])
with colv1:
    if st.button("◀ Previous Version"):
        if st.session_state.versions:
            st.session_state.version_index = max(0, st.session_state.version_index - 1)
            st.session_state.current_text = st.session_state.versions[st.session_state.version_index]
with colv2:
    if st.button("Next Version ▶"):
        if st.session_state.versions:
            st.session_state.version_index = min(len(st.session_state.versions)-1, st.session_state.version_index + 1)
            st.session_state.current_text = st.session_state.versions[st.session_state.version_index]
with colv3:
    st.write(f"Version {st.session_state.version_index+1} of {max(1, len(st.session_state.versions))}")


# Preview: grammar runs get a two-pane issue/corrected view; everything
# else gets a single diff-highlighted pane.
st.markdown("---")
st.subheader("Original (top) — Processed Output (bottom)")
orig_display = st.session_state.last_input or ""
out_display = st.session_state.current_text or (input_text or "")

if st.session_state._last_tool == "grammar" and out_display.strip():
    orig_html = mark_grammar_issues(orig_display, st.session_state._last_grammar_issues or []) if orig_display else html.escape(orig_display)
    out_html = underline_changes_in_output(orig_display or "", out_display)
    st.markdown("<b>Original (issues highlighted)</b>", unsafe_allow_html=True)
    st.markdown(f"<div style='padding:8px;border:1px solid #e6e6e6;background:transparent;white-space:pre-wrap'>{orig_html}</div>", unsafe_allow_html=True)
    st.markdown("<b>Corrected (changes underlined in green)</b>", unsafe_allow_html=True)
    st.markdown(f"<div style='padding:8px;border:1px solid #e6e6e6;background:transparent;white-space:pre-wrap'>{out_html}</div>", unsafe_allow_html=True)
else:
    # generic preview (green underlines for changed parts — new function)
    preview_html = text_to_html_with_highlights(orig_display, out_display) if orig_display else html.escape(out_display)
    st.markdown(
        f"""
        <div style='padding:10px;border:1px solid #eee;background:transparent;white-space:pre-wrap'>
        {preview_html}
        </div>
        """,
        unsafe_allow_html=True
    )

# Editable area — manual touch-ups before export.
st.subheader("Editable result (you can manually edit before saving)")
st.session_state.editable_area = st.text_area("Edit here:", value=st.session_state.current_text or out_display, height=300)

# If corrected file available (uploaded+grammar), offer it for download.
if st.session_state._last_output_file is not None and st.session_state._last_output_name:
    st.markdown("**Download corrected file**")
    st.download_button("Download corrected file", data=st.session_state._last_output_file, file_name=st.session_state._last_output_name)
713
+
714
+
715
+
716
+ # Spelling suggestions & apply edits
717
+
718
+ # --- unchanged imports and code above ---
719
+
720
def spelling_suggestions(word, top_n=5, sentence=None):
    """Return contextual synonyms if NLTK WordNet is available, else fallback to spellchecker.

    When *sentence* is given, the word is POS-tagged in context so synonyms
    match its part of speech. Returns up to *top_n* suggestions (may be empty).
    """
    if not word or not word.strip():
        return []

    def get_wordnet_pos(treebank_tag):
        # Map a Penn Treebank tag prefix to the matching WordNet POS.
        from nltk.corpus import wordnet
        prefix_to_pos = {
            'J': wordnet.ADJ,
            'V': wordnet.VERB,
            'N': wordnet.NOUN,
            'R': wordnet.ADV,
        }
        return prefix_to_pos.get(treebank_tag[:1])

    if nltk_available:
        wn_pos = None
        if sentence:
            try:
                # Tag the whole sentence and locate our word to get its POS.
                tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
                for token, tag in tagged:
                    if token.lower() == word.lower():
                        wn_pos = get_wordnet_pos(tag)
                        break
            except Exception:
                pass  # missing taggers/tokenizers: proceed without POS
        syns = wn.synsets(word, pos=wn_pos) if wn_pos else wn.synsets(word)
        candidates = {
            lemma.name().replace('_', ' ')
            for synset in syns
            for lemma in synset.lemmas()
            if lemma.name().replace('_', ' ').lower() != word.lower()
        }
        if candidates:
            return sorted(candidates)[:top_n]

    # Fallback to plain spell-checking candidates.
    if SPELLCHECKER_AVAILABLE:
        return list(spell.candidates(word))[:top_n]

    return []
768
+
769
# --- rest of the UI: word suggestions, export, undo, refresh ---

# Spelling suggestions & apply edits
st.markdown("---")
st.markdown("**Spelling suggestions / replace single word:**")
col_s1, col_s2 = st.columns([2,3])
with col_s1:
    word_for_sugg = st.text_input("Enter token to suggest replacements:", value="")
    if st.button("Get suggestions"):
        if not word_for_sugg.strip():
            st.warning("Type a token to get suggestions.")
        else:
            # Pass the editable text as context for POS-aware suggestions.
            suggs = spelling_suggestions(word_for_sugg, sentence=st.session_state.editable_area)
            if suggs:
                sel = st.selectbox("Choose replacement:", options=["(keep)"] + suggs)
                if sel and sel != "(keep)":
                    # Plain substring replace — affects every occurrence.
                    st.session_state.editable_area = st.session_state.editable_area.replace(word_for_sugg, sel)
                    st.success(f"Replaced '{word_for_sugg}' with '{sel}'")
            else:
                st.info("No suggestions found.")
with col_s2:
    if st.button("Apply editable area to current text"):
        st.session_state.current_text = st.session_state.editable_area
        st.success("Applied edits to current text.")

# Save / Download / Copy for plain text
st.markdown("---")
col_d1, col_d2, col_d3 = st.columns(3)
with col_d1:
    if st.button("Save as DOCX"):
        try:
            b = make_docx_bytes_from_text(st.session_state.editable_area or "")
            st.download_button("Download DOCX", data=b, file_name="rephrased.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
        except Exception as e:
            st.error(f"Could not create DOCX: {e}")
with col_d2:
    if st.button("Save as PDF"):
        try:
            b = make_pdf_bytes_from_text(st.session_state.editable_area or "")
            st.download_button("Download PDF", data=b, file_name="rephrased.pdf", mime="application/pdf")
        except Exception as e:
            st.error(f"Could not create PDF: {e}")
with col_d3:
    if st.button("Copy to clipboard"):
        if PYPERCLIP:
            pyperclip.copy(st.session_state.editable_area or "")
            st.success("Copied to clipboard")
        else:
            # No clipboard support on this host: write a temp file instead.
            path = os.path.join(tempfile.gettempdir(), "rephrased_output.txt")
            with open(path, "w", encoding="utf-8") as f:
                f.write(st.session_state.editable_area or "")
            st.info(f"Saved to {path} (pyperclip not available)")

# Undo: pop the previous working text pushed by the tool buttons.
if st.button("Undo"):
    if st.session_state.history:
        st.session_state.current_text = st.session_state.history.pop()
        st.session_state.versions = [st.session_state.current_text]
        st.session_state.version_index = 0
        st.success("Undone last step")
    else:
        st.info("Nothing to undo")

st.markdown("---")
st.caption("Notes: Paraphraser & Plagiarism Remover code preserved. Grammar prefers LanguageTool (requires Java) else falls back to TextBlob. DOCX/PDF replacements are best-effort to preserve layout.")

# --- Refresh button at the bottom: reruns the script (state is kept) ---
if st.button("🔄 Refresh"):
    st.rerun()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ python-docx
3
+ PyMuPDF
4
+ nltk
5
+ spacy
6
+ textblob
7
+ pyspellchecker
8
+ pyperclip
9
+
10
+ # Optional / recommended for best results (heavy)
11
+ transformers
12
+ torch
13
+ sentencepiece
14
+ language-tool-python # requires Java (install JDK/JRE)