Spaces:

AI-Solutions-KK
/

HP_NLP_PARAPHRASER_APP

Sleeping

App Files Files Community

HP_NLP_PARAPHRASER_APP / app.py

AI-Solutions-KK

Update app.py

501dc1d unverified 5 months ago

raw

history blame contribute delete

32.8 kB

	# app.py
	"""
	Merged Rephraser app
	- GUI from original (first) file
	- Models/logic from later big file (kept unchanged)
	- Grammar highlight (red for issues; green underline for corrected words)
	- File upload/download for .docx/.pdf/.txt with best-effort format preservation
	- Tools independent (no automatic chaining)
	- Prev/Next browsing for multi-version outputs
	"""

	import streamlit as st
	import io, os, random, re, difflib, html, tempfile
	from pathlib import Path

	# IMPORTANT: st.set_page_config MUST be the first Streamlit command
	st.set_page_config(page_title="Rephraser", layout="wide")

	# --- Home button at the top ---
	# A click re-executes the script from the top via st.rerun().
	home_clicked = st.button("🏠 Home")
	if home_clicked:
	    st.rerun()

	# Optional heavy libs. Each import is attempted once; on failure the name is
	# bound to a None / False sentinel so the rest of the app can degrade
	# gracefully instead of failing at start-up.
	try:
	    import docx
	except Exception:
	    docx = None

	try:
	    import fitz  # PyMuPDF
	except Exception:
	    fitz = None

	try:
	    import language_tool_python
	except Exception:
	    language_tool_python = None

	try:
	    from textblob import TextBlob
	except Exception:
	    TextBlob = None

	# NLTK / WordNet
	try:
	    import nltk
	    from nltk.corpus import wordnet as wn
	    # Probe one lookup now: a missing WordNet download then disables the
	    # feature here instead of crashing the first synonym lookup later.
	    wn.synsets("word")
	    nltk_available = True
	except Exception:
	    nltk_available = False


	# spaCy
	@st.cache_resource(show_spinner=False)
	def _load_spacy_pipeline(model_name="en_core_web_sm"):
	    """Load the spaCy pipeline once and reuse it across Streamlit reruns."""
	    import spacy
	    return spacy.load(model_name)


	try:
	    import spacy
	    nlp = _load_spacy_pipeline()
	    SPACY_AVAILABLE = True
	except Exception:
	    nlp = None
	    SPACY_AVAILABLE = False

	# transformers check
	try:
	    import transformers
	    TRANSFORMERS_AVAILABLE = True
	except Exception:
	    TRANSFORMERS_AVAILABLE = False

	# SpellChecker
	try:
	    from spellchecker import SpellChecker
	    spell = SpellChecker()
	    SPELLCHECKER_AVAILABLE = True
	except Exception:
	    # Keep the name defined so later references never raise NameError.
	    spell = None
	    SPELLCHECKER_AVAILABLE = False

	# pyperclip optional
	try:
	    import pyperclip
	    PYPERCLIP = True
	except Exception:
	    PYPERCLIP = False

	# -----------------------
	# Session state init (preserve old behavior)
	# -----------------------
	# The mapping is rebuilt on every run, so the list defaults are fresh objects.
	# A key is only seeded when it is absent from the session.
	_session_defaults = {
	    "versions": [],
	    "version_index": 0,
	    "last_input": "",
	    "current_text": "",
	    "history": [],
	    # bookkeeping for file uploads & grammar
	    "_uploaded_bytes": None,
	    "_uploaded_name": None,
	    "_last_grammar_issues": None,
	    "_last_output_file": None,
	    "_last_output_name": None,
	    "_last_tool": None,
	}
	for _state_key, _state_default in _session_defaults.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# -----------------------
	# Helpers: highlights & diffs
	# -----------------------
	def mark_grammar_issues(text, issues):
	    """Return ``text`` as HTML with each reported problem span underlined in red.

	    Args:
	        text: Plain text that the issue offsets refer to.
	        issues: List of dicts with ``offset``, ``length`` and ``message`` keys
	            (missing keys default to 0, 0 and ""). May be empty or None.

	    Returns:
	        The HTML-escaped text, with every problem span wrapped in a ``<span>``
	        whose ``title`` attribute carries the escaped message.
	    """
	    if not issues:
	        return html.escape(text)
	    spans = []
	    for issue in issues:
	        start = issue.get("offset", 0)
	        spans.append((start, start + issue.get("length", 0), issue.get("message", "")))
	    spans.sort()
	    pieces = []
	    cursor = 0
	    for start, end, msg in spans:
	        # Clip each span to the part of the text not written yet; overlapping
	        # or out-of-range reports would otherwise duplicate characters.
	        start = max(start, cursor)
	        end = min(end, len(text))
	        if end <= start:
	            continue
	        pieces.append(html.escape(text[cursor:start]))
	        problem = html.escape(text[start:end])
	        pieces.append(f'<span title="{html.escape(msg)}" style="border-bottom:2px solid #c0392b;">{problem}</span>')
	        cursor = end
	    pieces.append(html.escape(text[cursor:]))
	    return "".join(pieces)

	def underline_changes_in_output(orig, corrected):
	    """Render ``corrected`` as HTML with changed or inserted words underlined in green.

	    Both texts are split on whitespace and compared token by token. Tokens
	    removed from ``orig`` are not shown.

	    Args:
	        orig: Text before correction.
	        corrected: Text after correction.

	    Returns:
	        An HTML string. Every token is escaped, including unchanged ones,
	        because the caller renders the result as raw HTML.
	    """
	    orig_tokens = orig.split()
	    new_tokens = corrected.split()
	    matcher = difflib.SequenceMatcher(a=orig_tokens, b=new_tokens)
	    parts = []
	    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
	        if tag == "delete":
	            continue
	        fragment = html.escape(" ".join(new_tokens[j1:j2]))
	        if tag == "equal":
	            parts.append(fragment)
	        else:  # "replace" or "insert"
	            parts.append(f'<span style="text-decoration: underline; text-decoration-color: #27ae60;">{fragment}</span>')
	    return " ".join(parts) if parts else html.escape(corrected)

	## Green line
	import html
	import difflib

	def text_to_html_with_highlights(orig, new):
	    """Return ``new`` as HTML with added or changed words underlined in green.

	    The texts are split on whitespace and diffed word by word. Words present
	    only in ``orig`` are left out of the result.
	    """
	    pieces = []
	    for entry in difflib.ndiff(orig.split(), new.split()):
	        token = html.escape(entry[2:])
	        if entry.startswith("+ "):
	            # Word that was added or changed.
	            pieces.append(
	                f"<span style='color:black;text-decoration:underline;text-decoration-color:green'>{token}</span>"
	            )
	        elif entry.startswith(" "):
	            # Unchanged word.
	            pieces.append(token)
	        # Any other diff line (removed word, hint line) is skipped.
	    return " ".join(pieces)

	# -----------------------
	# Paraphraser functions (kept from your big code)
	# -----------------------
	def _rewrite_sentence_fast(sent, level):
	    """Apply one randomly chosen light transformation to a single sentence."""
	    if SPACY_AVAILABLE:
	        doc = nlp(sent)
	        chunks = [chunk.text for chunk in doc.noun_chunks]
	        # Small structural transform: swap the first two noun chunks, using a
	        # placeholder so the second replacement cannot undo the first.
	        if random.random() < 0.3 and len(chunks) >= 2:
	            swapped = sent.replace(chunks[0], "<<<A>>>")
	            swapped = swapped.replace(chunks[1], chunks[0])
	            return swapped.replace("<<<A>>>", chunks[1])
	        if ',' in sent and random.random() < 0.4:
	            parts = [p.strip() for p in sent.split(',')]
	            random.shuffle(parts)
	            return ", ".join(parts)
	        return _synonym_replace(sent, prob=0.15 + 0.05 * level)
	    if random.random() < 0.2:
	        words = sent.split()
	        if len(words) > 3:
	            i = random.randint(0, len(words) - 3)
	            words[i], words[i + 1] = words[i + 1], words[i]
	        return " ".join(words)
	    return _synonym_replace(sent, prob=0.12 + 0.04 * level)


	def paraphrase_variants_fast(text, n_variants=3):
	    """Produce up to ``n_variants`` distinct lightweight rewrites of ``text``.

	    The text is split into sentences after ``.``, ``!`` or ``?``. Each
	    sentence is rewritten independently, and the synonym-replacement
	    probability rises with the variant number.

	    Args:
	        text: Input text; surrounding whitespace is ignored.
	        n_variants: Maximum number of variants to return.

	    Returns:
	        A list of unique, non-blank variants; empty for blank input.
	    """
	    text = text.strip()
	    if not text:
	        return []
	    sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text)]
	    sentences = [s for s in sentences if s]
	    variants = []
	    for level in range(n_variants):
	        rewritten = [_rewrite_sentence_fast(sent, level) for sent in sentences]
	        # Occasionally reorder the sentences of this variant. Only this
	        # variant's own list is shuffled, so the input order stays intact
	        # for the variants that follow.
	        if random.random() < 0.3 and len(rewritten) > 1:
	            random.shuffle(rewritten)
	        variant = " ".join(rewritten)
	        if variant.strip() and variant not in variants:
	            variants.append(variant)
	    return variants[:n_variants]

	def _synonym_replace(sentence, prob=0.12, max_replacements=2):
	    """Swap a few words of ``sentence`` for single-word WordNet synonyms.

	    Without NLTK, the function instead swaps random word positions.

	    Args:
	        sentence: Sentence to rewrite.
	        prob: Per-word probability of attempting a change.
	        max_replacements: Upper bound on synonym substitutions.

	    Returns:
	        The rewritten sentence. On the WordNet path spacing and punctuation
	        are preserved; on the fallback path words are re-joined with single
	        spaces.
	    """
	    if not nltk_available:
	        words = sentence.split()
	        for i in range(len(words)):
	            if random.random() < prob:
	                j = random.randrange(len(words))
	                words[i], words[j] = words[j], words[i]
	        return " ".join(words)
	    # Alternating runs of word and non-word characters: joining the tokens
	    # reproduces the sentence exactly. (The alternation bar must not be
	    # escaped, otherwise only text containing a literal "|" would match.)
	    tokens = re.findall(r"\w+|\W+", sentence)
	    replaced = 0
	    for i, tok in enumerate(tokens):
	        if replaced >= max_replacements:
	            break
	        if not re.match(r'\w+', tok):
	            continue
	        lower = tok.lower()
	        if random.random() > prob:
	            continue
	        try:
	            synsets = wn.synsets(lower)
	        except LookupError:
	            # WordNet data is not installed; leave the rest untouched.
	            break
	        candidate = None
	        for synset in synsets:
	            for lemma in synset.lemmas():
	                name = lemma.name().replace('_', ' ')
	                # Accept the first single-word lemma that differs from the token.
	                if name.lower() != lower and ' ' not in name:
	                    candidate = name
	                    break
	            if candidate:
	                break
	        if not candidate:
	            continue
	        if tok[0].isupper():
	            candidate = candidate.capitalize()
	        tokens[i] = candidate
	        replaced += 1
	    return "".join(tokens)

	def simple_mix_versions(versions_list):
	    """Blend several text versions into one by sampling and shuffling sentences.

	    Up to three sentences are taken from every non-blank version, chosen at
	    random when a version has more than three. The pooled sentences are then
	    shuffled and joined with single spaces.
	    """
	    if not versions_list:
	        return ""
	    pool = []
	    for version in versions_list:
	        stripped = version.strip()
	        if stripped:
	            sentences = re.split(r'(?<=[.!?])\s+', stripped)
	            quota = max(1, min(3, len(sentences)))
	            if len(sentences) > quota:
	                pool.extend(random.sample(sentences, quota))
	            else:
	                pool.extend(sentences)
	    random.shuffle(pool)
	    return " ".join(pool)

	# -----------------------
	# Plagiarism remover (kept)
	# -----------------------
	@st.cache_resource(show_spinner=False)
	def load_small_model(model_name="t5-small"):
	    """Load a seq2seq model and build a text2text-generation pipeline for it.

	    The result is cached by Streamlit, so each ``model_name`` is loaded once.

	    Returns:
	        A ``(tokenizer, model, pipeline)`` tuple.

	    Raises:
	        ImportError: If the transformers package could not be imported.
	    """
	    if not TRANSFORMERS_AVAILABLE:
	        raise ImportError("transformers not installed")
	    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)
	    generator = pipeline("text2text-generation", model=seq2seq, tokenizer=tokenizer, device=-1)
	    return tokenizer, seq2seq, generator

	def hf_paraphrase_with_pipe(pipe, text, max_len=256):
	    """Run ``text`` through a generation pipeline and return the produced string.

	    ``pipe`` is called with sampling enabled. From a non-empty list result the
	    first item's ``generated_text`` (or else ``summary_text``) is returned;
	    any other result is stringified. If anything raises, the input ``text``
	    is returned unchanged.
	    """
	    sampling = dict(max_length=max_len, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
	    try:
	        result = pipe(text, **sampling)
	        if not (isinstance(result, list) and result):
	            return str(result)
	        first = result[0]
	        for key in ("generated_text", "summary_text"):
	            value = first.get(key)
	            if value:
	                return value
	        return str(first)
	    except Exception:
	        return text

	def plagiarism_remover_pipeline(text, aggressive=1, light_only=False):
	    """Build up to five distinct rewrites of ``text`` from several paraphrasers.

	    The candidates are, in order: one fast rule-based paraphrase, one output
	    per Hugging Face model (skipped when ``light_only`` is set or
	    transformers is unavailable), and a sentence-level mix of the earlier
	    candidates.

	    Args:
	        text: Text to rewrite.
	        aggressive: Accepted for interface compatibility; currently unused.
	        light_only: When true, no Hugging Face model is loaded.

	    Returns:
	        A list of unique, non-blank variants (at most five).
	    """
	    # The light paraphraser is random, so call it exactly once: a second call
	    # could return a different (even empty) list than the one just checked.
	    light = paraphrase_variants_fast(text, n_variants=1)
	    versions = [light[0] if light else text]
	    if TRANSFORMERS_AVAILABLE and not light_only:
	        model_jobs = (
	            ("t5-small", "paraphrase: " + text),
	            ("google/pegasus-xsum", text),
	        )
	        for model_name, prompt in model_jobs:
	            try:
	                _, _, model_pipe = load_small_model(model_name)
	                versions.append(hf_paraphrase_with_pipe(model_pipe, prompt))
	            except Exception:
	                # Best effort: a model that cannot be loaded is skipped.
	                continue
	    versions.append(simple_mix_versions(versions))
	    uniq = []
	    for candidate in versions:
	        if candidate and candidate.strip() and candidate not in uniq:
	            uniq.append(candidate)
	        if len(uniq) >= 5:
	            break
	    return uniq

	# -----------------------
	# Grammar & Spelling (kept)
	# -----------------------
	def grammar_and_spelling_check(text):
	    """Correct ``text`` and report the issues that were found.

	    LanguageTool is tried first, then TextBlob. If neither is usable the
	    text is returned unchanged.

	    Returns:
	        ``(corrected_text, issues)``. ``issues`` is a list of dicts with
	        ``message``, ``replacements``, ``offset``, ``length`` and ``context``
	        keys; it is empty on the TextBlob and pass-through paths.
	    """
	    if language_tool_python is not None:
	        tool = None
	        try:
	            tool = language_tool_python.LanguageTool('en-US')
	            matches = tool.check(text)
	            corrected = language_tool_python.utils.correct(text, matches)
	            issues = []
	            for m in matches:
	                # NOTE(review): newer language_tool_python releases appear to
	                # name this attribute error_length — confirm against the
	                # installed version.
	                length = getattr(m, "errorLength", None)
	                if length is None:
	                    length = getattr(m, "error_length", 0)
	                issues.append({
	                    "message": m.message,
	                    "replacements": m.replacements,
	                    "offset": m.offset,
	                    "length": length,
	                    "context": text[max(0, m.offset - 30): m.offset + 30]
	                })
	            return corrected, issues
	        except Exception:
	            # Fall through to the TextBlob fallback below.
	            pass
	        finally:
	            # A new checker is created on every call; shut it down so that
	            # repeated checks do not pile up background resources.
	            if tool is not None:
	                try:
	                    tool.close()
	                except Exception:
	                    pass
	    if TextBlob is not None:
	        try:
	            return str(TextBlob(text).correct()), []
	        except Exception:
	            pass
	    return text, []

	def spelling_suggestions(word, top_n=5, sentence=None):
	    """Return up to ``top_n`` replacement suggestions for ``word``.

	    WordNet synonyms are preferred. When ``sentence`` is given, the word's
	    part of speech in that sentence narrows the lookup. If WordNet yields
	    nothing, the spell checker's candidates are used.

	    Args:
	        word: Token to look up; blank or None yields an empty list.
	        top_n: Maximum number of suggestions.
	        sentence: Optional context used for part-of-speech tagging.

	    Returns:
	        A sorted list of suggestion strings (possibly empty).
	    """
	    if not word or not word.strip():
	        return []

	    if nltk_available:
	        # First letter of a Treebank tag -> WordNet part-of-speech constant.
	        pos_by_prefix = {'J': wn.ADJ, 'V': wn.VERB, 'N': wn.NOUN, 'R': wn.ADV}
	        wn_pos = None
	        if sentence:
	            try:
	                tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
	                for tok, tag in tagged:
	                    if tok.lower() == word.lower():
	                        wn_pos = pos_by_prefix.get(tag[:1])
	                        break
	            except Exception:
	                # Tagging is optional; fall back to an untagged lookup.
	                pass

	        try:
	            syns = wn.synsets(word, pos=wn_pos) if wn_pos else wn.synsets(word)
	        except LookupError:
	            # WordNet data is not installed; use the spell checker instead.
	            syns = []
	        suggestions = set()
	        for syn in syns:
	            for lemma in syn.lemmas():
	                name = lemma.name().replace('_', ' ')
	                if name.lower() != word.lower():
	                    suggestions.add(name)
	        if suggestions:
	            return sorted(suggestions)[:top_n]

	    # Fallback to spellchecker
	    if SPELLCHECKER_AVAILABLE:
	        # candidates() may return None when it has nothing to offer.
	        candidates = spell.candidates(word) or ()
	        return sorted(candidates)[:top_n]

	    return []

	# -----------------------
	# File extract & write helpers (kept & added best-effort replace)
	# -----------------------
	def extract_text_from_docx_bytes(b):
	    """Return the paragraph texts of a DOCX file, separated by blank lines.

	    Raises:
	        RuntimeError: If python-docx is not installed.
	    """
	    if docx is None:
	        raise RuntimeError("python-docx not installed")
	    document = docx.Document(io.BytesIO(b))
	    return "\n\n".join(paragraph.text for paragraph in document.paragraphs)

	def extract_text_from_pdf_bytes(b):
	    """Return the text of every page of a PDF, each page followed by a blank line.

	    Raises:
	        RuntimeError: If PyMuPDF is not installed.
	    """
	    if fitz is None:
	        raise RuntimeError("PyMuPDF not installed")
	    doc = fitz.open(stream=b, filetype="pdf")
	    try:
	        return "".join(page.get_text() + "\n\n" for page in doc)
	    finally:
	        # Always release the document, even when extraction fails.
	        doc.close()

	def extract_text_from_txt_bytes(b):
	    """Decode the bytes of a text file into a string.

	    UTF-8 is tried first (a leading byte-order mark is dropped), then
	    Latin-1. Input that cannot be decoded at all (for example a non-bytes
	    object) is returned as ``str(b)``.
	    """
	    # "utf-8-sig" behaves like "utf-8" but strips a leading BOM, which would
	    # otherwise show up as an invisible character at the start of the text.
	    for encoding in ("utf-8-sig", "latin-1"):
	        try:
	            return b.decode(encoding)
	        except Exception:
	            continue
	    return str(b)

	def make_docx_bytes_from_text(text):
	    """Build a DOCX file with one paragraph per blank-line-separated chunk of ``text``.

	    Returns:
	        The serialized document as bytes.

	    Raises:
	        RuntimeError: If python-docx is not installed.
	    """
	    if docx is None:
	        raise RuntimeError("python-docx not installed")
	    document = docx.Document()
	    for paragraph in text.split("\n\n"):
	        document.add_paragraph(paragraph)
	    buffer = io.BytesIO()
	    document.save(buffer)
	    return buffer.getvalue()

	def make_pdf_bytes_from_text(text):
	    """Render ``text`` line by line into a simple PDF and return it as bytes.

	    Lines are written 14 units apart starting at y=72. A new page is started
	    once the cursor passes y=720. Lines are not wrapped.

	    Raises:
	        RuntimeError: If PyMuPDF is not installed.
	    """
	    if fitz is None:
	        raise RuntimeError("PyMuPDF not installed")
	    doc = fitz.open()
	    try:
	        page = doc.new_page()
	        y = 72
	        for line in text.split("\n"):
	            if y > 720:
	                page = doc.new_page()
	                y = 72
	            page.insert_text((72, y), line)
	            y += 14
	        # Prefer tobytes(); fall back to the older write() name when the
	        # installed PyMuPDF does not provide it.
	        serialize = getattr(doc, "tobytes", None) or doc.write
	        return serialize()
	    finally:
	        # Release the document even if rendering fails part-way.
	        doc.close()

	def _build_replacement_spans(orig_text, corrected_text):
	a = orig_text.split()
	b = corrected_text.split()
	sm = difflib.SequenceMatcher(a=a, b=b)
	spans = []
	for tag, i1, i2, j1, j2 in sm.get_opcodes():
	if tag == "equal":
	continue
	orig_span = " ".join(a[i1:i2]).strip()
	corr_span = " ".join(b[j1:j2]).strip()
	if orig_span:
	spans.append((orig_span, corr_span))
	spans.sort(key=lambda x: -len(x[0]))
	return spans

	def apply_replacements_to_docx_bytes(original_bytes, orig_text, corrected_text):
	    """Replace occurrences of orig spans with corrected spans inside docx runs and table cells (best-effort).

	    Args:
	        original_bytes: The uploaded DOCX file.
	        orig_text: Text before correction.
	        corrected_text: Text after correction.

	    Returns:
	        The modified document serialized as bytes. A span is only replaced
	        where it lies entirely inside one run.

	    Raises:
	        RuntimeError: If python-docx is not installed.
	    """
	    if docx is None:
	        raise RuntimeError("python-docx not installed")
	    document = docx.Document(io.BytesIO(original_bytes))
	    spans = _build_replacement_spans(orig_text, corrected_text)
	    # Match whole tokens only: a plain substring replace of a short span such
	    # as "is" would also rewrite the inside of longer words like "this".
	    patterns = [
	        (re.compile(r"(?<!\w)" + re.escape(old) + r"(?!\w)"), new)
	        for old, new in spans
	    ]

	    def replace_in_paragraph_runs(par):
	        for pattern, new in patterns:
	            for run in par.runs:
	                if pattern.search(run.text):
	                    # A callable replacement keeps backslashes in ``new`` literal.
	                    run.text = pattern.sub(lambda _m, new=new: new, run.text)

	    if patterns:
	        for p in document.paragraphs:
	            replace_in_paragraph_runs(p)
	        for table in document.tables:
	            for row in table.rows:
	                for cell in row.cells:
	                    for p in cell.paragraphs:
	                        replace_in_paragraph_runs(p)
	    out = io.BytesIO()
	    document.save(out)
	    return out.getvalue()

	def apply_replacements_to_pdf_bytes(original_bytes, orig_text, corrected_text):
	    """Best-effort PDF replacement: redact original token bbox and write corrected text in place using PyMuPDF.

	    The i-th whitespace token of ``orig_text`` is assumed to correspond to
	    the i-th word of the PDF (pages in order, words sorted by bottom edge
	    then left edge). Tokens beyond the number of PDF words are ignored.

	    Args:
	        original_bytes: The uploaded PDF file.
	        orig_text: Text before correction.
	        corrected_text: Text after correction.

	    Returns:
	        The modified PDF as bytes, or ``original_bytes`` unchanged when the
	        two texts do not differ.

	    Raises:
	        RuntimeError: If PyMuPDF is not installed.
	    """
	    if fitz is None:
	        raise RuntimeError("PyMuPDF not installed")
	    orig_tokens = orig_text.split()
	    corr_tokens = corrected_text.split()
	    sm = difflib.SequenceMatcher(a=orig_tokens, b=corr_tokens)
	    ops = [op for op in sm.get_opcodes() if op[0] != "equal"]
	    if not ops:
	        return original_bytes
	    pdf = fitz.open(stream=original_bytes, filetype="pdf")
	    try:
	        global_words = []
	        for pno in range(len(pdf)):
	            # Each word: x0, y0, x1, y1, word, block_no, line_no, word_no
	            words = pdf[pno].get_text("words")
	            words_sorted = sorted(words, key=lambda w: (round(w[3], 1), round(w[0], 1)))
	            global_words.extend((pno, w) for w in words_sorted)

	        redactions_per_page = {}
	        inserts_per_page = {}
	        for _tag, i1, i2, j1, j2 in ops:
	            corr_span = " ".join(corr_tokens[j1:j2])
	            first_bbox = True
	            # Redact every mapped token of the edit so no stale word is left
	            # behind; the corrected text goes in the first token's box.
	            for ti in range(i1, min(i2, len(global_words))):
	                pno, wtuple = global_words[ti]
	                bbox = fitz.Rect(wtuple[0], wtuple[1], wtuple[2], wtuple[3])
	                redactions_per_page.setdefault(pno, []).append(bbox)
	                if first_bbox:
	                    inserts_per_page.setdefault(pno, []).append((bbox, corr_span))
	                    first_bbox = False

	        for pno, rects in redactions_per_page.items():
	            page = pdf[pno]
	            for r in rects:
	                page.add_redact_annot(r, fill=(1, 1, 1))
	            page.apply_redactions()
	            for bbox, corr_span in inserts_per_page.get(pno, []):
	                if not corr_span:
	                    continue  # pure deletion: nothing to write back
	                fontsize = max(6, round(bbox.height * 0.8))
	                try:
	                    leftover = page.insert_textbox(bbox, corr_span, fontsize=fontsize, fontname="helv", align=0)
	                except Exception:
	                    leftover = -1
	                if leftover < 0:
	                    # A negative result means the text did not fit and nothing
	                    # was drawn. Write it unboxed instead, with the baseline
	                    # placed near the bottom of the original word.
	                    baseline = (bbox.x0, bbox.y1 - 0.2 * bbox.height)
	                    page.insert_text(baseline, corr_span, fontsize=fontsize, fontname="helv")
	        # Prefer tobytes(); fall back to the older write() name when the
	        # installed PyMuPDF does not provide it.
	        serialize = getattr(pdf, "tobytes", None) or pdf.write
	        return serialize()
	    finally:
	        # Release the document even if a step above fails.
	        pdf.close()

	# -----------------------
	# UI (first file's GUI style) with Prev/Next variants and independent tools
	# -----------------------
	st.title("Rephraser — Paraphrase · Plagiarism Remover · Grammar & Spelling")
	st.markdown("Paste text or upload DOCX/PDF/TXT. Tools are independent and chainable (use output as input manually).")

	col_left, col_right = st.columns([2,1])
	with col_left:
	    input_mode = st.radio("Input:", ("Paste text", "Upload file (.docx/.pdf/.txt)"))
	    uploaded_bytes = None
	    uploaded_name = None
	    input_text = ""
	    if input_mode == "Paste text":
	        input_text = st.text_area("Paste your paragraph(s) here:", height=200, value=st.session_state.current_text or "")
	        # clear upload memory
	        st.session_state._uploaded_bytes = None
	        st.session_state._uploaded_name = None
	    else:
	        uploaded = st.file_uploader("Upload .docx, .pdf or .txt", type=["docx","pdf","txt"])
	        if uploaded is None:
	            # No file selected (or it was removed): forget the previous upload
	            # so later tools do not work on a file that is no longer shown.
	            st.session_state._uploaded_bytes = None
	            st.session_state._uploaded_name = None
	        else:
	            # getvalue() returns the whole buffer regardless of the current
	            # read position, which read() does not guarantee across reruns.
	            uploaded_bytes = uploaded.getvalue()
	            uploaded_name = uploaded.name
	            st.session_state._uploaded_bytes = uploaded_bytes
	            st.session_state._uploaded_name = uploaded_name
	            lowered_name = uploaded.name.lower()
	            try:
	                if lowered_name.endswith(".docx"):
	                    input_text = extract_text_from_docx_bytes(uploaded_bytes)
	                elif lowered_name.endswith(".pdf"):
	                    input_text = extract_text_from_pdf_bytes(uploaded_bytes)
	                else:
	                    input_text = extract_text_from_txt_bytes(uploaded_bytes)
	                st.success(f"Loaded {uploaded.name} (approx {len(input_text.split())} words)")
	            except Exception as e:
	                st.error(f"Could not extract text from file: {e}")
	    st.markdown("Tools (choose one)")
	    st.markdown("- Para-phraser (fast): Focused on rephrase sentence, regardless of Plagiarism ")
	    st.markdown("- Plagiarism Remover (deep): Focused on Plagiarism, Convert text to human like ")
	    st.markdown("- Grammar & Spelling: Spelling And Grammar Check")

	def _publish_variants(variants, source):
	    """Make ``variants`` the browsable result set produced from ``source``."""
	    st.session_state.versions = variants
	    st.session_state.version_index = 0
	    st.session_state.current_text = variants[0]
	    st.session_state.last_input = source
	    st.session_state._last_grammar_issues = None
	    st.session_state._last_output_file = None


	with col_right:
	    st.header("Actions")
	    variants_to_generate = st.slider("Max variants (deep)", 1, 5, 3)
	    use_light_only = st.checkbox("Force light-only (no HF models)", value=True)
	    if st.button("1) Para-phraser (fast)"):
	        st.session_state._last_tool = "paraphrase"
	        source = input_text.strip() or st.session_state.current_text.strip()
	        if not source:
	            st.warning("Provide text or upload a file first.")
	        else:
	            # Remember the previous state so "Undo" can restore it.
	            st.session_state.history.append(st.session_state.current_text or source)
	            variants = paraphrase_variants_fast(source, n_variants=variants_to_generate)
	            if not variants:
	                st.error("No paraphrase produced.")
	            else:
	                _publish_variants(variants, source)
	                st.success("Para-phraser done. Use Prev/Next to browse.")

	    if st.button("2) Plagiarism Remover (deep)"):
	        st.session_state._last_tool = "plagiarism"
	        source = input_text.strip() or st.session_state.current_text.strip()
	        if not source:
	            st.warning("Provide text or upload a file first.")
	        else:
	            st.session_state.history.append(st.session_state.current_text or source)
	            st.info("Running plagiarism remover pipeline...")
	            try:
	                variants = plagiarism_remover_pipeline(source, aggressive=1, light_only=use_light_only)
	            except Exception as e:
	                st.error(f"Pipeline failed: {e}")
	                # Fall back to the fast paraphraser so the user still gets output.
	                variants = paraphrase_variants_fast(source, n_variants=variants_to_generate)
	            if not variants:
	                st.error("No variants produced.")
	            else:
	                _publish_variants(variants, source)
	                st.success(f"Produced {len(variants)} variants.")

	    if st.button("3) Grammar & Spelling (check)"):
	        st.session_state._last_tool = "grammar"
	        source = st.session_state.current_text.strip() or input_text.strip()
	        if not source:
	            st.warning("Provide text or upload a file first.")
	        else:
	            st.session_state.history.append(st.session_state.current_text or source)
	            try:
	                corrected, issues = grammar_and_spelling_check(source)
	                st.session_state.current_text = corrected
	                st.session_state.versions = [corrected]
	                st.session_state.version_index = 0
	                # The issue offsets refer to ``source``; the preview must
	                # highlight that same text, not an older input.
	                st.session_state.last_input = source
	                st.session_state._last_grammar_issues = issues or []
	                # Drop any corrected file left over from an earlier check.
	                st.session_state._last_output_file = None
	                st.session_state._last_output_name = None
	                st.success(f"Grammar check applied ({len(issues)} issues).")

	                # File-level output if uploaded
	                uploaded_bytes = st.session_state.get("_uploaded_bytes")
	                uploaded_name = st.session_state.get("_uploaded_name")
	                if uploaded_bytes and uploaded_name:
	                    suffix = Path(uploaded_name).suffix.lower()
	                    try:
	                        if suffix == ".docx" and docx is not None:
	                            out_bytes = apply_replacements_to_docx_bytes(uploaded_bytes, source, corrected)
	                            st.session_state._last_output_file = out_bytes
	                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
	                        elif suffix == ".pdf" and fitz is not None:
	                            out_bytes = apply_replacements_to_pdf_bytes(uploaded_bytes, source, corrected)
	                            st.session_state._last_output_file = out_bytes
	                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
	                        elif suffix == ".txt":
	                            st.session_state._last_output_file = corrected.encode("utf-8")
	                            st.session_state._last_output_name = f"corrected_{uploaded_name}"
	                        else:
	                            st.session_state._last_output_file = make_docx_bytes_from_text(corrected)
	                            st.session_state._last_output_name = "corrected_output.docx"
	                    except Exception as e:
	                        st.warning(f"Could not create corrected file preserving format: {e}")
	                        st.session_state._last_output_file = None
	                        st.session_state._last_output_name = None

	                if issues:
	                    st.subheader("Detected issues (sample):")
	                    for it in issues[:30]:
	                        st.write(f"- {it.get('message')} → suggestions: {it.get('replacements')}")
	            except Exception as e:
	                st.error(f"Grammar check failed: {e}")

	# Navigation
	st.markdown("---")
	st.subheader("Preview / Versions")
	colv1, colv2, colv3 = st.columns([1,1,2])
	with colv1:
	    go_previous = st.button("◀ Previous Version")
	    if go_previous and st.session_state.versions:
	        # Step back, stopping at the first version.
	        previous_index = max(0, st.session_state.version_index - 1)
	        st.session_state.version_index = previous_index
	        st.session_state.current_text = st.session_state.versions[previous_index]
	with colv2:
	    go_next = st.button("Next Version ▶")
	    if go_next and st.session_state.versions:
	        # Step forward, stopping at the last version.
	        last_index = len(st.session_state.versions) - 1
	        next_index = min(last_index, st.session_state.version_index + 1)
	        st.session_state.version_index = next_index
	        st.session_state.current_text = st.session_state.versions[next_index]
	with colv3:
	    version_total = max(1, len(st.session_state.versions))
	    st.write(f"Version {st.session_state.version_index+1} of {version_total}")


	# Preview
	st.markdown("---")
	st.subheader("Original (top) — Processed Output (bottom)")
	orig_display = st.session_state.last_input or ""
	out_display = st.session_state.current_text or (input_text or "")

	_grammar_preview = st.session_state._last_tool == "grammar" and out_display.strip()
	if _grammar_preview:
	    # Grammar view: issues marked in the original, changes underlined in the result.
	    if orig_display:
	        orig_html = mark_grammar_issues(orig_display, st.session_state._last_grammar_issues or [])
	    else:
	        orig_html = html.escape(orig_display)
	    out_html = underline_changes_in_output(orig_display or "", out_display)
	    st.markdown("<b>Original (issues highlighted)</b>", unsafe_allow_html=True)
	    st.markdown(f"<div style='padding:8px;border:1px solid #e6e6e6;background:transparent;white-space:pre-wrap'>{orig_html}</div>", unsafe_allow_html=True)
	    st.markdown("<b>Corrected (changes underlined in green)</b>", unsafe_allow_html=True)
	    st.markdown(f"<div style='padding:8px;border:1px solid #e6e6e6;background:transparent;white-space:pre-wrap'>{out_html}</div>", unsafe_allow_html=True)
	else:
	    # generic preview (green underlines for changed parts — new function)
	    if orig_display:
	        preview_html = text_to_html_with_highlights(orig_display, out_display)
	    else:
	        preview_html = html.escape(out_display)
	    preview_block = (
	        "\n<div style='padding:10px;border:1px solid #eee;background:transparent;white-space:pre-wrap'>\n"
	        + preview_html
	        + "\n</div>\n"
	    )
	    st.markdown(preview_block, unsafe_allow_html=True)

	# Editable area
	st.subheader("Editable result (you can manually edit before saving)")
	_editable_default = st.session_state.current_text or out_display
	st.session_state.editable_area = st.text_area("Edit here:", value=_editable_default, height=300)

	# If corrected file available (uploaded+grammar), download
	_corrected_file = st.session_state._last_output_file
	_corrected_name = st.session_state._last_output_name
	if _corrected_file is not None and _corrected_name:
	    st.markdown("Download corrected file")
	    st.download_button("Download corrected file", data=_corrected_file, file_name=_corrected_name)

	# Spelling suggestions & apply edits
	st.markdown("---")
	st.markdown("Spelling suggestions / replace single word:")
	col_s1, col_s2 = st.columns([2,3])
	with col_s1:
	    word_for_sugg = st.text_input("Enter token to suggest replacements:", value="")
	    if st.button("Get suggestions"):
	        if not word_for_sugg.strip():
	            st.warning("Type a token to get suggestions.")
	            st.session_state["_suggestions"] = []
	        else:
	            suggs = spelling_suggestions(word_for_sugg, sentence=st.session_state.editable_area)
	            # Keep the result in session state: a button is only True in the
	            # run right after its click, so widgets created inside this
	            # branch would vanish as soon as the user interacts with them.
	            st.session_state["_suggestions"] = suggs
	            st.session_state["_suggestions_word"] = word_for_sugg
	            if not suggs:
	                st.info("No suggestions found.")

	    stored_suggs = st.session_state.get("_suggestions") or []
	    stored_word = st.session_state.get("_suggestions_word") or ""
	    if stored_suggs and stored_word:
	        sel = st.selectbox("Choose replacement:", options=["(keep)"] + list(stored_suggs))
	        if sel and sel != "(keep)" and st.button(f"Replace '{stored_word}' with '{sel}'"):
	            # Replace whole tokens only, so parts of longer words stay intact.
	            token_pattern = r"(?<!\w)" + re.escape(stored_word) + r"(?!\w)"
	            updated_text = re.sub(token_pattern, lambda _m: sel, st.session_state.editable_area or "")
	            st.session_state.history.append(st.session_state.current_text)
	            st.session_state.editable_area = updated_text
	            st.session_state.current_text = updated_text
	            st.session_state["_suggestions"] = []
	            # Rerun so the editable area above shows the replaced text.
	            st.rerun()
	with col_s2:
	    if st.button("Apply editable area to current text"):
	        st.session_state.current_text = st.session_state.editable_area
	        st.success("Applied edits to current text.")

	# Save / Download / Copy for plain text
	st.markdown("---")
	col_d1, col_d2, col_d3 = st.columns(3)
	with col_d1:
	    want_docx = st.button("Save as DOCX")
	    if want_docx:
	        try:
	            docx_payload = make_docx_bytes_from_text(st.session_state.editable_area or "")
	            st.download_button("Download DOCX", data=docx_payload, file_name="rephrased.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
	        except Exception as e:
	            st.error(f"Could not create DOCX: {e}")
	with col_d2:
	    want_pdf = st.button("Save as PDF")
	    if want_pdf:
	        try:
	            pdf_payload = make_pdf_bytes_from_text(st.session_state.editable_area or "")
	            st.download_button("Download PDF", data=pdf_payload, file_name="rephrased.pdf", mime="application/pdf")
	        except Exception as e:
	            st.error(f"Could not create PDF: {e}")
	with col_d3:
	    want_copy = st.button("Copy to clipboard")
	    if want_copy and PYPERCLIP:
	        pyperclip.copy(st.session_state.editable_area or "")
	        st.success("Copied to clipboard")
	    elif want_copy:
	        # Without pyperclip the text is written to a file in the temp directory instead.
	        path = os.path.join(tempfile.gettempdir(), "rephrased_output.txt")
	        with open(path, "w", encoding="utf-8") as f:
	            f.write(st.session_state.editable_area or "")
	        st.info(f"Saved to {path} (pyperclip not available)")

	# Undo
	undo_clicked = st.button("Undo")
	if undo_clicked and not st.session_state.history:
	    st.info("Nothing to undo")
	elif undo_clicked:
	    # Restore the most recent snapshot and make it the only version.
	    restored_text = st.session_state.history.pop()
	    st.session_state.current_text = restored_text
	    st.session_state.versions = [restored_text]
	    st.session_state.version_index = 0
	    st.success("Undone last step")

	st.markdown("---")
	st.caption("Notes: Paraphraser & Plagiarism Remover code preserved. Grammar prefers LanguageTool (requires Java) else falls back to TextBlob. DOCX/PDF replacements are best-effort to preserve layout.")

	# --- Refresh button at the bottom ---
	# A click re-executes the script from the top via st.rerun().
	refresh_clicked = st.button("🔄 Refresh")
	if refresh_clicked:
	    st.rerun()