huamnifierWithSimpleGrammer

Running

App Files Files

huamnifierWithSimpleGrammer / app.py

sashtech

Update app.py

a3485f7 verified over 1 year ago

raw

history blame

4.09 kB

import shutil
import subprocess
import sys

import gradio as gr
import language_tool_python
import nltk
import spacy
import torch
from gensim import downloader as api
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Install Java
	def install_java():
	subprocess.run(["apt-get", "update"])
	subprocess.run(["apt-get", "install", "-y", "openjdk-11-jre"])

	install_java()

	# Ensure necessary NLTK data is downloaded
	nltk.download('wordnet')
	nltk.download('omw-1.4')
	nltk.download('punkt') # Download the Punkt tokenizer for sentence tokenization

	# Ensure the spaCy model is installed
	try:
	nlp = spacy.load("en_core_web_sm")
	except OSError:
	subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
	nlp = spacy.load("en_core_web_sm")

	# Load a smaller Word2Vec model from Gensim's pre-trained models
	word_vectors = api.load("glove-wiki-gigaword-50")

	# Check for GPU and set the device accordingly
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
	tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
	model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

	# Function to correct grammar using LanguageTool
	def correct_grammar_with_languagetool(text):
	tool = language_tool_python.LanguageTool('en-US')
	matches = tool.check(text)
	corrected_text = language_tool_python.utils.correct(text, matches)
	return corrected_text

	# Function to get synonyms using NLTK WordNet
	def get_synonyms_nltk(word, pos):
	synsets = wordnet.synsets(word, pos=pos)
	if synsets:
	lemmas = synsets[0].lemmas()
	return [lemma.name() for lemma in lemmas]
	return []

	# Paraphrasing function using spaCy and NLTK
	def paraphrase_with_spacy_nltk(text):
	doc = nlp(text)
	paraphrased_words = []

	for token in doc:
	pos = None
	if token.pos_ in {"NOUN"}:
	pos = wordnet.NOUN
	elif token.pos_ in {"VERB"}:
	pos = wordnet.VERB
	elif token.pos_ in {"ADJ"}:
	pos = wordnet.ADJ
	elif token.pos_ in {"ADV"}:
	pos = wordnet.ADV

	synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []

	if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
	paraphrased_words.append(synonyms[0])
	else:
	paraphrased_words.append(token.text)

	paraphrased_sentence = ' '.join(paraphrased_words)
	return paraphrased_sentence

	# Sentence structuring using NLTK
	def structure_sentences(text):
	sentences = nltk.sent_tokenize(text) # Tokenize text into sentences
	structured_sentences = []

	for sentence in sentences:
	# Here you can apply any structuring rules or logic you need.
	structured_sentences.append(sentence)

	structured_text = ' '.join(structured_sentences)
	return structured_text

	# Combined function: Paraphrase -> Structure -> Grammar Check
	def humanize_text(text):
	# Step 1: Paraphrase
	paraphrased_text = paraphrase_with_spacy_nltk(text)

	# Step 2: Structure sentences
	structured_text = structure_sentences(paraphrased_text)

	# Step 3: Apply grammar correction
	final_text = correct_grammar_with_languagetool(structured_text)

	return final_text

	# Gradio interface definition
	with gr.Blocks() as interface:
	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(lines=5, label="Input Text")
	detect_button = gr.Button("AI Detection")
	humanize_button = gr.Button("Humanize Text")
	with gr.Column():
	output_text = gr.Textbox(label="Output")

	detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
	humanize_button.click(humanize_text, inputs=text_input, outputs=output_text)

	# Launch the Gradio app
	interface.launch(debug=False)