Buckets:

WhiteRoomProdigy
/

Humaneyes-bucket

Files

xet

WhiteRoomProdigy/Humaneyes-bucket / main.py

WhiteRoomProdigy

8 days ago

download

raw

3.96 kB

	import gradio as gr
	import torch
	from transformers import PegasusForConditionalGeneration, PegasusTokenizer
	import re
	import os

	def load_model():
	    """Load the Pegasus tokenizer and model from the local ``./models`` directory.

	    Returns:
	        A ``(tokenizer, model, torch_device)`` tuple. ``torch_device`` is
	        ``'cuda'`` when ``torch.cuda.is_available()`` is true and ``'cpu'``
	        otherwise; the model has already been moved to that device.
	    """
	    model_dir = './models'

	    # Prefer the GPU whenever torch can see one.
	    if torch.cuda.is_available():
	        torch_device = 'cuda'
	    else:
	        torch_device = 'cpu'
	    print(f"Using device: {torch_device}")

	    # Tokenizer and weights both live in the same local directory.
	    tokenizer = PegasusTokenizer.from_pretrained(model_dir)
	    model = PegasusForConditionalGeneration.from_pretrained(model_dir)
	    model = model.to(torch_device)
	    return tokenizer, model, torch_device

	def split_into_paragraphs(text):
	    """Split text into non-empty paragraphs separated by blank lines.

	    A paragraph break is a line break followed by another line break, with
	    only whitespace allowed in between. This means CRLF line endings and
	    "blank" lines that contain stray spaces or tabs also separate paragraphs.

	    Args:
	        text: The raw input text. ``None`` or an empty string yields ``[]``.

	    Returns:
	        A list of paragraphs with surrounding whitespace stripped. Empty
	        paragraphs are dropped, so blank lines are not preserved.
	    """
	    if not text:
	        return []
	    # ``\s*`` swallows carriage returns, spaces and any extra blank lines that
	    # sit between the two line breaks.
	    chunks = (chunk.strip() for chunk in re.split(r'\n\s*\n', text))
	    return [chunk for chunk in chunks if chunk]

	def split_into_sentences(paragraph):
	    """Break a paragraph into sentences at whitespace following ``.``, ``!`` or ``?``.

	    Each sentence keeps its terminating punctuation. Surrounding whitespace is
	    trimmed and empty fragments are discarded.
	    """
	    # The lookbehind leaves the punctuation attached to the sentence it ends.
	    fragments = re.split(r'(?<=[.!?])\s+', paragraph)
	    trimmed = map(str.strip, fragments)
	    return list(filter(None, trimmed))

	def get_response(input_text, num_return_sequences, tokenizer, model, torch_device):
	    """Paraphrase one piece of text with beam search and return the top candidate.

	    Args:
	        input_text: The text to paraphrase.
	        num_return_sequences: How many candidate sequences ``model.generate``
	            is asked to produce; only the first decoded candidate is returned.
	        tokenizer: Tokenizer used to encode the input and decode the output.
	        model: Model exposing ``generate``.
	        torch_device: Device the encoded batch is moved to before generation.

	    Returns:
	        The first decoded candidate, with special tokens removed.
	    """
	    # Call the tokenizer directly: ``prepare_seq2seq_batch`` is deprecated in
	    # transformers and the plain ``__call__`` is its documented replacement.
	    batch = tokenizer(
	        [input_text],
	        truncation=True,
	        padding='longest',
	        max_length=80,
	        return_tensors="pt",
	    ).to(torch_device)

	    translated = model.generate(
	        **batch,
	        num_beams=10,
	        num_return_sequences=num_return_sequences,
	        temperature=1.0,
	        repetition_penalty=2.8,
	        length_penalty=1.2,
	        max_length=80,
	        min_length=5,
	        no_repeat_ngram_size=3,
	    )

	    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
	    return tgt_text[0]

	def get_response_from_text(context, tokenizer, model, torch_device):
	    """Paraphrase a whole text sentence by sentence, keeping paragraph breaks.

	    Sentences with fewer than three words are passed through unchanged, as is
	    any sentence whose paraphrase raises an error or contains one of the
	    rejected filler phrases. Paragraphs are re-joined with a blank line.
	    """
	    rejected_phrases = ['it\'s like', 'in other words']

	    def rewrite(sentence):
	        # Very short sentences are kept verbatim.
	        if len(sentence.split()) < 3:
	            return sentence
	        try:
	            candidate = get_response(sentence, 1, tokenizer, model, torch_device)
	            lowered = candidate.lower()
	            if any(phrase in lowered for phrase in rejected_phrases):
	                return sentence
	            return candidate
	        except Exception as e:
	            # Best effort: one failing sentence must not abort the whole text.
	            print(f"Error processing sentence: {e}")
	            return sentence

	    rewritten_paragraphs = [
	        ' '.join(rewrite(sentence) for sentence in split_into_sentences(paragraph))
	        for paragraph in split_into_paragraphs(context)
	    ]
	    return '\n\n'.join(rewritten_paragraphs)

	def create_interface():
	    """Build the Gradio interface for the paraphraser.

	    The tokenizer and model are loaded once here and captured by the
	    callback, so every request reuses the same objects.

	    Returns:
	        The configured ``gr.Interface``; it is not launched here.
	    """
	    tokenizer, model, torch_device = load_model()

	    def greet(context):
	        # Gradio callback: paraphrase the submitted text with the loaded model.
	        return get_response_from_text(context, tokenizer, model, torch_device)

	    # Styling shared by the input and output text boxes.
	    custom_css = """
	.input-text, .output-text {
	font-size: 16px !important;
	font-family: Arial, sans-serif !important;
	min-height: 300px !important;
	}
	"""

	    source_box = gr.Textbox(
	        lines=15,
	        label="Input Text",
	        placeholder="Enter your text here...",
	        elem_classes="input-text",
	    )
	    result_box = gr.Textbox(
	        lines=15,
	        label="Paraphrased Text",
	        elem_classes="output-text",
	    )

	    return gr.Interface(
	        fn=greet,
	        inputs=source_box,
	        outputs=result_box,
	        title="Advanced Text Paraphraser",
	        description="Enter text to generate a high-quality paraphrased version while maintaining paragraph structure.",
	        theme="default",
	        css=custom_css,
	    )

	if __name__ == "__main__":
	    # Build the Gradio app (which loads the model) and start serving it.
	    create_interface().launch()

Xet Storage Details

Size: 3.96 kB
Xet hash: 4f1b45b5bd14defbcfc467e2426475c99d3a5b98444e1764cbfde2ca77a96cb7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.