# Ai_text/summarizer.py
# Provenance (Hugging Face Hub file header): uploaded by Ars135,
# commit message "Upload 2 files", revision 3bdcf00 (verified).
from ctransformers import AutoModelForCausalLM
import os
from huggingface_hub import hf_hub_download
class TextSummarizer:
    """Local LLM text summarizer backed by a GGUF Mistral-7B-Instruct model.

    The heavyweight model object is loaded at most once per process and
    shared by every instance (singleton via the ``_model_instance`` class
    attribute).
    """

    # Shared model handle; populated on first successful construction.
    _model_instance = None

    def __init__(self, model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf"):
        """
        Initialize the local LLM summarizer.

        Loads the model only once (Singleton pattern). If ``model_path``
        does not exist locally, the file is downloaded from the
        TheBloke/Mistral-7B-Instruct-v0.1-GGUF repository on the
        Hugging Face Hub.

        Args:
            model_path: Path (or filename within the Hub repo) of the
                GGUF model file to load.

        Raises:
            RuntimeError: if the model file cannot be downloaded.
        """
        if TextSummarizer._model_instance is None:
            print("Loading model...")
            if not os.path.exists(model_path):
                print(f"Model file {model_path} not found. Downloading...")
                try:
                    # Download the requested file from the repo. Use the
                    # basename of model_path so a caller-supplied quantization
                    # file is honored (previously the Q4_K_M filename was
                    # hard-coded here, silently ignoring the argument).
                    model_path = hf_hub_download(
                        repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                        filename=os.path.basename(model_path),
                        local_dir=".",
                        local_dir_use_symlinks=False,
                    )
                    print("Download complete.")
                except Exception as e:
                    # Chain the cause so the underlying Hub error is visible.
                    raise RuntimeError(f"Failed to download model: {e}") from e
            # Load the model.
            # threads=2 is safer for free HF Spaces (usually 2 vCPU).
            TextSummarizer._model_instance = AutoModelForCausalLM.from_pretrained(
                model_path,
                model_type="mistral",
                context_length=4096,
                threads=2,
            )
            print("Model loaded successfully.")
        self.llm = TextSummarizer._model_instance

    def summarize(self, text, target_words=100):
        """
        Summarize the given text using Mistral-7B with a target word count.

        Args:
            text: The input text to summarize.
            target_words: Approximate desired summary length in words.

        Returns:
            A ``(summary, stats)`` tuple of strings. On failure the first
            element is an error message and the second is empty — callers
            relying on this string-based error contract are preserved.
        """
        if not text or not text.strip():
            return "Error: Input text cannot be empty.", ""
        # Estimate max tokens needed (1 word ~= 1.3 tokens, plus buffer).
        # A hard limit prevents runaway generation while leaving headroom.
        max_new_tokens = int(target_words * 2.5)
        # Construct prompt for Mistral Instruct.
        # Format: <s>[INST] {prompt} [/INST]
        prompt = (
            f"<s>[INST] Please summarize the following text in approximately "
            f"{target_words} words:\n\n{text} [/INST]"
        )
        try:
            # Generate the summary with conservative sampling settings.
            response = self.llm(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=0.2,
                repetition_penalty=1.1,
            )
            summary_text = response.strip()
            # Display stats. Whitespace word counts are a rough but fast
            # proxy; token count is approximated as words * 1.3.
            input_words = len(text.split())
            summary_words = len(summary_text.split())
            summary_tokens = int(summary_words * 1.3)
            stats = (
                f"Input Words: {input_words}. Summary Words: {summary_words} "
                f"(~{summary_tokens} tokens)."
            )
            return summary_text, stats
        except Exception as e:
            return f"Error during summarization: {e}", ""
if __name__ == "__main__":
    # Manual smoke test: load the model and summarize a short passage.
    try:
        ts = TextSummarizer()
        sample = """
The Transformer is a deep learning model introduced in 2017 by Google researchers.
It is primarily used in the field of natural language processing (NLP).
Like recurrent neural networks (RNNs), Transformers are designed to handle sequential data,
such as natural language, for tasks such as translation and text summarization.
However, unlike RNNs, Transformers do not require that the sequential data be processed in order.
For example, if the input data is a natural language sentence, the Transformer does not need to
process the beginning of it before the end. Due to this feature, the Transformer allows for
much more parallelization than RNNs and therefore reduced training times.
"""
        print("Original Text:\n", sample)
        result, result_stats = ts.summarize(sample)
        print("\nSummary:\n", result)
        print("\nStats:", result_stats)
    except Exception as err:
        print(err)