# Ai_text/summarizer.py
# Provenance (Hugging Face Hub file header): uploaded by Ars135,
# commit message "Upload 2 files", revision 3bdcf00 (verified).
from ctransformers import AutoModelForCausalLM
import os
from huggingface_hub import hf_hub_download
class TextSummarizer:
    """Local LLM text summarizer backed by a GGUF Mistral-7B-Instruct model.

    The heavyweight model object is loaded at most once per process and
    shared by every instance (singleton via the ``_model_instance`` class
    attribute).
    """

    # Shared model handle; populated on first successful construction.
    _model_instance = None

    def __init__(self, model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf"):
        """
        Initialize the local LLM summarizer.

        Loads the model only once (Singleton pattern). If ``model_path``
        does not exist locally, the file is downloaded from the
        TheBloke/Mistral-7B-Instruct-v0.1-GGUF repository on the
        Hugging Face Hub.

        Args:
            model_path: Path (or filename within the Hub repo) of the
                GGUF model file to load.

        Raises:
            RuntimeError: if the model file cannot be downloaded.
        """
        if TextSummarizer._model_instance is None:
            print("Loading model...")
            if not os.path.exists(model_path):
                print(f"Model file {model_path} not found. Downloading...")
                try:
                    # Download the requested file from the repo. Use the
                    # basename of model_path so a caller-supplied quantization
                    # file is honored (previously the Q4_K_M filename was
                    # hard-coded here, silently ignoring the argument).
                    model_path = hf_hub_download(
                        repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                        filename=os.path.basename(model_path),
                        local_dir=".",
                        local_dir_use_symlinks=False,
                    )
                    print("Download complete.")
                except Exception as e:
                    # Chain the cause so the underlying Hub error is visible.
                    raise RuntimeError(f"Failed to download model: {e}") from e
            # Load the model.
            # threads=2 is safer for free HF Spaces (usually 2 vCPU).
            TextSummarizer._model_instance = AutoModelForCausalLM.from_pretrained(
                model_path,
                model_type="mistral",
                context_length=4096,
                threads=2,
            )
            print("Model loaded successfully.")
        self.llm = TextSummarizer._model_instance

    def summarize(self, text, target_words=100):
        """
        Summarize the given text using Mistral-7B with a target word count.

        Args:
            text: The input text to summarize.
            target_words: Approximate desired summary length in words.

        Returns:
            A ``(summary, stats)`` tuple of strings. On failure the first
            element is an error message and the second is empty — callers
            relying on this string-based error contract are preserved.
        """
        if not text or not text.strip():
            return "Error: Input text cannot be empty.", ""
        # Estimate max tokens needed (1 word ~= 1.3 tokens, plus buffer).
        # A hard limit prevents runaway generation while leaving headroom.
        max_new_tokens = int(target_words * 2.5)
        # Construct prompt for Mistral Instruct.
        # Format: <s>[INST] {prompt} [/INST]
        prompt = (
            f"<s>[INST] Please summarize the following text in approximately "
            f"{target_words} words:\n\n{text} [/INST]"
        )
        try:
            # Generate the summary with conservative sampling settings.
            response = self.llm(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=0.2,
                repetition_penalty=1.1,
            )
            summary_text = response.strip()
            # Display stats. Whitespace word counts are a rough but fast
            # proxy; token count is approximated as words * 1.3.
            input_words = len(text.split())
            summary_words = len(summary_text.split())
            summary_tokens = int(summary_words * 1.3)
            stats = (
                f"Input Words: {input_words}. Summary Words: {summary_words} "
                f"(~{summary_tokens} tokens)."
            )
            return summary_text, stats
        except Exception as e:
            return f"Error during summarization: {e}", ""
if __name__ == "__main__":
    # Manual smoke test: load the model and summarize a short passage.
    try:
        ts = TextSummarizer()
        sample = """
The Transformer is a deep learning model introduced in 2017 by Google researchers.
It is primarily used in the field of natural language processing (NLP).
Like recurrent neural networks (RNNs), Transformers are designed to handle sequential data,
such as natural language, for tasks such as translation and text summarization.
However, unlike RNNs, Transformers do not require that the sequential data be processed in order.
For example, if the input data is a natural language sentence, the Transformer does not need to
process the beginning of it before the end. Due to this feature, the Transformer allows for
much more parallelization than RNNs and therefore reduced training times.
"""
        print("Original Text:\n", sample)
        result, result_stats = ts.summarize(sample)
        print("\nSummary:\n", result)
        print("\nStats:", result_stats)
    except Exception as err:
        print(err)