Ars135 commited on
Commit
bf11c1e
·
verified ·
1 Parent(s): a471d59

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +60 -0
  2. app.py +39 -0
  3. requirements.txt +3 -0
  4. summarizer.py +87 -0
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Text Summarizer
3
+ emoji: 📝
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 6.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # AI Text Summarizer
14
+
15
+ This project implements an AI-powered text summarizer using a local **Mistral-7B** LLM (via `ctransformers`) and provides a web interface using `gradio`.
16
+
17
+ ## Features
18
+ - **100% Local**: Runs entirely on your machine without internet (after model download).
19
+ - **High Accuracy**: Uses **Mistral-7B-Instruct**, a state-of-the-art open-source model.
20
+ - **Abstractive Summarization**: Generates new text rather than just selecting sentences.
21
+ - **No API Token Required**: Free and private.
22
+ - Simple and intuitive web interface.
23
+
24
+ ## Installation
25
+
26
+ 1. **Clone the repository** (if applicable) or navigate to the project directory.
27
+
28
+ 2. **Create a virtual environment** (recommended):
29
+ ```bash
30
+ python -m venv venv
31
+ source venv/bin/activate # On Windows use `venv\Scripts\activate`
32
+ ```
33
+
34
+ 3. **Install dependencies**:
35
+ ```bash
36
+ pip install -r requirements.txt
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ### Running the Web Interface
42
+ To start the Gradio web interface, run:
43
+ ```bash
44
+ python app.py
45
+ ```
46
+ This will launch a local server (usually at `http://127.0.0.1:7860`). Open this URL in your browser to use the summarizer.
47
+
48
+ ### Using the Script Directly
49
+ You can also use the `summarizer.py` script directly in your own code:
50
+ ```python
51
+ from summarizer import TextSummarizer
52
+
53
+ summarizer = TextSummarizer()
54
+ text = "Your long text here..."
55
+ summary, stats = summarizer.summarize(text, max_new_tokens=100)
56
+ print(summary)
57
+ ```
58
+
59
+ ## Requirements
60
+ - Python 3.7+
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from summarizer import TextSummarizer
3
+
4
+ # Initialize the summarizer globally to load the model once
5
+ print("Initializing Summarizer...")
6
+ global_summarizer = TextSummarizer()
7
+
8
def summarize_text(text, max_tokens):
    """Gradio callback: summarize *text*, generating at most *max_tokens* new tokens.

    Returns a ``(summary, stats)`` pair of strings; on failure the first
    element carries the error message and the second is empty.
    """
    try:
        # Reuse the module-level summarizer so the model is loaded exactly once.
        summary, stats = global_summarizer.summarize(text, int(max_tokens))
        return summary, stats
    except Exception as exc:
        # Show the failure in the summary box and leave the stats field blank.
        return f"An error occurred: {str(exc)}", ""
15
+
16
# ---------------------------------------------------------------------------
# Gradio UI: input + controls in the left column, results in the right one.
# ---------------------------------------------------------------------------
with gr.Blocks() as iface:
    gr.Markdown("# AI Text Summarizer (Local Mistral-7B)")
    gr.Markdown("Enter a long text to get a concise summary using the **Mistral-7B** model (running locally).")
    gr.Markdown("> **Note:** The first run might take a moment to load the model. Subsequent runs will be faster.")

    with gr.Row():
        with gr.Column():
            # Left column: text entry, length control, and the trigger button.
            text_input = gr.Textbox(lines=10, label="Input Text", placeholder="Enter text to summarize here...")
            max_tokens_slider = gr.Slider(minimum=50, maximum=500, value=100, step=10, label="Max Summary Length (Tokens)")
            submit_btn = gr.Button("Summarize", variant="primary")

        with gr.Column():
            # Right column: generated summary plus word-count statistics.
            output_text = gr.Textbox(label="Summary", lines=8)
            stats_output = gr.Label(label="Statistics")

    # Wire the button to the callback defined above.
    submit_btn.click(fn=summarize_text, inputs=[text_input, max_tokens_slider], outputs=[output_text, stats_output])

if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ctransformers
2
+ huggingface_hub
3
+ gradio
summarizer.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ctransformers import AutoModelForCausalLM
2
+ import os
3
+ from huggingface_hub import hf_hub_download
4
+
5
class TextSummarizer:
    """Abstractive text summarizer backed by a local Mistral-7B GGUF model.

    The loaded model is cached on the class (``_model_instance``) so every
    ``TextSummarizer()`` instance in the process shares a single model.
    """

    # Class-level cache: the ctransformers model, loaded at most once per process.
    _model_instance = None

    def __init__(self, model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf"):
        """Load (or reuse) the local LLM.

        Args:
            model_path: Path to a GGUF model file. If the file does not
                exist, a file with the same basename is downloaded from the
                ``TheBloke/Mistral-7B-Instruct-v0.1-GGUF`` repository.

        Raises:
            RuntimeError: If the model download fails.
        """
        if TextSummarizer._model_instance is None:
            print("Loading model...")
            if not os.path.exists(model_path):
                print(f"Model file {model_path} not found. Downloading...")
                try:
                    # Fix: download the *requested* file rather than a
                    # hard-coded filename, so a caller-supplied model_path
                    # is honoured (default value is unchanged).
                    model_path = hf_hub_download(
                        repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                        filename=os.path.basename(model_path),
                        local_dir=".",
                        # Materialize a real file, not a symlink into the HF
                        # cache. NOTE(review): this arg is deprecated/ignored
                        # in newer huggingface_hub releases.
                        local_dir_use_symlinks=False,
                    )
                    print("Download complete.")
                except Exception as e:
                    raise RuntimeError(f"Failed to download model: {e}") from e

            # threads=2 is safer for free HF Spaces (usually 2 vCPU).
            TextSummarizer._model_instance = AutoModelForCausalLM.from_pretrained(
                model_path,
                model_type="mistral",
                context_length=4096,
                threads=2,
            )
            print("Model loaded successfully.")

        self.llm = TextSummarizer._model_instance

    def summarize(self, text, max_new_tokens=250):
        """Summarize *text* using the loaded Mistral-7B model.

        Args:
            text: The text to summarize.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            A ``(summary, stats)`` tuple of strings. On any failure the
            first element is an error message and the second is empty —
            callers (the Gradio app) rely on this method never raising.
        """
        if not text or not text.strip():
            return "Error: Input text cannot be empty.", ""

        # Mistral-Instruct chat format: <s>[INST] {prompt} [/INST]
        prompt = f"<s>[INST] Please summarize the following text concisely in 2-3 sentences:\n\n{text} [/INST]"

        try:
            # Low temperature keeps the summary focused; the mild repetition
            # penalty discourages loops in longer generations.
            response = self.llm(prompt, max_new_tokens=max_new_tokens, temperature=0.2, repetition_penalty=1.1)
            summary_text = response.strip()

            # Simple whitespace word counts for the UI statistics panel.
            input_len = len(text.split())
            summary_len = len(summary_text.split())
            stats = f"Input Words: {input_len}. Summary Words: {summary_len}."

            return summary_text, stats
        except Exception as e:
            return f"Error during summarization: {e}", ""
67
+
68
if __name__ == "__main__":
    # Manual smoke test: load the model and summarize a short passage.
    try:
        demo = TextSummarizer()
        sample_text = """
        The Transformer is a deep learning model introduced in 2017 by Google researchers.
        It is primarily used in the field of natural language processing (NLP).
        Like recurrent neural networks (RNNs), Transformers are designed to handle sequential data,
        such as natural language, for tasks such as translation and text summarization.
        However, unlike RNNs, Transformers do not require that the sequential data be processed in order.
        For example, if the input data is a natural language sentence, the Transformer does not need to
        process the beginning of it before the end. Due to this feature, the Transformer allows for
        much more parallelization than RNNs and therefore reduced training times.
        """
        print("Original Text:\n", sample_text)
        result, info = demo.summarize(sample_text)
        print("\nSummary:\n", result)
        print("\nStats:", info)
    except Exception as err:
        # Model download/load can fail offline; report and exit quietly.
        print(err)