druvx13 commited on
Commit
abdc137
·
verified ·
1 Parent(s): bba23cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -78
app.py CHANGED
@@ -1,87 +1,135 @@
1
- import os
2
- import json
3
- import torch
4
- from transformers import pipeline, set_seed
5
  import gradio as gr
 
 
 
6
 
7
- # Model setup
8
- CACHE_DIR = "./model_cache"
9
- os.makedirs(CACHE_DIR, exist_ok=True)
10
-
11
- generator = pipeline(
12
- "text-generation",
13
- model="openai-community/openai-gpt",
14
- cache_dir=CACHE_DIR,
15
- device=-1, # CPU
 
 
 
 
 
 
16
  )
17
 
18
- # Chat history state
19
- # Initialize empty history list
20
-
21
- def init_history():
22
- return []
23
-
24
- # Generate text and record history
25
-
26
- def generate_and_record(
27
- prompt, max_length, temperature, top_k, top_p, repetition_penalty, seed, num_return_sequences, history
28
- ):
29
- if seed is not None:
30
- set_seed(int(seed))
31
- outputs = generator(
32
- prompt,
33
- max_length=int(max_length),
34
- temperature=float(temperature),
35
- top_k=int(top_k),
36
- top_p=float(top_p),
37
- repetition_penalty=float(repetition_penalty),
38
- num_return_sequences=int(num_return_sequences),
39
- do_sample=True,
40
- )
41
- texts = [out["generated_text"] for out in outputs]
42
- history.append({"prompt": prompt, "results": texts})
43
- return "\n\n---\n\n".join(texts), history
44
-
45
- # Export chat history to JSON file
46
 
47
- def export_history(history):
48
- path = "chat_history.json"
49
- with open(path, "w", encoding="utf-8") as f:
50
- json.dump(history, f, ensure_ascii=False, indent=2)
51
- return path
52
-
53
- # Build Gradio interface
54
-
55
- with gr.Blocks(title="GPT Text Generation") as demo:
56
- gr.Markdown("## Text Generation with openai-community/openai-gpt (CPU)")
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  with gr.Row():
59
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your prompt here", lines=2)
60
- max_length = gr.Slider(32, 1024, value=128, step=32, label="Max Length")
61
- with gr.Row():
62
- temperature = gr.Slider(0.1, 1.5, value=1.0, step=0.1, label="Temperature")
63
- top_k = gr.Slider(0, 100, value=50, step=1, label="Top-K Sampling")
64
- with gr.Row():
65
- top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-P (Nucleus) Sampling")
66
- repetition_penalty = gr.Slider(0.5, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
67
- seed_input = gr.Number(value=None, precision=0, label="Seed (optional)")
68
- num_seq = gr.Dropdown(choices=[1, 2, 3, 5], value=1, label="Number of Generations")
69
-
70
- generate_btn = gr.Button("Generate")
71
- clear_btn = gr.Button("Clear History")
72
- export_btn = gr.Button("Export History")
73
-
74
- output_text = gr.TextArea(label="Generated Text", interactive=False, lines=10)
75
- history_state = gr.State(init_history())
76
-
77
- generate_btn.click(
78
- fn=generate_and_record,
79
- inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, seed_input, num_seq, history_state],
80
- outputs=[output_text, history_state]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  )
82
- clear_btn.click(lambda _: ([], ""), inputs=[history_state], outputs=[history_state, output_text])
83
- export_btn.click(fn=export_history, inputs=[history_state], outputs=[])
84
-
85
- # Launch app
86
 
87
- demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
+ import torch
4
+ import os
5
 
6
+ # πŸ”§ CPU Optimization Suite
7
+ os.environ["OMP_NUM_THREADS"] = "4" # Match your physical core count
8
+ os.environ["MKL_NUM_THREADS"] = "4"
9
+ torch.set_num_threads(4)
10
+ torch.manual_seed(42)
11
+
12
+ # πŸ“¦ Model Configuration
13
+ MODEL_NAME = "openai-community/openai-gpt"
14
+ cache_dir = "./model_cache"
15
+
16
+ # 🧠 Load Model with Surgical Precision
17
+ tokenizer = AutoTokenizer.from_pretrained(
18
+ MODEL_NAME,
19
+ cache_dir=cache_dir,
20
+ padding_side="left"
21
  )
22
 
23
+ model = AutoModelForCausalLM.from_pretrained(
24
+ MODEL_NAME,
25
+ torch_dtype=torch.float32, # FP32 for CPU stability
26
+ low_cpu_mem_usage=True,
27
+ cache_dir=cache_dir
28
+ ).eval()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ # πŸš€ Create CPU-Optimized Pipeline
31
+ text_generator = pipeline(
32
+ "text-generation",
33
+ model=model,
34
+ tokenizer=tokenizer,
35
+ device=-1 # Explicit CPU usage
36
+ )
 
 
 
37
 
38
+ def generate_response(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9, num_sequences=1):
39
+ """Optimized for 18GB CPU with strict memory control"""
40
+ try:
41
+ # πŸ›‘οΈ Input Protection
42
+ inputs = tokenizer(
43
+ prompt,
44
+ return_tensors="pt",
45
+ truncation=True,
46
+ max_length=512,
47
+ padding="max_length"
48
+ )
49
+
50
+ with torch.inference_mode():
51
+ outputs = model.generate(
52
+ **inputs,
53
+ max_new_tokens=int(max_new_tokens),
54
+ temperature=float(temperature),
55
+ top_p=float(top_p),
56
+ do_sample=True,
57
+ num_return_sequences=int(num_sequences),
58
+ pad_token_id=tokenizer.eos_token_id,
59
+ eos_token_id=tokenizer.eos_token_id
60
+ )
61
+
62
+ return "\n\n".join([
63
+ f"πŸ“ Result {i+1}:\n{tokenizer.decode(output, skip_special_tokens=True)}"
64
+ for i, output in enumerate(outputs)
65
+ ])
66
+ except Exception as e:
67
+ return f"🚨 CPU Memory Alert: {str(e)}\nTry shorter inputs or fewer results!"
68
+
69
+ # 🎨 UI Layout with Gradio Blocks
70
+ with gr.Blocks(theme="soft", title="GPT-1 Legacy Engine") as demo:
71
+ gr.Markdown("""
72
+ # 🧠 Legacy GPT-1 Text Generator (CPU-Optimized Edition)
73
+ *Running the original transformer-based language model with surgical memory control*
74
+
75
+ πŸ”₯ Features:
76
+ - Thread-limited execution for stable performance
77
+ - Input length protection (512 tokens)
78
+ - Batch generation support
79
+ - Temperature-controlled creativity
80
+ - Interactive examples
81
+ """)
82
+
83
  with gr.Row():
84
+ with gr.Column(scale=2):
85
+ prompt = gr.Textbox(
86
+ label="Input Prompt",
87
+ placeholder="Enter your prompt here...",
88
+ lines=5
89
+ )
90
+
91
+ with gr.Accordion("βš™οΈ Generation Parameters", open=False):
92
+ max_new_tokens = gr.Slider(
93
+ minimum=32, maximum=256, value=128, step=16,
94
+ label="Max New Tokens (Output Length)"
95
+ )
96
+ temperature = gr.Slider(
97
+ minimum=0.1, maximum=1.5, value=0.7, step=0.1,
98
+ label="Creativity Level (Temperature)"
99
+ )
100
+ top_p = gr.Slider(
101
+ minimum=0.1, maximum=1.0, value=0.9, step=0.1,
102
+ label="Top-p Sampling (Nucleus Filtering)"
103
+ )
104
+ num_sequences = gr.Slider(
105
+ minimum=1, maximum=3, value=1, step=1,
106
+ label="Number of Results to Generate"
107
+ )
108
+
109
+ submit = gr.Button("✨ Generate Text", variant="primary")
110
+
111
+ with gr.Column(scale=2):
112
+ output = gr.Textbox(
113
+ label="Generated Text",
114
+ lines=15,
115
+ show_copy_button=True
116
+ )
117
+
118
+ submit.click(
119
+ fn=generate_response,
120
+ inputs=[prompt, max_new_tokens, temperature, top_p, num_sequences],
121
+ outputs=output
122
+ )
123
+
124
+ gr.Examples(
125
+ examples=[
126
+ ["The future of artificial intelligence will"],
127
+ ["Explain quantum physics like I'm five"],
128
+ ["Write a haiku about machine learning"]
129
+ ],
130
+ inputs=prompt,
131
+ label="πŸš€ Try These Prompts"
132
  )
 
 
 
 
133
 
134
+ if __name__ == "__main__":
135
+ demo.launch()