jsakshi commited on
Commit
3df258d
·
verified ·
1 Parent(s): 2194a8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -94
app.py CHANGED
@@ -1,96 +1,7 @@
1
- import torch
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
 
4
- # CPU-friendly model settings
5
- # Using a smaller model with quantization for CPU compatibility
6
- model_name = "describeai/gemini" # Smaller 2B parameter model
7
- tokenizer = AutoTokenizer.from_pretrained(model_name)
8
 
9
- # Configure quantization for better CPU performance
10
- quantization_config = BitsAndBytesConfig(
11
- load_in_4bit=True,
12
- bnb_4bit_compute_dtype=torch.float16
13
- )
14
-
15
- # Load model with CPU optimizations
16
- try:
17
- model = AutoModelForCausalLM.from_pretrained(
18
- model_name,
19
- quantization_config=quantization_config,
20
- device_map="auto" # Will use CPU if no GPU is available
21
- )
22
- using_quantization = True
23
- except Exception as e:
24
- print(f"Quantization failed with error: {e}")
25
- print("Falling back to standard CPU loading...")
26
- model = AutoModelForCausalLM.from_pretrained(
27
- model_name,
28
- torch_dtype=torch.float32, # Use float32 for CPU
29
- device_map="cpu" # Explicitly use CPU
30
- )
31
- using_quantization = False
32
-
33
- print(f"Model loaded on CPU. Using quantization: {using_quantization}")
34
- print(f"Model size: {model_name}")
35
-
36
- # Define a function to generate text with adjusted parameters for CPU
37
- def generate_response(prompt, max_length=200): # Reduced max length
38
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
39
-
40
- print("Generating response (this may take a while on CPU)...")
41
- start_time = time.time()
42
-
43
- # Generate output with more conservative settings for CPU
44
- outputs = model.generate(
45
- **inputs,
46
- max_new_tokens=max_length,
47
- do_sample=False, # Deterministic generation is faster
48
- num_beams=1, # No beam search for speed
49
- )
50
-
51
- end_time = time.time()
52
- print(f"Generation completed in {end_time - start_time:.2f} seconds")
53
-
54
- # Decode and return the generated text
55
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
56
-
57
- # Remove the prompt from the response
58
- return generated_text[len(tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
59
-
60
- # Test the model with simpler, shorter prompts for CPU evaluation
61
- import time
62
-
63
- test_prompts = [
64
- "Explain what machine learning is in one paragraph.",
65
- "Write a haiku about computers.",
66
- "List three benefits of open-source software."
67
- ]
68
-
69
- # Run evaluation
70
- print("\nEvaluating Gemini open source model on CPU\n")
71
- print("=" * 50)
72
-
73
- for i, prompt in enumerate(test_prompts):
74
- print(f"\nPrompt {i+1}: {prompt}")
75
- print("-" * 50)
76
-
77
- start_time = time.time()
78
- response = generate_response(prompt)
79
- end_time = time.time()
80
-
81
- print(f"Response time: {end_time - start_time:.2f} seconds")
82
- print(f"Response:\n{response}")
83
- print("=" * 50)
84
-
85
- # Memory usage information
86
- import psutil
87
- process = psutil.Process()
88
- memory_info = process.memory_info()
89
- print(f"\nMemory Usage: {memory_info.rss / (1024 * 1024):.2f} MB")
90
-
91
- # Save model output to a file for later analysis
92
- with open("gemini_cpu_evaluation_results.txt", "w") as f:
93
- f.write("GEMINI MODEL CPU EVALUATION RESULTS\n\n")
94
- for i, prompt in enumerate(test_prompts):
95
- f.write(f"Prompt {i+1}: {prompt}\n")
96
- f.write(f"Response:\n{generate_response(prompt)}\n\n")
 
"""Summarize a code snippet with the describeai/gemini text2text model.

Minimal Hugging Face Space entry point: loads the model, summarizes one
hard-coded snippet, and prints the result.
"""

from transformers import pipeline, set_seed

# Seed all RNGs so repeated runs produce identical output. Beam search with
# sampling disabled is already deterministic, but the original imported
# set_seed without ever calling it; seeding here makes the import meaningful
# and guards against a future switch to do_sample=True.
set_seed(42)

# Build the summarization pipeline once at module load.
# NOTE(review): first run downloads the model weights — network required.
summarizer = pipeline('text2text-generation', model='describeai/gemini')

# The code snippet to summarize.
code = "print('hello world!')"

# num_beams=3 trades a little latency for a higher-quality summary;
# max_length bounds the generated token count.
response = summarizer(code, max_length=100, num_beams=3)
print("Summarized code: " + response[0]['generated_text'])