Remostart committed · verified
Commit 9f2ac72 · 1 Parent(s): f031212

Update app.py

Files changed (1): app.py (+54 −27)
app.py CHANGED
```diff
@@ -1,46 +1,73 @@
 import gradio as gr
 import torch
+import logging
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 # Load model & tokenizer
 MODEL_NAME = "ubiodee/Plutus_Tutor_new"
 
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-model.eval()
-
-if torch.cuda.is_available():
-    model.to("cuda")
+try:
+    logger.info("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    logger.info("Loading model...")
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto",          # Automatically place model on available device
+        torch_dtype=torch.float16,  # Use half-precision to save memory
+        low_cpu_mem_usage=True      # Optimize memory usage during loading
+    )
+    model.eval()
+    logger.info("Model and tokenizer loaded successfully.")
+except Exception as e:
+    logger.error(f"Error loading model or tokenizer: {str(e)}")
+    raise
 
-# Response function
+# Response function with streaming
 def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    with torch.no_grad():
-        outputs = model.generate(
+    try:
+        logger.info("Processing prompt...")
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        # Stream tokens for faster perceived response
+        for output in model.generate(
             **inputs,
-            max_new_tokens=500,
-            temperature=0.3,
-            top_p=0.3,
-            do_sample=True,
+            max_new_tokens=200,  # Reduced for faster inference
+            do_sample=False,     # Greedy decoding for speed
             eos_token_id=tokenizer.eos_token_id,
-            pad_token_id=tokenizer.pad_token_id,
-        )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Remove the prompt from the output to return only the answer
-    if response.startswith(prompt):
-        response = response[len(prompt):].strip()
-
-    return response
+            pad_token_id=tokenizer.pad_token_id
+        ):
+            response = tokenizer.decode(output, skip_special_tokens=True)
+            # Remove prompt from output
+            if response.startswith(prompt):
+                response = response[len(prompt):].strip()
+            yield response
+        logger.info("Response generated successfully.")
+    except Exception as e:
+        logger.error(f"Error during generation: {str(e)}")
+        yield f"Error: {str(e)}"
 
 # Gradio UI
 demo = gr.Interface(
     fn=generate_response,
-    inputs=gr.Textbox(label="Enter your prompt", lines=4, placeholder="Learn about Plutus..."),
+    inputs=gr.Textbox(
+        label="Enter your prompt",
+        lines=4,
+        placeholder="Ask about Plutus or Cardano..."
+    ),
     outputs=gr.Textbox(label="Model Response"),
     title="Cardano Plutus AI Assistant",
-    description="Your Personalised Plutus Tutor."
+    description="Your Personalised Plutus Tutor. Optimized for fast responses.",
+    allow_flagging="never"
 )
 
-demo.launch()
+# Launch the app
+try:
+    logger.info("Launching Gradio interface...")
+    demo.launch()
+except Exception as e:
+    logger.error(f"Error launching Gradio: {str(e)}")
+    raise
```
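A note on the streaming claim in this commit: `model.generate()` still runs to completion and returns a tensor of finished sequences, so `for output in model.generate(...)` iterates over whole sequences (one per batch element), not over tokens as they are produced. For genuinely incremental output, transformers provides `TextIteratorStreamer`, which `generate()` can feed while running in a background thread. Below is a minimal sketch under that assumption, reusing the `model` and `tokenizer` loaded above; the function name `generate_response_streaming` is hypothetical and not part of the commit.

```python
from threading import Thread
from transformers import TextIteratorStreamer

def generate_response_streaming(prompt):
    # Hypothetical alternative to the committed generate_response(); assumes
    # `model` and `tokenizer` are already loaded as in the diff above.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True drops the echoed prompt, so no startswith() trimming is needed
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=200,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        streamer=streamer,
    )

    # generate() blocks until finished, so run it in a background thread
    # and consume decoded text chunks from the streamer as they arrive.
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # Gradio re-renders the output Textbox on each yield
```

Since Gradio already treats a generator `fn` as a streaming function (as the committed version relies on), swapping this sketch into `gr.Interface(fn=...)` would need no other changes to the UI.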