Vladislav Krasnov committed
Commit 6627d48 · 1 Parent(s): a3be3b5

Update space 10

Files changed (1)
  1. app.py +19 -47
app.py CHANGED
```diff
@@ -2,36 +2,21 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-# Your space info (for API endpoint calculation)
-USERNAME = "sarekuwa"
+# Load model and tokenizer
+USERNAME = "sarekuwa"
 SPACE_NAME = "livecoder"
 API_ENDPOINT = f"https://{USERNAME}-{SPACE_NAME}.hf.space/api/predict"
 
-# Print endpoint BEFORE launching (will appear in logs)
-print(f"API Endpoint for external use: {API_ENDPOINT}")
-print("Model loading...")
+model_name = "microsoft/phi-2"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+tokenizer.pad_token = tokenizer.eos_token
 
-# Use a lighter model for CPU - Phi-2 is too heavy
-# model_name = "microsoft/phi-2"  # TOO HEAVY - 2.7B parameters
-model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Lighter alternative
-
-try:
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float32,
-        device_map="cpu"
-    )
-    print("Model loaded successfully")
-except Exception as e:
-    print(f"Error loading model: {e}")
-    # Fallback to simplest model
-    model_name = "distilgpt2"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-    model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float32,
+    device_map="cpu",
+    trust_remote_code=True
+)
 
 def generate_response(message):
     """Process user input and generate response"""
@@ -39,36 +24,29 @@ def generate_response(message):
         return "Please enter a question."
 
     try:
-        # Format prompt for chat model
-        if "TinyLlama" in model_name or "phi" in model_name:
-            prompt = f"<|user|>\n{message}\n<|assistant|>\n"
-        else:
-            prompt = f"User: {message}\nAssistant:"
+        prompt = f"### Instruction: {message}\n### Response:"
 
         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
 
-        # Generate with lower token count for CPU
         with torch.no_grad():
             outputs = model.generate(
                 inputs.input_ids,
-                max_new_tokens=150,  # Reduced for CPU
+                max_new_tokens=256,
                 temperature=0.7,
                 do_sample=True,
                 top_p=0.9,
                 pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.1
+                eos_token_id=tokenizer.eos_token_id
             )
 
         response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response.strip()
 
     except Exception as e:
-        return f"Error: {str(e)}"
+        return f"Error generating response: {str(e)}"
 
-# Create interface
 interface = gr.Interface(
-    fn=generate_response,
+    fn=generate_response,  # Connect function to interface
     inputs=gr.Textbox(label="Input", placeholder="Enter programming question...", lines=3),
     outputs=gr.Textbox(label="Output", lines=10),
     title="LiveCoder API",
@@ -76,13 +54,7 @@ interface = gr.Interface(
     allow_flagging="never"
 )
 
-# CRITICAL: Enable queue for async processing
-interface.queue(default_concurrency_limit=1)
-
 # Launch application
-interface.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    share=False,
-    debug=False  # Set to True for more logs
-)
+interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
+
+print(f"API Endpoint: {API_ENDPOINT}")
```
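The committed script computes API_ENDPOINT so the Space can be called over HTTP. Below is a minimal client sketch, assuming the Space exposes the classic non-queued Gradio REST API (a POST to /api/predict with the interface inputs wrapped in a "data" list); the sample question is only illustrative:

```python
# Hypothetical client for the LiveCoder Space, assuming the classic
# (non-queued) Gradio endpoint: POST /api/predict with {"data": [...]}.
import requests

API_ENDPOINT = "https://sarekuwa-livecoder.hf.space/api/predict"

def ask(message: str) -> str:
    # Gradio wraps interface inputs and outputs in a "data" list.
    resp = requests.post(API_ENDPOINT, json={"data": [message]}, timeout=120)
    resp.raise_for_status()
    return resp.json()["data"][0]

if __name__ == "__main__":
    print(ask("Write a Python function that reverses a string."))
```

Newer Gradio releases queue requests and version the HTTP API differently; in that case the official gradio_client package is the more reliable way to call a Space.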