Nav772 committed on
Commit
13fdc21
·
verified ·
1 Parent(s): c248357

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -29
app.py CHANGED
@@ -10,52 +10,34 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
14
 
15
class BasicAgent:
    """Question-answering agent backed by Mistral-7B-Instruct-v0.1 on CPU.

    Loads the tokenizer and causal-LM weights explicitly, then wraps them
    in a text-generation pipeline. Calling the instance with a question
    string returns the model's reply (or an error string on failure).
    """

    def __init__(self):
        print("Mistral Agent loading on CPU...")

        checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"

        # Materialize tokenizer and model weights up front.
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            device_map="auto",       # will default to CPU on a CPU-only host
            low_cpu_mem_usage=True,  # reduces peak RAM during load
            torch_dtype="auto",
        )

        # Wrap model + tokenizer in a generation pipeline, pinned to CPU.
        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=-1,  # force CPU
        )

    def __call__(self, question: str) -> str:
        """Generate an answer for *question*; never raises, returns error text instead."""
        print(f"Agent received question: {question[:50]}...")

        try:
            # Mistral instruction template: user turn wrapped in [INST] tags.
            formatted = f"<s>[INST] {question.strip()} [/INST]"

            generations = self.pipeline(
                formatted,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
            )

            # generated_text echoes the prompt; keep only what follows the
            # closing [/INST] tag.
            text = generations[0]["generated_text"]
            return text.split("[/INST]")[-1].strip()
        except Exception as e:
            # Boundary handler: report failure as a string for the caller/UI.
            print(f"❌ Mistral error: {e}")
            return f"❌ Model Error: {str(e)}"
60
 
61
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
10
 
11
  # --- Basic Agent Definition ---
12
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
+ from transformers import pipeline
14
 
15
class BasicAgent:
    """Question-answering agent backed by Mistral-7B-Instruct-v0.1 via a
    transformers text-generation pipeline, running CPU-only.

    Calling the instance with a question string returns the model's reply,
    or a "❌ Model Error: ..." string if generation fails.
    """

    def __init__(self):
        print("Loading Mistral-7B-Instruct-v0.1 using pipeline...")

        # Build the generation pipeline once at construction time.
        self.pipe = pipeline(
            "text-generation",
            model="mistralai/Mistral-7B-Instruct-v0.1",
            device=-1  # CPU only
        )

    def __call__(self, question: str) -> str:
        """Generate an answer for *question*; never raises, returns error text instead."""
        print(f"Received question: {question[:50]}...")

        # Mistral instruction template: user turn wrapped in [INST] tags.
        prompt = f"<s>[INST] {question.strip()} [/INST]"

        try:
            output = self.pipe(
                prompt,
                max_new_tokens=256,
                # Fix: do_sample=True is required — with the default greedy
                # decoding, temperature and top_p below are silently ignored
                # (transformers warns about unused sampling flags).
                do_sample=True,
                temperature=0.7,
                top_p=0.95
            )
            # generated_text echoes the prompt; keep only what follows the
            # closing [/INST] tag.
            full_response = output[0]["generated_text"]
            return full_response.split("[/INST]")[-1].strip()
        except Exception as e:
            # Boundary handler: report failure as a string for the caller/UI.
            print(f"❌ Inference Error: {e}")
            return f"❌ Model Error: {str(e)}"
42
 
43
  def run_and_submit_all( profile: gr.OAuthProfile | None):