nathanael-fijalkow committed on
Commit
615a63b
·
1 Parent(s): 7a36b3c

Updated agent with local option

Browse files
Files changed (1) hide show
  1. agent.py +33 -7
agent.py CHANGED
@@ -35,6 +35,10 @@ from huggingface_hub import InferenceClient
35
  # Load environment variables
36
  load_dotenv()
37
 
 
 
 
 
38
  # =============================================================================
39
  # LLM Configuration - DO NOT MODIFY
40
  # =============================================================================
@@ -42,12 +46,25 @@ load_dotenv()
42
  # Model to use (fixed for fair evaluation)
43
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
44
 
45
- # Initialize the LLM client (uses HF_TOKEN from environment)
46
- _hf_token = os.getenv("HF_TOKEN")
47
- if not _hf_token:
48
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
 
 
49
 
50
- LLM_CLIENT = InferenceClient(token=_hf_token)
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
@@ -74,7 +91,16 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
74
  {"role": "system", "content": system_prompt},
75
  {"role": "user", "content": prompt},
76
  ]
77
-
 
 
 
 
 
 
 
 
 
78
  response = LLM_CLIENT.chat.completions.create(
79
  model=LLM_MODEL,
80
  messages=messages,
@@ -82,7 +108,7 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
82
  max_tokens=max_tokens,
83
  seed=seed,
84
  )
85
-
86
  return response.choices[0].message.content
87
 
88
 
 
35
  # Load environment variables
36
  load_dotenv()
37
 
38
# Set USE_LOCAL_MODEL=1 (also accepts "true"/"yes", case-insensitive) in your
# .env to run a locally downloaded model instead of the HF Inference API.
# Fix: the flag is now lowercased before comparison — previously "True"/"YES"
# (common .env spellings) were silently treated as false.
USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip().lower() in ("1", "true", "yes")
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client based on mode.
# Exactly one of (_local_pipeline, LLM_CLIENT) is non-None after this block;
# callers (e.g. call_llm) branch on USE_LOCAL_MODEL / _local_pipeline.
_local_pipeline = None

if USE_LOCAL_MODEL:
    # Imported lazily so remote-API users don't need torch/transformers installed.
    import torch
    from transformers import pipeline as _hf_pipeline

    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,  # halves memory vs fp32; assumes bf16-capable hardware — TODO confirm
        device_map="auto",           # let accelerate place layers on available devices
    )
    LLM_CLIENT = None
else:
    # Remote mode requires an HF token (read from .env via load_dotenv above).
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)
68
 
69
 
70
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
 
91
  {"role": "system", "content": system_prompt},
92
  {"role": "user", "content": prompt},
93
  ]
94
+
95
+ if USE_LOCAL_MODEL and _local_pipeline is not None:
96
+ outputs = _local_pipeline(
97
+ messages,
98
+ max_new_tokens=max_tokens,
99
+ temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
+ do_sample=True,
101
+ )
102
+ return outputs[0]["generated_text"][-1]["content"]
103
+
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
106
  messages=messages,
 
108
  max_tokens=max_tokens,
109
  seed=seed,
110
  )
111
+
112
  return response.choices[0].message.content
113
 
114