Update llm_node.py
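Swaps the generation model from HuggingFaceH4/zephyr-7b-beta to the smaller free-tier google/flan-t5-small, caps max_new_tokens at 200 to fit the model's limits, and drops the repetition_penalty argument.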
llm_node.py  CHANGED  +3 -9
```diff
@@ -2,13 +2,10 @@ import os
 from huggingface_hub import InferenceClient
 
 def llm_node(question, search_result):
-    # Initialize the client (no repo_id here!)
     client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 
-
-    model_id = "HuggingFaceH4/zephyr-7b-beta"  # You can swap this with e.g., mistralai/Mistral-7B-Instruct-v0.2
+    model_id = "google/flan-t5-small"  # ✅ Small free-tier model
 
-    # Craft the prompt carefully
     prompt = f"""You are solving a GAIA benchmark evaluation question.
 
 Here’s the question:
@@ -24,15 +21,12 @@ Here’s retrieved information:
 
 Your answer:"""
 
-    # Call the model (pass model ID here)
     response = client.text_generation(
         model=model_id,
         prompt=prompt,
-        max_new_tokens=
+        max_new_tokens=200,  # smaller due to model limits
         temperature=0.1,
-        top_p=0.
-        repetition_penalty=1.1
+        top_p=0.9
     )
 
-    # Clean up response text
     return response.strip()
```
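For a quick local check of the updated node, something like the sketch below should work. It assumes HUGGINGFACEHUB_API_TOKEN is exported and llm_node.py is on the import path; the question and search_result values are hypothetical placeholders, not real GAIA inputs.

```python
import os

# Hypothetical smoke test for the updated llm_node.
# Assumes HUGGINGFACEHUB_API_TOKEN is set and llm_node.py is importable.
from llm_node import llm_node

assert os.getenv("HUGGINGFACEHUB_API_TOKEN"), "export HUGGINGFACEHUB_API_TOKEN first"

# Placeholder inputs standing in for a GAIA question and its retrieved context.
question = "In what year did the Eiffel Tower open to the public?"
search_result = "The Eiffel Tower opened to the public on 31 March 1889."

print(llm_node(question, search_result))
```

The tighter max_new_tokens=200 follows the comment in the diff: flan-t5-small has a far smaller generation budget than zephyr-7b-beta, so the old settings would exceed what the free-tier model can serve.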