Umer797 committed
Commit 5a5c9b9 · verified · 1 Parent(s): 2cac8b2

Update llm_node.py

Files changed (1)
llm_node.py +3 -9
llm_node.py CHANGED
@@ -2,13 +2,10 @@ import os
 from huggingface_hub import InferenceClient
 
 def llm_node(question, search_result):
-    # Initialize the client (no repo_id here!)
     client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 
-    # Define the model you want to use
-    model_id = "HuggingFaceH4/zephyr-7b-beta"  # You can swap this with e.g., mistralai/Mistral-7B-Instruct-v0.2
+    model_id = "google/flan-t5-small"  # Small free-tier model
 
-    # Craft the prompt carefully
     prompt = f"""You are solving a GAIA benchmark evaluation question.
 
 Here’s the question:
@@ -24,15 +21,12 @@ Here’s retrieved information:
 
 Your answer:"""
 
-    # Call the model (pass model ID here)
     response = client.text_generation(
         model=model_id,
         prompt=prompt,
-        max_new_tokens=500,
+        max_new_tokens=200,  # smaller due to model limits
         temperature=0.1,
-        top_p=0.95,
-        repetition_penalty=1.1
+        top_p=0.9
     )
 
-    # Clean up response text
     return response.strip()
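
For reference, a minimal sketch of how the updated node might be called from a driver script. The question and search_result values below are hypothetical placeholders (not from this repo), and the sketch assumes llm_node.py is importable and that HUGGINGFACEHUB_API_TOKEN is already set in the environment.

# Hypothetical driver for llm_node; the inputs are illustrative only.
import os

from llm_node import llm_node

# llm_node reads the token via os.getenv, so it must be set before the call.
assert os.getenv("HUGGINGFACEHUB_API_TOKEN"), "set HUGGINGFACEHUB_API_TOKEN first"

answer = llm_node(
    question="In what year was the Eiffel Tower completed?",
    search_result="The Eiffel Tower was completed in 1889 for the World's Fair.",
)
print(answer)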