DeekshithN05 commited on
Commit
56f4708
·
verified ·
1 Parent(s): dc9dec3

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. .gitignore +174 -0
  3. agent.py +146 -0
  4. app.py +232 -0
  5. requirements.txt +20 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
agent.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph Agent"""
2
+
3
+ import os
4
+ import json
5
+ import getpass
6
+ from dotenv import load_dotenv
7
+
8
+ from langgraph.graph import START, StateGraph, MessagesState
9
+ from langgraph.prebuilt import tools_condition, ToolNode
10
+
11
+ from langchain_core.messages import SystemMessage, HumanMessage
12
+ from langchain_core.vectorstores import InMemoryVectorStore
13
+ from langchain_core.documents import Document
14
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
15
+ from langchain_ollama import ChatOllama
16
+
17
+ from tools.math.multiply import multiply
18
+ from tools.math.add import add
19
+ from tools.math.subtract import subtract
20
+ from tools.math.divide import divide
21
+ from tools.math.modulus import modulus
22
+ from tools.math.power import power
23
+ from tools.math.square_root import square_root
24
+
25
+ from tools.search.arxiv_search import arxiv_search
26
+ from tools.search.web_search import web_search
27
+ from tools.search.wiki_search import wiki_search
28
+
29
+ from tools.file.analyze_csv_file import analyze_csv_file
30
+ from tools.file.analyze_excel_file import analyze_excel_file
31
+ from tools.file.analyze_image import analyze_image
32
+ from tools.file.download_file_from_url import download_file_from_url
33
+ from tools.file.save_content_to_file import save_content_to_file
34
+
35
# --- Load environment variables ---
# Reads a local .env file (if present) so OPENAI_API_KEY etc. can be set there.
load_dotenv()

# --- Constants ---
# Both paths are relative to the working directory the app is launched from.
DATASET_PATH = "dataset/metadata.jsonl"
SYSTEM_PROMPT_PATH = "prompts/system_prompt.txt"
# Every tool exposed to the agent; bound to the LLM and to the ToolNode in
# build_graph(), so adding a tool here makes it available in both places.
TOOLS = [
    add,
    subtract,
    multiply,
    divide,
    modulus,
    power,
    square_root,
    web_search,
    wiki_search,
    arxiv_search,
    analyze_csv_file,
    analyze_excel_file,
    analyze_image,
    download_file_from_url,
    save_content_to_file,
]
58
+
59
+
60
def load_vector_store() -> InMemoryVectorStore:
    """Build an in-memory vector store from the JSONL example dataset.

    Each non-empty line of DATASET_PATH is parsed as a JSON object that must
    contain the keys 'Question', 'Final answer', and 'task_id'.

    Returns:
        InMemoryVectorStore populated with one Document per dataset entry,
        embedded with OpenAIEmbeddings.

    Raises:
        FileNotFoundError: if DATASET_PATH does not exist.
    """
    if not os.path.exists(DATASET_PATH):
        raise FileNotFoundError(f"Dataset not found at {DATASET_PATH}.")
    embeddings = OpenAIEmbeddings()
    vector_store = InMemoryVectorStore(embeddings)
    documents = []
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank/whitespace-only lines so a trailing newline (or an
            # accidental empty line) in the JSONL file does not make
            # json.loads raise on an empty string.
            line = line.strip()
            if not line:
                continue
            entry = json.loads(line)
            content = (
                f"Question: {entry['Question']}\nFinal answer: {entry['Final answer']}"
            )
            doc = Document(page_content=content, metadata={"source": entry["task_id"]})
            documents.append(doc)
    vector_store.add_documents(documents)
    return vector_store
77
+
78
+
79
def get_llm(provider: str):
    """Return a zero-temperature chat model for the given provider.

    Supported providers are 'openai' (ChatOpenAI, gpt-4.1) and 'ollama'
    (ChatOllama, llama3.2); anything else raises ValueError.
    """
    if provider == "ollama":
        return ChatOllama(model="llama3.2", temperature=0)
    if provider == "openai":
        # Prompt interactively for the key only when it is not already set
        # in the environment (e.g. via .env / load_dotenv()).
        if not os.environ.get("OPENAI_API_KEY"):
            os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI API key: ")
        return ChatOpenAI(model="gpt-4.1", temperature=0)
    raise ValueError("Unsupported provider: choose 'openai' or 'ollama'")
89
+
90
+
91
def load_system_prompt() -> SystemMessage:
    """Read the system prompt file and wrap its contents in a SystemMessage.

    Raises:
        FileNotFoundError: if SYSTEM_PROMPT_PATH does not exist.
    """
    if not os.path.exists(SYSTEM_PROMPT_PATH):
        raise FileNotFoundError(f"System prompt not found at {SYSTEM_PROMPT_PATH}.")
    with open(SYSTEM_PROMPT_PATH, "r", encoding="utf-8") as handle:
        prompt_text = handle.read()
    return SystemMessage(content=prompt_text)
97
+
98
+
99
def build_graph(provider: str = "openai"):
    """Build and compile the LangGraph agent.

    Graph flow: START -> retriever -> assistant, then tools_condition loops
    assistant <-> tools until the LLM stops requesting tool calls.

    Args:
        provider: LLM backend passed to get_llm ('openai' or 'ollama').

    Returns:
        The compiled LangGraph app (result of graph.compile()).

    Raises:
        FileNotFoundError: from load_vector_store / load_system_prompt when
            the dataset or prompt file is missing.
        ValueError: from get_llm for an unsupported provider.
    """
    # Closures below capture these three objects, so they are built once per
    # compiled graph rather than once per invocation.
    llm = get_llm(provider).bind_tools(TOOLS)
    vector_store = load_vector_store()
    system_msg = load_system_prompt()

    def retriever(state: MessagesState):
        """Retrieve similar examples based on user query."""
        # Uses the FIRST message as the query — assumes the user question is
        # messages[0] on entry (true when invoked as in run_agent).
        query = state["messages"][0].content
        similar = vector_store.similarity_search(query, k=3)
        if similar:
            refs = "\n\n".join(doc.page_content for doc in similar)
            example_msg = HumanMessage(content=f"Here are similar examples:\n\n{refs}")
            # System prompt first, then the conversation, then the examples.
            return {"messages": [system_msg] + state["messages"] + [example_msg]}
        return {"messages": [system_msg] + state["messages"]}

    def assistant(state: MessagesState):
        """Call LLM to generate next message."""
        response = llm.invoke(state["messages"])
        return {"messages": [response]}

    # --- Build graph ---
    graph = StateGraph(MessagesState)
    graph.add_node("retriever", retriever)
    graph.add_node("assistant", assistant)
    graph.add_node("tools", ToolNode(TOOLS))

    graph.add_edge(START, "retriever")
    graph.add_edge("retriever", "assistant")
    # tools_condition routes to the "tools" node when the last assistant
    # message contains tool calls, otherwise to END.
    graph.add_conditional_edges("assistant", tools_condition)
    graph.add_edge("tools", "assistant")

    return graph.compile()
132
+
133
+
134
def run_agent(query: str, provider: str = "openai"):
    """Run the agent on a given query and pretty-print every resulting message.

    Args:
        query: the user's question.
        provider: LLM backend forwarded to build_graph.
    """
    compiled = build_graph(provider)
    final_state = compiled.invoke({"messages": [HumanMessage(content=query)]})
    for message in final_state["messages"]:
        message.pretty_print()
141
+
142
+
143
+ # --- Run locally ---
144
if __name__ == "__main__":
    # Interactive entry point: ask for one question and run the agent on it.
    question = input("Enter your question: ")
    run_agent(question)
app.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import re
4
+ import requests
5
+ import pandas as pd
6
+ from agent import build_graph
7
+ from langchain_core.messages import HumanMessage
8
+
9
+
10
+ # (Keep Constants as is)
11
+ # --- Constants ---
12
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
+
14
+
15
+ # --- Basic Agent Definition ---
16
class BasicAgent:
    """Callable wrapper around the compiled LangGraph agent from build_graph()."""

    def __init__(self):
        # build_graph() also loads the vector store and system prompt, so
        # construction can raise (e.g. FileNotFoundError) if those are missing.
        print("BasicAgent initialized.")
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return the extracted answer string."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Wrap the question in a HumanMessage from langchain_core
        messages = [HumanMessage(content=question)]
        messages = self.graph.invoke({"messages": messages})
        # The final message in the state is the model's last reply.
        answer = messages["messages"][-1].content
        # Use regex to extract the answer after FINAL ANSWER:
        match = re.search(r"FINAL ANSWER:\s*(.+)", answer, re.IGNORECASE)
        if match:
            final_answer = match.group(1).strip()
            # Optionally: strip trailing explanations (e.g., if comma-separated and extra stuff is appended)
            # NOTE(review): split(",")[0] also truncates legitimate
            # comma-separated list answers to their first item — confirm this
            # is intended before relying on it for list-type questions.
            final_answer = (
                final_answer.split("\n")[0].split(",")[0] if final_answer else ""
            )
            return final_answer
        # No "FINAL ANSWER:" marker found: fall back to the raw model output.
        return answer
37
+
38
+
39
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile injected by Gradio's LoginButton; None when the
            user is not logged in.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio outputs;
        the dataframe is None when the run aborts before any questions ran.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # FIX: JSONDecodeError is a subclass of RequestException (requests >= 2.27),
    # so it must be caught BEFORE RequestException or its handler is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": submitted_answer,
                }
            )
        except Exception as e:
            # A single failing question must not abort the whole run; record
            # the error in the log and keep going.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": f"AGENT ERROR: {e}",
                }
            )

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
174
+
175
+
176
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # The login button supplies the gr.OAuthProfile that run_and_submit_all
    # receives as its parameter.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False
    )
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs listed: Gradio injects the OAuth profile automatically
    # based on run_and_submit_all's gr.OAuthProfile annotation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
205
+
206
if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    # (both are set by the Hugging Face Spaces runtime; absent locally).
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(
            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
        )
    else:
        print(
            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
        )

    print("-" * (60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    # debug=True surfaces tracebacks in the UI; share=False keeps the app
    # local to the Space / machine it runs on.
    demo.launch(debug=True, share=False)
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ langchain_openai
6
+ langchain-ollama
7
+ langchain-core
8
+ langchain-google-genai
9
+ langchain-huggingface
10
+ langchain-groq
11
+ langchain-tavily
12
+ langchain-chroma
13
+ langgraph
14
+ huggingface_hub
15
+ arxiv
16
+ pymupdf
17
+ wikipedia
18
+ pgvector
19
+ python-dotenv
20
+ pinecone-client