Spaces:
Sleeping
Sleeping
Add final formatting step
Browse files
app.py
CHANGED
|
@@ -3,7 +3,8 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
-
from llama_index.core.
|
|
|
|
| 7 |
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
| 8 |
from tools import (
|
| 9 |
APIProcessor,
|
|
@@ -11,8 +12,9 @@ from tools import (
|
|
| 11 |
transcribe_image_from_link,
|
| 12 |
transcribe_webpage,
|
| 13 |
)
|
|
|
|
| 14 |
from search import GoogleSearch
|
| 15 |
-
|
| 16 |
|
| 17 |
# (Keep Constants as is)
|
| 18 |
# --- Constants ---
|
|
@@ -30,13 +32,13 @@ class BasicAgent:
|
|
| 30 |
self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
|
| 31 |
print("BasicAgent initialized.")
|
| 32 |
|
| 33 |
-
def __call__(self, question: str, task_id: str, file_name: str) -> str:
|
| 34 |
google_search = GoogleSearch().google_search
|
| 35 |
google_image_search = GoogleSearch().google_image_search
|
| 36 |
|
| 37 |
get_and_process_question_attachment = APIProcessor(
|
| 38 |
file_url=DEFAULT_API_URL + "/files/" + task_id, file_name=file_name
|
| 39 |
-
).get_and_process_attachment
|
| 40 |
|
| 41 |
agent = AgentWorkflow.from_tools_or_functions(
|
| 42 |
[
|
|
@@ -50,10 +52,22 @@ class BasicAgent:
|
|
| 50 |
llm=self.llm,
|
| 51 |
system_prompt=SYSTEM_PROMPT,
|
| 52 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
response =
|
| 55 |
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
@@ -226,6 +240,36 @@ with gr.Blocks() as demo:
|
|
| 226 |
|
| 227 |
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
| 228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
if __name__ == "__main__":
|
| 230 |
print("\n" + "-" * 30 + " App Starting " + "-" * 30)
|
| 231 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
|
@@ -253,3 +297,5 @@ if __name__ == "__main__":
|
|
| 253 |
|
| 254 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 255 |
demo.launch(debug=True, share=False)
|
|
|
|
|
|
|
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
+
from llama_index.core.workflow import Context
|
| 7 |
+
from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, AgentStream
|
| 8 |
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
| 9 |
from tools import (
|
| 10 |
APIProcessor,
|
|
|
|
| 12 |
transcribe_image_from_link,
|
| 13 |
transcribe_webpage,
|
| 14 |
)
|
| 15 |
+
from utils import format_final_answer
|
| 16 |
from search import GoogleSearch
|
| 17 |
+
import asyncio
|
| 18 |
|
| 19 |
# (Keep Constants as is)
|
| 20 |
# --- Constants ---
|
|
|
|
| 32 |
self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
|
| 33 |
print("BasicAgent initialized.")
|
| 34 |
|
| 35 |
+
async def __call__(self, question: str, task_id: str, file_name: str) -> str:
|
| 36 |
google_search = GoogleSearch().google_search
|
| 37 |
google_image_search = GoogleSearch().google_image_search
|
| 38 |
|
| 39 |
get_and_process_question_attachment = APIProcessor(
|
| 40 |
file_url=DEFAULT_API_URL + "/files/" + task_id, file_name=file_name
|
| 41 |
+
).get_and_process_attachment
|
| 42 |
|
| 43 |
agent = AgentWorkflow.from_tools_or_functions(
|
| 44 |
[
|
|
|
|
| 52 |
llm=self.llm,
|
| 53 |
system_prompt=SYSTEM_PROMPT,
|
| 54 |
)
|
| 55 |
+
ctx = Context(agent)
|
| 56 |
+
handler = agent.run(question, ctx=ctx)
|
| 57 |
+
async for ev in handler.stream_events():
|
| 58 |
+
if isinstance(ev, ToolCallResult):
|
| 59 |
+
print("")
|
| 60 |
+
print(
|
| 61 |
+
"Called tool: ", ev.tool_name, ev.tool_kwargs, "=>", ev.tool_output
|
| 62 |
+
)
|
| 63 |
+
elif isinstance(ev, AgentStream): # showing the thought process
|
| 64 |
+
print(ev.delta, end="", flush=True)
|
| 65 |
|
| 66 |
+
response = await handler
|
| 67 |
|
| 68 |
+
##format final answer
|
| 69 |
+
final_answer = format_final_answer(question, response)
|
| 70 |
+
return final_answer
|
| 71 |
|
| 72 |
|
| 73 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
| 240 |
|
| 241 |
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
| 242 |
|
| 243 |
+
|
| 244 |
+
# async def main():
|
| 245 |
+
# agent = BasicAgent()
|
| 246 |
+
# api_url = DEFAULT_API_URL
|
| 247 |
+
# questions_url = f"{api_url}/questions"
|
| 248 |
+
# print(f"Fetching questions from: {questions_url}")
|
| 249 |
+
|
| 250 |
+
# response = requests.get(questions_url, timeout=15)
|
| 251 |
+
# response.raise_for_status()
|
| 252 |
+
# questions_data = response.json()
|
| 253 |
+
|
| 254 |
+
# # 3. Run your Agent
|
| 255 |
+
# results_log = []
|
| 256 |
+
# answers_payload = []
|
| 257 |
+
# print(f"Running agent on {len(questions_data)} questions...")
|
| 258 |
+
# item = questions_data[0]
|
| 259 |
+
# task_id = item.get("task_id")
|
| 260 |
+
# question_text = item.get("question")
|
| 261 |
+
# file_name = item.get("file_name")
|
| 262 |
+
# submitted_answer = await agent(question_text, task_id, file_name)
|
| 263 |
+
# answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 264 |
+
# results_log.append(
|
| 265 |
+
# {
|
| 266 |
+
# "Task ID": task_id,
|
| 267 |
+
# "Question": question_text,
|
| 268 |
+
# "Submitted Answer": submitted_answer,
|
| 269 |
+
# }
|
| 270 |
+
# )
|
| 271 |
+
|
| 272 |
+
|
| 273 |
if __name__ == "__main__":
|
| 274 |
print("\n" + "-" * 30 + " App Starting " + "-" * 30)
|
| 275 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
|
|
|
| 297 |
|
| 298 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 299 |
demo.launch(debug=True, share=False)
|
| 300 |
+
|
| 301 |
+
# asyncio.run(main())
|
requirements.txt
CHANGED
|
@@ -7,6 +7,6 @@ pandas
|
|
| 7 |
aiohttp
|
| 8 |
Pillow
|
| 9 |
yt-dlp
|
| 10 |
-
|
| 11 |
llama-index-utils-workflow
|
| 12 |
llama-index-llms-huggingface-api
|
|
|
|
| 7 |
aiohttp
|
| 8 |
Pillow
|
| 9 |
yt-dlp
|
| 10 |
+
html2text
|
| 11 |
llama-index-utils-workflow
|
| 12 |
llama-index-llms-huggingface-api
|
tools.py
CHANGED
|
@@ -8,8 +8,9 @@ import os
|
|
| 8 |
import io
|
| 9 |
import yt_dlp
|
| 10 |
import re
|
| 11 |
-
|
| 12 |
from requests.exceptions import RequestException
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def transcribe_image_from_link(image_link: str) -> str:
|
|
@@ -49,6 +50,13 @@ def transcribe_image_from_link(image_link: str) -> str:
|
|
| 49 |
return transcribed_text
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def transcribe_webpage(website_url: str) -> str:
|
| 53 |
"""Visits website url and returns markdown of contents"""
|
| 54 |
try:
|
|
@@ -56,21 +64,25 @@ def transcribe_webpage(website_url: str) -> str:
|
|
| 56 |
response = requests.get(website_url, timeout=20)
|
| 57 |
response.raise_for_status() # Raise an exception for bad status codes
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# Remove multiple line breaks
|
| 67 |
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
| 68 |
|
| 69 |
-
|
| 70 |
-
f.write("\n\nMarkdown content:\n\n")
|
| 71 |
-
f.write(markdown_content)
|
| 72 |
-
|
| 73 |
-
return markdown_content
|
| 74 |
|
| 75 |
except requests.exceptions.Timeout:
|
| 76 |
return "The request timed out. Please try again later or check the URL."
|
|
@@ -218,11 +230,15 @@ if __name__ == "__main__":
|
|
| 218 |
# return "https://agents-course-unit4-scoring.hf.space" + "/files/" + task_id
|
| 219 |
|
| 220 |
# audio_task_processor = APIProcessor(
|
| 221 |
-
# file_name="
|
| 222 |
-
# file_url=get_file_api_url("
|
| 223 |
# )
|
| 224 |
|
| 225 |
# response = audio_task_processor.get_and_process_attachment()
|
| 226 |
# print(response)
|
| 227 |
-
result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
|
| 228 |
-
print(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import io
|
| 9 |
import yt_dlp
|
| 10 |
import re
|
| 11 |
+
import html2text
|
| 12 |
from requests.exceptions import RequestException
|
| 13 |
+
from bs4 import BeautifulSoup
|
| 14 |
|
| 15 |
|
| 16 |
def transcribe_image_from_link(image_link: str) -> str:
|
|
|
|
| 50 |
return transcribed_text
|
| 51 |
|
| 52 |
|
| 53 |
+
def truncate_content(content: str, max_length: int = 10000) -> str:
    """Clamp *content* to at most *max_length* characters.

    Text already within the limit is returned unchanged; longer text is
    cut off at exactly *max_length* characters (no ellipsis is appended).
    """
    over_limit = len(content) > max_length
    return content[:max_length] if over_limit else content
|
| 58 |
+
|
| 59 |
+
|
| 60 |
def transcribe_webpage(website_url: str) -> str:
|
| 61 |
"""Visits website url and returns markdown of contents"""
|
| 62 |
try:
|
|
|
|
| 64 |
response = requests.get(website_url, timeout=20)
|
| 65 |
response.raise_for_status() # Raise an exception for bad status codes
|
| 66 |
|
| 67 |
+
soup = BeautifulSoup(response.text, "html.parser")
|
| 68 |
+
content_div = soup.find("div", id="mw-content-text")
|
| 69 |
+
|
| 70 |
+
if not content_div:
|
| 71 |
+
return "Main content not found."
|
| 72 |
+
|
| 73 |
+
# Only extract <p> and <table> tags
|
| 74 |
+
elements = content_div.find_all(["p", "table"])
|
| 75 |
|
| 76 |
+
# Join selected HTML chunks
|
| 77 |
+
html_subset = "".join(str(el) for el in elements)
|
| 78 |
+
|
| 79 |
+
# Convert the HTML content to Markdown
|
| 80 |
+
markdown_content = html2text.HTML2Text().handle(str(html_subset))
|
| 81 |
|
| 82 |
# Remove multiple line breaks
|
| 83 |
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
| 84 |
|
| 85 |
+
return truncate_content(markdown_content, 20000)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
except requests.exceptions.Timeout:
|
| 88 |
return "The request timed out. Please try again later or check the URL."
|
|
|
|
| 230 |
# return "https://agents-course-unit4-scoring.hf.space" + "/files/" + task_id
|
| 231 |
|
| 232 |
# audio_task_processor = APIProcessor(
|
| 233 |
+
# file_name="",
|
| 234 |
+
# file_url=get_file_api_url("8e867cd7-cff9-4e6c-867a-ff5ddc2550be"),
|
| 235 |
# )
|
| 236 |
|
| 237 |
# response = audio_task_processor.get_and_process_attachment()
|
| 238 |
# print(response)
|
| 239 |
+
# result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
|
| 240 |
+
# print(result)
|
| 241 |
+
text = transcribe_webpage(
|
| 242 |
+
"https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
|
| 243 |
+
)
|
| 244 |
+
print(text)
|
utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from PIL import Image
|
| 2 |
from io import BytesIO
|
| 3 |
import base64
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def encode_image_in_base64(image: bytes):
|
|
@@ -50,3 +51,27 @@ def process_image_for_gpt(image_bytes: bytes) -> str:
|
|
| 50 |
image_bytes = replace_transparent_pixels(image_bytes)
|
| 51 |
base64_image = encode_image_in_base64(image_bytes)
|
| 52 |
return base64_image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from PIL import Image
|
| 2 |
from io import BytesIO
|
| 3 |
import base64
|
| 4 |
+
from openai import OpenAI
|
| 5 |
|
| 6 |
|
| 7 |
def encode_image_in_base64(image: bytes):
|
|
|
|
| 51 |
image_bytes = replace_transparent_pixels(image_bytes)
|
| 52 |
base64_image = encode_image_in_base64(image_bytes)
|
| 53 |
return base64_image
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def format_final_answer(question: str, answer: str) -> str:
    """Reformat a raw agent answer into the benchmark's expected answer format.

    Sends the original question plus the agent's (possibly messy) answer to
    gpt-4o with instructions to emit only the corrected "FINAL ANSWER" style
    text required by the scoring server.

    Args:
        question: The original question the agent was asked.
        answer: The agent's raw answer; coerced with ``str()`` before sending.

    Returns:
        The reformatted answer text, stripped of surrounding whitespace.
        Falls back to ``str(answer)`` if the model returns no text content.

    Raises:
        openai.OpenAIError: On API/auth/network failures (propagated from the
            client; requires ``OPENAI_API_KEY`` in the environment).
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """ You're tasked with reformatting an answer from an unreliable AI into the expected format as per their instructions.
                <instructions>
                You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
                </instructions>
                <question>"""
                + question
                + """
                Now here is their answer. Only reply with the corrected formatting
                """,
            },
            {"role": "user", "content": str(answer)},
        ],
    )
    content = response.choices[0].message.content
    if content is None:
        # BUG FIX: the model can return a message with no text content
        # (e.g. a refusal or content-filtered response); the old code then
        # raised AttributeError on `.strip()`. Fall back to the unformatted
        # answer instead of crashing the whole submission run.
        return str(answer)
    return content.strip()
|