Hetfield08 committed on
Commit
0817dff
·
verified ·
1 Parent(s): 0541004

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -50
app.py CHANGED
@@ -25,78 +25,78 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's important to specify the return
25
 
26
  #from datasets import load_dataset
27
  #ds = load_dataset("nfliu/decontextualization")
28
def get_page_content(url):
    """Fetch a web page and return up to the first 1000 characters of its visible text.

    Args:
        url: Address of the page to download. May be empty or the caller's
            placeholder string 'No URL available'.

    Returns:
        The whitespace-normalized visible text of the page, truncated to
        1000 characters, or an error-message string if the URL is invalid
        or the HTTP request fails. Never raises.
    """
    # Guard: the caller builds URLs with a 'No URL available' fallback, and
    # the original code would have tried to fetch that literal string.
    if not url or url == 'No URL available':
        return "No content available - invalid URL"

    try:
        # Browser-like User-Agent to avoid trivial bot blocking.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the HTML content.
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts and styles so only human-visible text remains.
        for script in soup(["script", "style"]):
            script.decompose()

        # Extract and clean the text: one space between non-empty fragments.
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        text = ' '.join(chunk for chunk in lines if chunk)

        return text[:1000]  # return only the first 1000 characters

    except Exception as e:
        # Best-effort helper: report the failure as a string instead of raising.
        return f"Error al acceder a la página: {str(e)}"
58
@tool
def answer_nfl_question_tool(question: str) -> str:
    """
    A tool that answers the user's questions about the NFL rules, tactics and players.

    Args:
        question: The user's question; the agent looks up the answer via DuckDuckGo search.
    """
    # Prepare the DuckDuckGo query.
    search_query = f"NFL {question}"
    print(f"🔍 Searching DuckDuckGo with query: {search_query}")

    # Search using DuckDuckGo.
    with DDGS() as ddgs:
        search_results = list(ddgs.text(search_query, max_results=3))

    print(f"📊 Found {len(search_results)} results")

    if not search_results:
        return "❌ We haven't found answers to this question."

    # Format results.
    detailed_results = []
    detailed_results.append(f"Search Results for: {search_query}\n")
    detailed_results.append("=" * 50 + "\n")

    # Process each search result.
    for i, result in enumerate(search_results, 1):
        # BUG FIX: the original called get_page_content(url) where `url` was
        # never defined (NameError at runtime). Derive the URL from this
        # result and pass it through explicitly.
        page_url = result.get('link') or result.get('url', 'No URL available')

        detailed_results.append(f"\nSource {i}: {page_url}\n")
        detailed_results.append("-" * 30 + "\n")
        detailed_results.append(f"Title: {result.get('title', 'No title available')}\n")
        detailed_results.append(f"Description: {result.get('snippet', 'No description available')}\n")

        # Fetch and append the page content.
        print(f"📄 Accessing page {i}...")
        page_content = get_page_content(page_url)
        detailed_results.append(f"Content: {page_content}\n")

        # Small delay between requests to be polite to servers.
        time.sleep(1)

    return "\n".join(detailed_results)
 
 
100
 
101
 
102
  @tool
 
25
 
26
  #from datasets import load_dataset
27
  #ds = load_dataset("nfliu/decontextualization")
28
def get_page_content(page_url):
    """Return up to the first 1000 characters of a page's visible text.

    Args:
        page_url: Address of the page to fetch; may be empty or the
            placeholder string 'No URL available'.

    Returns:
        Cleaned page text capped at 1000 characters, or an explanatory
        error string when the URL is invalid or the request fails.
        Never raises.
    """
    # Reject missing URLs and the caller's placeholder up front.
    if not page_url or page_url == 'No URL available':
        return "No content available - invalid URL"

    # Browser-like User-Agent to reduce the chance of being blocked.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        reply = requests.get(page_url, headers=request_headers, timeout=10)
        reply.raise_for_status()

        parsed = BeautifulSoup(reply.text, 'html.parser')

        # Strip scripts and stylesheets so only human-visible text remains.
        for tag in parsed(["script", "style"]):
            tag.decompose()

        # Collapse the text: trim each line, keep non-empty ones,
        # join with single spaces.
        fragments = (piece.strip() for piece in parsed.get_text().splitlines())
        cleaned = ' '.join(piece for piece in fragments if piece)

        return cleaned[:1000]

    except Exception as e:
        return f"Error accessing the page: {str(e)}"
56
+
57
def answer_nfl_question_tool(question: str) -> str:
    """
    A tool that answers the user's questions about NFL rules, tactics and players.

    Args:
        question: The user's question; the answer is looked up via DuckDuckGo search.

    Returns:
        A formatted multi-source report string, or an error/no-results message.
        Never raises: all failures are reported as strings.
    """
    try:
        search_query = f"NFL {question}"
        print(f"🔍 Searching DuckDuckGo with query: {search_query}")

        with DDGS() as ddgs:
            search_results = list(ddgs.text(search_query, max_results=3))

        print(f"📊 Found {len(search_results)} results")

        if not search_results:
            return "❌ No results found for this question."

        detailed_results = []
        detailed_results.append(f"Search Results for: {search_query}\n")
        detailed_results.append("=" * 50 + "\n")

        for i, result in enumerate(search_results, 1):
            # Verify the result is a dict and carries the keys we need.
            if not isinstance(result, dict):
                continue

            page_url = result.get('link') or result.get('url')
            if not page_url:
                continue

            title = result.get('title', 'No title available')

            detailed_results.append(f"\nSource {i}: {page_url}\n")
            detailed_results.append("-" * 30 + "\n")
            detailed_results.append(f"Title: {title}\n")

            print(f"📄 Accessing page {i}...")
            page_content = get_page_content(page_url)
            detailed_results.append(f"Content: {page_content}\n")

            # Small delay between requests to avoid hammering servers.
            time.sleep(1)

        return "\n".join(detailed_results)

    except Exception as e:
        # Boundary catch: the tool contract is to return error text, not raise.
        return f"Error during search: {str(e)}"
99
+
100
 
101
 
102
  @tool