Final_Assignment_Template

Sleeping

App Files Files Community

Sandiago21 committed on Apr 3

Commit

555d88a

verified ·

1 Parent(s): 152c460

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -8

app.py CHANGED Viewed

@@ -114,10 +114,10 @@ def reasoning_generate(prompt):
         tokens removed and leading/trailing whitespace stripped.
     """
-    inputs = reasoning_tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
-        outputs = reasoning_model.generate(
             **inputs,
             max_new_tokens=config.reasoning_max_len,
             temperature=config.temperature,
@@ -126,7 +126,7 @@ def reasoning_generate(prompt):
     generated = outputs[0][inputs["input_ids"].shape[-1]:]
-    return reasoning_tokenizer.decode(generated, skip_special_tokens=True).strip()
 def reasoning_generate(prompt):
@@ -338,7 +338,7 @@ def visit_webpage(url: str) -> str:
         return [main_text[:1000],]
-def visit_webpage(url: str) -> str:
     headers = {
         "User-Agent": "Mozilla/5.0"
     }
@@ -374,6 +374,46 @@ def visit_webpage(url: str) -> str:
         return [main_text[:1000],]
 def web_search(query: str, num_results: int = 10):
     """
     Search the internet for the query provided
@@ -708,6 +748,7 @@ def route(state: AgentState):
     else:
         return "allow"
 def tool_executor(state: AgentState):
     """
     Tool execution node for a risk-aware LLM agent.
@@ -792,7 +833,7 @@ def tool_executor(state: AgentState):
             for result in results:
                 try:
-                    webpage_results = visit_webpage(result)
                     webpage_result = " \n ".join(webpage_results)
                     # for webpage_result in webpage_results:
@@ -810,15 +851,58 @@ def tool_executor(state: AgentState):
                     if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
                         best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
                         best_webpage_information = webpage_result
                 except Exception as e:
                     logger.info(f"Tool Executor - Exception: {e}")
         elif action.tool == "visit_webpage":
             try:
-                webpage_results = visit_webpage(**action.args)
                 webpage_result = " \n ".join(webpage_results)
                 # for webpage_result in webpage_results:
                 query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
                 webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
@@ -936,8 +1020,8 @@ class BasicAgent:
                 # agent_answer = str(df)
                 # agent_answer = str(response.status_code) + " - " + task_id
-            except:
-                agent_answer = ""
         else:
             agent_answer = fixed_answer
@@ -948,6 +1032,7 @@ class BasicAgent:
         return agent_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,

         tokens removed and leading/trailing whitespace stripped.
     """
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
+        outputs = model.generate(
             **inputs,
             max_new_tokens=config.reasoning_max_len,
             temperature=config.temperature,
     generated = outputs[0][inputs["input_ids"].shape[-1]:]
+    return tokenizer.decode(generated, skip_special_tokens=True).strip()
 def reasoning_generate(prompt):
         return [main_text[:1000],]
+def visit_webpage_wiki(url: str) -> str:
     headers = {
         "User-Agent": "Mozilla/5.0"
     }
         return [main_text[:1000],]
+def visit_webpage_main(url: str) -> list[str]:
+    """
+    Fetch a web page and return its visible text and any table contents.
+
+    The page is requested with a browser-like User-Agent and a 10 second
+    timeout, parsed with the "html.parser" backend, and stripped of
+    <script>/<style> nodes before any text is extracted.
+
+    Args:
+        url: Address of the page to download.
+
+    Returns:
+        A list of one or two strings. The first is the newline-joined text
+        of every <p>, <dd>, <td> and <div> whose stripped text is longer
+        than 30 characters, truncated to 1500 characters. When at least one
+        table row has cells, a second string follows: one line per row with
+        cells joined by " | ", truncated to 5000 characters.
+
+    Raises:
+        requests.RequestException: If the request fails or times out, or if
+            raise_for_status() rejects the HTTP status code.
+    """
+    headers = {"User-Agent": "Mozilla/5.0"}
+    response = requests.get(url, headers=headers, timeout=10)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, "html.parser")
+    # Remove scripts/styles so their source never ends up in the text.
+    for tag in soup(["script", "style"]):
+        tag.extract()
+    # Focus on <body>. Fragments and malformed documents may not have one;
+    # search the whole document then instead of failing on None.
+    content = soup.find("body")
+    if content is None:
+        content = soup
+    # Extract a broad set of elements. NOTE: nested matches (e.g. a <p>
+    # inside a <div>) each contribute their text, so passages can repeat.
+    elements = content.find_all(["p", "dd", "td", "div"])
+    texts = []
+    for el in elements:
+        text = el.get_text(strip=True)
+        if len(text) > 30:  # filter noise: short or empty snippets
+            texts.append(text)
+    main_text = "\n".join(texts)
+    # Extract all tables in the document (not just wikitable).
+    table_texts = []
+    for table in soup.find_all("table"):
+        for row in table.find_all("tr"):
+            cols = [c.get_text(strip=True) for c in row.find_all(["td", "th"])]
+            if cols:
+                table_texts.append(" | ".join(cols))
+    if table_texts:
+        return [main_text[:1500], "\n".join(table_texts)[:5000]]
+    return [main_text[:1500]]
 def web_search(query: str, num_results: int = 10):
     """
     Search the internet for the query provided
     else:
         return "allow"
 def tool_executor(state: AgentState):
     """
     Tool execution node for a risk-aware LLM agent.
             for result in results:
                 try:
+                    webpage_results = visit_webpage_wiki(result)
                     webpage_result = " \n ".join(webpage_results)
                     # for webpage_result in webpage_results:
                     if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
                         best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
                         best_webpage_information = webpage_result
+                    webpage_results = visit_webpage_main(result)
+                    webpage_result = " \n ".join(webpage_results)
+                    # for webpage_result in webpage_results:
+                    query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
+                    webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
+                    query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
+                    # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
+                    if query_webpage_information_similarity_score > 0.65:
+                        webpage_information_complete += webpage_result
+                        webpage_information_complete += " \n "
+                        webpage_information_complete += " \n "
+                    if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
+                        best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
+                        best_webpage_information = webpage_result
                 except Exception as e:
                     logger.info(f"Tool Executor - Exception: {e}")
         elif action.tool == "visit_webpage":
             try:
+                webpage_results = visit_webpage_wiki(result)
                 webpage_result = " \n ".join(webpage_results)
+                # for webpage_result in webpage_results:
+                query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
+                webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
+                query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
+                # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
+                if query_webpage_information_similarity_score > 0.65:
+                    webpage_information_complete += webpage_result
+                    webpage_information_complete += " \n "
+                    webpage_information_complete += " \n "
+                if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
+                    best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
+                    best_webpage_information = webpage_result
+                webpage_results = visit_webpage_main(result)
+                webpage_result = " \n ".join(webpage_results)
                 # for webpage_result in webpage_results:
                 query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
                 webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
                 # agent_answer = str(df)
                 # agent_answer = str(response.status_code) + " - " + task_id
+            except Exception as e:
+                agent_answer = str(e)
         else:
             agent_answer = fixed_answer
         return agent_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,