"""
tools_engine.py - Improved perform_web_search to return structured results with URLs and snippets,
and canonical intent detection unchanged.
"""
from duckduckgo_search import DDGS
from transformers import pipeline
import re
# Loaded once at import time (slow, downloads weights on first run); the
# zero-shot model backs analyze_intent() below.
print(">>> Tools: Loading Intent Classification Model...")
intent_classifier = pipeline("zero-shot-classification", model="typeform/distilbert-base-uncased-mnli")
def analyze_intent(user_text):
    """Classify *user_text* into a canonical intent string.

    Returns one of "general", "internet_search", "coding_request", or
    "checking_time".  Falls back to "general" for empty input, common
    greeting/identity phrases, low-confidence classifications, or any
    classifier failure.
    """
    if not user_text:
        return "general"
    text_lower = user_text.lower().strip()
    # Fast path: answer common greetings / identity questions without
    # invoking the (comparatively slow) zero-shot model.
    direct_chat_triggers = [
        "hi", "hello", "hey", "hlo", "namaste",
        "what is your name", "who are you", "your name"
    ]
    if text_lower in direct_chat_triggers or any(text_lower.startswith(t + " ") for t in direct_chat_triggers):
        return "general"
    # Single source of truth: the dict's keys double as the candidate
    # labels fed to the classifier, so labels and intents cannot drift.
    label_to_intent = {
        "internet search": "internet_search",
        "general conversation": "general",
        "coding request": "coding_request",
        "checking time": "checking_time",
    }
    try:
        result = intent_classifier(user_text, list(label_to_intent))
        top_label = result['labels'][0]
        confidence = result['scores'][0]
        # Below this threshold the model is effectively guessing.
        if confidence > 0.45:
            return label_to_intent.get(top_label, "general")
    except Exception as e:
        # Best-effort: a classifier failure must never crash the caller,
        # but do surface it instead of swallowing silently.
        print(f"Intent classification error: {e}")
    return "general"
def perform_web_search(user_text, max_results=4):
"""
Return structured results:
{
"query": "...",
"results": [
{"title": "...", "snippet": "...", "url": "..."},
...
]
}
"""
try:
query = user_text
# sanitize small verbs
remove_phrases = ["search for","find","google","look up","lookup","what is","tell me"]
q = query.lower()
for p in remove_phrases:
q = q.replace(p, "")
q = q.strip() or query
results = list(DDGS().text(q, max_results=max_results))
structured = {"query": q, "results": []}
for r in results:
title = r.get("title","").strip()
body = re.sub(r'\s+',' ', r.get("body","").strip())
url = r.get("href") or r.get("url") or r.get("link") or ""
# short snippet
snippet = body[:320]
structured["results"].append({"title": title, "snippet": snippet, "url": url})
return structured
except Exception as e:
print(f"Search error: {e}")
return {"query": user_text, "results": []}
|