Update app.py

app.py CHANGED
@@ -12,19 +12,18 @@ import pdfplumber
 # ==== CONFIG ====
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 HF_TOKEN = os.getenv("HF_TOKEN")
-
-# SOTA models: for general and code queries
+# Your list of SOTA chat models, in order of preference
 CONVERSATIONAL_MODELS = [
     "deepseek-ai/DeepSeek-V2-Chat",
     "Qwen/Qwen2-72B-Instruct",
     "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "meta-llama/Meta-Llama-3-70B-Instruct"
+    "meta-llama/Meta-Llama-3-70B-Instruct",
+    "deepseek-ai/DeepSeek-Coder-33B-Instruct"
 ]
-CODING_MODEL = "deepseek-ai/DeepSeek-Coder-33B-Instruct"
 
 wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 (chockqoteewy@gmail.com)")
 
-# ====
+# ==== UTILITY: Link/file detection ====
 def extract_links(text):
     url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
     return url_pattern.findall(text or "")
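Note: this hunk folds the separate CODING_MODEL constant into CONVERSATIONAL_MODELS, so the coder model is now just the last entry in one ordered preference list that the fallback loop walks. As a quick sanity check on that list, a minimal sketch (not part of the commit, assuming only huggingface_hub's model_info helper and the HF_TOKEN defined above):

    # Sketch: confirm each fallback model id resolves on the Hub.
    from huggingface_hub import model_info

    for model_id in CONVERSATIONAL_MODELS:
        try:
            info = model_info(model_id, token=HF_TOKEN)  # raises if the repo is missing or gated
            print(f"{model_id}: ok ({info.pipeline_tag})")
        except Exception as e:
            print(f"{model_id}: unavailable ({e})")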
@@ -42,26 +41,36 @@ def download_file(url, out_dir="tmp_files"):
     except Exception:
         return None
 
+# ==== File/Link Analyzers ====
 def analyze_file(file_path):
-    …
-    …
+    if file_path.endswith((".xlsx", ".xls")):
+        try:
             df = pd.read_excel(file_path)
             return f"Excel summary: {df.head().to_markdown(index=False)}"
-    …
+        except Exception as e:
+            return f"Excel error: {e}"
+    elif file_path.endswith(".csv"):
+        try:
             df = pd.read_csv(file_path)
             return f"CSV summary: {df.head().to_markdown(index=False)}"
-    …
+        except Exception as e:
+            return f"CSV error: {e}"
+    elif file_path.endswith(".pdf"):
+        try:
             with pdfplumber.open(file_path) as pdf:
                 first_page = pdf.pages[0].extract_text()
                 return f"PDF text sample: {first_page[:1000]}"
-    …
+        except Exception as e:
+            return f"PDF error: {e}"
+    elif file_path.endswith(".txt"):
+        try:
             with open(file_path, encoding='utf-8') as f:
                 txt = f.read()
                 return f"TXT file sample: {txt[:1000]}"
-    …
-        return f"…
-    …
-        return f"…
+        except Exception as e:
+            return f"TXT error: {e}"
+    else:
+        return f"Unsupported file type: {file_path}"
 
 def analyze_webpage(url):
     try:
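Note: the rewritten analyze_file dispatches on file extension and wraps each parser in its own try/except, so a corrupt or unreadable file now degrades to an error string instead of raising. Hypothetical usage (file names invented for illustration):

    # Every call returns a string, whether parsing succeeds or fails.
    print(analyze_file("report.xlsx"))  # "Excel summary: ..." or "Excel error: ..."
    print(analyze_file("data.csv"))     # "CSV summary: ..." or "CSV error: ..."
    print(analyze_file("image.png"))    # "Unsupported file type: image.png"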
@@ -74,6 +83,7 @@ def analyze_webpage(url):
     except Exception as e:
         return f"Webpage error: {e}"
 
+# ==== SEARCH TOOLS ====
 def duckduckgo_search(query):
     try:
         with DDGS() as ddgs:
@@ -103,31 +113,25 @@ def is_coding_question(text):
             return True
     return False
 
-def …(query):
-    try:
-        hf_client = InferenceClient(CODING_MODEL, token=HF_TOKEN)
-        result = hf_client.text_generation(query, max_new_tokens=1024)
-        if isinstance(result, dict) and "generated_text" in result:
-            return f"[{CODING_MODEL}] {result['generated_text']}"
-        elif isinstance(result, str):
-            return f"[{CODING_MODEL}] {result}"
-        return "Unknown result format from coder model."
-    except Exception as e:
-        return f"Coder Model Error: {e}"
-
-def llm_conversational(query):
+def llm_conversational(question):
     last_error = None
     for model_id in CONVERSATIONAL_MODELS:
         try:
             hf_client = InferenceClient(model_id, token=HF_TOKEN)
-            result = hf_client.…
+            result = hf_client.conversational(
+                messages=[{"role": "user", "content": question}],
+                max_new_tokens=512,
+            )
+            # Extract generated_text
             if isinstance(result, dict) and "generated_text" in result:
-                return f"[{model_id}] {result['generated_text']}"
+                return f"[{model_id}] " + result["generated_text"]
+            elif hasattr(result, "generated_text"):
+                return f"[{model_id}] " + result.generated_text
             elif isinstance(result, str):
-                return f"[{model_id}] {result}"
+                return f"[{model_id}] " + result
         except Exception as e:
             last_error = f"{model_id}: {e}"
-    return f"LLM Error (all advanced models): {last_error}"
+    return f"LLM Error (all advanced models): {last_error}"
 
 # ==== SMART AGENT ====
 class SmartAgent:
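Note: both rewritten call sites invoke InferenceClient.conversational(messages=..., max_new_tokens=...). In huggingface_hub the conversational task is deprecated in favor of chat_completion, and older releases of conversational expected a text argument rather than a messages list, so this call may fail at runtime depending on the installed version. A minimal sketch of the equivalent call via chat_completion, assuming a recent huggingface_hub (the chat_once helper is invented for illustration):

    from huggingface_hub import InferenceClient

    def chat_once(model_id: str, question: str) -> str:
        # chat_completion takes an OpenAI-style messages list and max_tokens
        # (not max_new_tokens); the reply text is in choices[0].message.content.
        client = InferenceClient(model_id, token=HF_TOKEN)
        resp = client.chat_completion(
            messages=[{"role": "user", "content": question}],
            max_tokens=512,
        )
        return f"[{model_id}] " + resp.choices[0].message.content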
@@ -152,28 +156,40 @@ class SmartAgent:
         if results:
             return "\n\n".join(results)
 
-        # 2. …
+        # 2. Coding/algorithmic questions: Prefer DeepSeek-Coder-33B
         if is_coding_question(question):
-            …
-            …
-            …
+            coder_client = InferenceClient("deepseek-ai/DeepSeek-Coder-33B-Instruct", token=HF_TOKEN)
+            try:
+                coder_result = coder_client.conversational(
+                    messages=[{"role": "user", "content": question}],
+                    max_new_tokens=512,
+                )
+                if isinstance(coder_result, dict) and "generated_text" in coder_result:
+                    return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result["generated_text"]
+                elif hasattr(coder_result, "generated_text"):
+                    return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result.generated_text
+                elif isinstance(coder_result, str):
+                    return "[deepseek-ai/DeepSeek-Coder-33B-Instruct] " + coder_result
+            except Exception as e:
+                # fallback to other chat models
+                pass
 
-        # 3. DuckDuckGo for …
+        # 3. DuckDuckGo for current/web knowledge
         result = duckduckgo_search(question)
         if result:
             return result
 
-        # 4. Wikipedia for encyclopedic …
+        # 4. Wikipedia for encyclopedic queries
         result = wikipedia_search(question)
         if result:
             return result
 
-        # 5. …
+        # 5. Fallback to conversational LLMs
         result = llm_conversational(question)
         if result:
             return result
 
-        return "No answer could be found by available tools."
+        return "No answer could be found by available tools."
 
 # ==== SUBMISSION LOGIC ====
 def run_and_submit_all(profile: gr.OAuthProfile | None):
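Note: the routing order in this method is now: attached files/links first, then the coder model for coding questions (its errors are swallowed by the bare pass and fall through), then DuckDuckGo, Wikipedia, and finally the conversational fallback chain. A hypothetical smoke test, assuming SmartAgent takes no constructor arguments and answers via a __call__(question) method (neither appears in this hunk):

    # Hypothetical: exercise two routing tiers end to end.
    agent = SmartAgent()
    print(agent("Write a Python function that reverses a linked list."))  # coder-model tier
    print(agent("Who won the 2023 Nobel Prize in Physics?"))              # web-search tier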