# Web_Scrapping_Agent / agent / query_analyzer.py
# (Hugging Face upload metadata: ShivanshCodex, "Upload 47 files", commit f085180)
# agent/query_analyzer.py
from typing import Dict, List, Union
import google.generativeai as genai
from .config import GEMINI_API_KEY
import logging
import time
import ast
# ─── Logging Config ─────────────────────────────────────────────
# Module-level logger named after this module so the application can
# filter/route its records independently of other agent modules.
logger = logging.getLogger(__name__)
# NOTE(review): setting a level on a non-root logger here overrides any
# application-wide logging configuration for this module — confirm intended.
logger.setLevel(logging.INFO)
class QueryAnalyzer:
    """Plans web research for a user query via the Gemini API.

    Produces a dict with four fields: "Intent", "Info Types" and
    "Time Range" (strings) plus "KeywordChunks" (list of lists of search
    phrases). Every failure path degrades to safe defaults instead of
    raising, so callers never need try/except around this class.
    """

    # Seconds to pause after each successful Gemini call (crude client-side
    # rate limiting). Hoisted from an inline magic number so the budget can
    # be tuned in one place.
    RATE_LIMIT_DELAY: int = 5

    def __init__(self):
        # Bug fix: GEMINI_API_KEY was imported but never applied, so the
        # SDK silently relied on ambient configuration. Configure explicitly
        # before constructing the model (genai.configure is idempotent).
        genai.configure(api_key=GEMINI_API_KEY)
        self.model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")

    def analyze_query(self, query: str) -> Dict[str, Union[str, List[List[str]]]]:
        """
        Uses Gemini to extract structured web search intent and keyword clusters.

        Args:
            query: Free-form user query to break down.

        Returns:
            Dict with Intent, Info Types, Time Range, KeywordChunks.
            On any API error the safe default dict is returned.
        """
        prompt = f"""
You are a web research planning assistant. Break down the user query into:
1. Intent: Describe what the user is trying to learn
2. Info Types: Type of data/sources needed
3. Time Range: Temporal scope (e.g., recent, past year)
4. KeywordChunks: Group 2–5 related search phrases into Python-style list of lists
User Query:
\"\"\"{query}\"\"\"
Format:
Intent: ...
Info Types: ...
Time Range: ...
KeywordChunks:
[["term1", "term2"], ["group2a", "group2b"]]
"""
        try:
            response = self.model.generate_content(prompt)
            logger.info("Gemini query analysis succeeded")
            time.sleep(self.RATE_LIMIT_DELAY)
            return self._parse_response(response.text.strip(), query)
        except Exception as e:
            logger.error(f"Query analysis failed: {e}")
            return self._default_result(query)

    @staticmethod
    def _default_result(fallback_query: str) -> Dict[str, Union[str, List[List[str]]]]:
        """Safe default analysis (was previously duplicated in two places)."""
        return {
            "Intent": "unknown",
            "Info Types": "general",
            "Time Range": "none",
            "KeywordChunks": [[fallback_query]],
        }

    def _parse_response(self, response_text: str, fallback_query: str) -> Dict[str, Union[str, List[List[str]]]]:
        """Parse Gemini's semi-structured reply into the result dict.

        Lines before "KeywordChunks:" are treated as "key: value" fields;
        everything after it is collected and evaluated as a Python literal.

        Args:
            response_text: Raw text returned by the model.
            fallback_query: Used to build the default KeywordChunks when
                parsing fails.

        Returns:
            Result dict with the same shape as ``_default_result``.
        """
        result = self._default_result(fallback_query)
        chunk_lines: List[str] = []
        inside_chunk = False
        for line in response_text.splitlines():
            if "KeywordChunks:" in line:
                inside_chunk = True
                continue
            if inside_chunk:
                chunk_lines.append(line)
            elif ":" in line:
                key, value = line.split(":", 1)
                key = key.strip()
                # Bug fix: only accept the fields we asked for; previously any
                # stray "key: value" commentary from the model was copied into
                # the result dict, polluting it with arbitrary keys.
                if key in result:
                    result[key] = value.strip()
        if chunk_lines:
            try:
                chunks = ast.literal_eval("\n".join(chunk_lines).strip())
                # Bug fix: validate the literal's shape before trusting it —
                # a dict/str/number literal would previously have silently
                # replaced KeywordChunks with the wrong structure.
                if (
                    isinstance(chunks, list)
                    and chunks
                    and all(
                        isinstance(group, list)
                        and all(isinstance(term, str) for term in group)
                        for group in chunks
                    )
                ):
                    result["KeywordChunks"] = chunks
                else:
                    logger.warning("Keyword chunks had unexpected shape; using fallback")
            except Exception as e:
                logger.warning(f"Failed to parse keyword chunks: {e}")
        return result