Spaces:

InnoTrack
/

Graduation_Project-v1.2

Running

App Files Files Community

Graduation_Project-v1.2 / src /recommendation_engine /chatbot_engine.py

bat-6

feat: implement full project generation module with automated documentation and prompt parsing

b09149c 19 days ago

Raw

History Blame Contribute Delete

21.5 kB

	from src.recommendation_engine.memory_store import (
	get_user_memory,
	save_user_memory,
	default_state
	)

	from src.recommendation_engine.llm_router import analyze_user_input

	from src.recommendation_engine.command_handler import (
	is_command,
	handle_command
	)

	from src.recommendation_engine.idea_generator import generate_ideas
	from src.recommendation_engine.feature_generator import generate_features

	from src.recommendation_engine.llm_client import generate_text, generate_list
	from src.recommendation_engine.prompt_builder import build_chat_prompt, build_niche_domains_prompt
	from src.recommendation_engine.response_formatter import format_response
	from src.recommendation_engine.state_manager import update_state
	from src.recommendation_engine.context_builder import extract_domain, DOMAIN_KEYWORDS

	from src.recommendation_engine.full_project_generator import (
	generate_full_project
	)

	import re

	# ─────────────────────────────────────────────
	# Project Idea Validator + Categorizer
	# ─────────────────────────────────────────────
	def validate_and_categorize_project(title: str, abstract: str = "") -> dict:
	    """
	    Ask the LLM whether a title is a valid graduation project and classify it.

	    The prompt is sent through ``generate_text(prompt, task="intent")`` and the
	    reply is parsed as ``KEY: value`` lines (VALID / DOMAIN / REASON).

	    Args:
	        title: Project title to evaluate.
	        abstract: Optional abstract; only its first 400 characters are sent.

	    Returns:
	        {
	            "is_valid": bool,
	            "domain": str | None,   # a key of DOMAIN_KEYWORDS (never "Others")
	            "reason": str
	        }

	    If the LLM call or the parsing raises, the function fails open and
	    returns ``is_valid=True`` with no domain, so an outage never rejects a
	    user's idea.
	    """
	    known_domains = [d for d in DOMAIN_KEYWORDS.keys() if d != "Others"]
	    domain_list_str = "\n".join(f"- {d}" for d in known_domains)
	    # Lookup used to map the model's answer back to the canonical spelling,
	    # regardless of the casing the model chose.
	    canonical_domains = {str(d).lower(): d for d in known_domains}

	    prompt = f"""
	You are an expert academic advisor evaluating graduation project ideas.

	Project Title: "{title}"
	{"Abstract: " + abstract[:400] if abstract else ""}

	Task 1 – Validity Check:
	Is this a valid, feasible graduation project idea for a university student?
	- It must be a technical or academic topic (not a random phrase, celebrity name, or nonsense)
	- It should be specific enough to build something real
	Answer: YES or NO

	Task 2 – Domain Classification:
	If valid, which ONE of the following domains best fits this project?
	{domain_list_str}

	Return your answer in this EXACT format (two lines only):
	VALID: YES
	DOMAIN: <domain name from the list above>

	If invalid:
	VALID: NO
	DOMAIN: None
	REASON: <one sentence why>
	"""
	    # Characters models commonly wrap around fields (markdown bold, code
	    # ticks, quotes); they are stripped from both ends of keys and values.
	    decoration = " \t*`\"'"

	    try:
	        raw = generate_text(prompt, task="intent").strip()

	        fields = {}
	        for line in raw.splitlines():
	            key, sep, value = line.partition(":")
	            if sep:
	                # Later duplicates overwrite earlier ones.
	                fields[key.strip(decoration).upper()] = value.strip(decoration)

	        is_valid = fields.get("VALID", "NO").upper().startswith("YES")
	        # Unknown answers (including the literal "None") map to None.
	        domain = canonical_domains.get(fields.get("DOMAIN", "").lower())
	        reason = fields.get("REASON", "")

	        return {"is_valid": is_valid, "domain": domain, "reason": reason}

	    except Exception:
	        # Deliberate best-effort: treat the idea as valid when the LLM is
	        # unavailable or returns something unusable.
	        return {"is_valid": True, "domain": None, "reason": ""}


	def extract_number(text: str, default=5):
	    """
	    Pull the first integer out of a message, capped at 20.

	    Args:
	        text: User message. Non-string values are converted with ``str()``.
	        default: Value returned when no usable number is present.

	    Returns:
	        ``default`` when the whole message is exactly "1" or "2" (those are
	        treated as menu choices, not counts) or when it contains no digits;
	        otherwise the first run of digits as an int, limited to 20.
	    """
	    cleaned = str(text).strip()
	    if cleaned in ("1", "2"):
	        return default
	    # Search the string form so ints / None do not raise TypeError in re.
	    match = re.search(r"\d+", cleaned)
	    return min(int(match.group()), 20) if match else default

	def validate_and_format_domain(domain: str) -> str:
	    """
	    Validate a user-supplied domain and return it in clean form.

	    Steps:
	        1. Empty / whitespace-only input is rejected without any lookup.
	        2. ``extract_domain`` is tried first; any result other than "others"
	           (case-insensitive) is returned as-is.
	        3. Otherwise the LLM is asked to either return a corrected,
	           formatted domain name or the sentinel INVALID.

	    Args:
	        domain: Raw domain text typed by the user.

	    Returns:
	        The accepted domain name, or "" when the domain is rejected or the
	        LLM call fails.
	    """
	    if not domain or not domain.strip():
	        return ""

	    # 1. Quick local validation for standard domains
	    extracted = extract_domain(domain)
	    if extracted and extracted.lower() != "others":
	        return extracted

	    # 2. Fall back to LLM validation
	    prompt = f"""
	Determine if the following domain/field is a valid academic, engineering, scientific, or technology domain suitable for a university graduation project (e.g., Computer Science, Engineering, Medicine, Business, Agriculture, Biology, etc.).
	Also, correct any typos and format it cleanly (e.g., Title Case).

	Domain to evaluate: "{domain}"

	Rules:
	- If it is a valid field of study, technology, or academic discipline (e.g., "artificial intelligence", "robotics", "bioinformatics", "educational games"), return ONLY the corrected and formatted domain name (e.g., "Artificial Intelligence").
	- If it is unrelated to academic/technology graduation projects, or contains names of celebrities, sports teams, food, pop culture, or random questions (e.g., "messi", "fc barcelona", "pizza", "what is this"), return exactly "INVALID".

	Return ONLY the formatted domain name or "INVALID". Do not include any other text.
	"""
	    try:
	        res = generate_text(prompt, task="intent").strip()
	    except Exception:
	        # Best-effort: an LLM failure is reported as "not validated".
	        return ""

	    # Strip surrounding quotes BEFORE checking the sentinel: the prompt shows
	    # INVALID inside quotes, so the model may echo it quoted.
	    cleaned = res.strip("\"'").strip()
	    if not cleaned or cleaned.rstrip(".").upper() == "INVALID":
	        return ""
	    return cleaned

	def is_weak_project_title(title: str) -> bool:
	    """
	    Report whether a project title is too short to be useful.

	    Args:
	        title: Candidate project title (may be empty or None).

	    Returns:
	        True when the title is empty or has fewer than four
	        whitespace-separated words; False otherwise.

	    NOTE(review): the previous body also built a list of words that are not
	    generic fillers ("system", "platform", "app", "website", "application",
	    "project", "ai", "smart", "tool") but never consulted it, and its final
	    ``len(words) < 3`` check could not be true after the four-word guard.
	    That dead code is removed here without changing results. If filler
	    words were meant to be discounted, the intended threshold needs to be
	    confirmed before adding the check back.
	    """
	    if not title:
	        return True
	    return len(title.split()) < 4

	def is_generic_project_reference(text: str) -> bool:
	    """
	    Tell whether the message is only a placeholder such as "my project".

	    The comparison is exact (after trimming and lowercasing), so a longer
	    phrase that merely contains one of the placeholders does not match.
	    """
	    placeholders = (
	        "my project",
	        "this project",
	        "the project",
	        "my system",
	        "this system",
	        "my app",
	        "my application",
	        "my idea",
	        "project",
	        "system",
	        "app",
	        "idea",
	    )
	    normalized = text.strip().lower()
	    return any(normalized == candidate for candidate in placeholders)

	def looks_like_real_project_title(title: str) -> bool:
	    """
	    Heuristic filter for text that plausibly names a project.

	    A title is accepted only if ALL of the following hold:
	      * it has at least two whitespace-separated words;
	      * at least half of those words are distinct (case-sensitive);
	      * lowercased, it does not begin with a question opener ("how ", ...);
	      * lowercased, it contains none of the keyboard-mash fragments;
	      * lowercased, it contains at least one topic keyword.

	    Fragments and keywords are matched by plain substring containment, so a
	    short keyword such as "ai" also matches inside a longer word.
	    """
	    question_openers = (
	        "how ", "what ", "why ", "when ", "where ", "can ",
	        "could ", "should ", "is ", "are ", "do ", "does ",
	    )
	    junk_fragments = ("asd", "qwe", "zxc", "testtest", "aaaa", "xxxxx")
	    topic_markers = (
	        # activities
	        "management", "analysis", "detection", "tracking", "recognition",
	        "monitoring", "security", "attendance", "automation", "prediction",
	        "dashboard", "diagnosis", "learning", "recommendation",
	        "classification", "authentication", "optimization",
	        # sectors
	        "healthcare", "fintech", "education", "library", "hospital",
	        "school", "medical", "industrial", "agriculture", "transport",
	        # technologies
	        "ai", "iot", "blockchain", "cloud", "robotics", "vision", "embedded",
	        # product shapes
	        "system", "platform", "application", "app", "website", "portal",
	        "tool", "game", "generator", "engine", "software", "database",
	        "model", "chatbot", "chat", "assistant", "network", "api",
	        "mobile", "web", "smart",
	    )

	    tokens = title.strip().split() if title else []
	    if len(tokens) < 2:
	        return False

	    # Mostly repeated words ("test test test") are not a title.
	    if len(set(tokens)) / len(tokens) < 0.5:
	        return False

	    normalized = title.strip().lower()
	    if normalized.startswith(question_openers):
	        return False
	    if any(fragment in normalized for fragment in junk_fragments):
	        return False

	    return any(marker in normalized for marker in topic_markers)

	# Phrases that mark a message as a follow-up request ("more", "another", ...).
	# chatbot() tests each entry as a substring of the lowercased user message;
	# when an idea request names no domain and none of these phrases is present,
	# the stored domain is cleared so the user is asked for one again.
	FOLLOWUP_WORDS = [
	"another",
	"more",
	"again",
	"other ideas",
	"more ideas",
	"more features",
	"another features"
	]

	def finalize_response(user_input, response, history, state, user_id):
	    """
	    Record one conversation turn, persist the session, and return the reply.

	    The user message and the assistant reply are appended to ``history``
	    in place (the caller's list grows by two entries). Only the 20 most
	    recent entries are written to storage, together with ``state``, via
	    ``save_user_memory``.

	    Returns:
	        ``response`` unchanged, so callers can ``return finalize_response(...)``.
	    """
	    history.extend((
	        {"role": "user", "content": user_input},
	        {"role": "assistant", "content": response},
	    ))

	    recent_turns = history[-20:]
	    save_user_memory(user_id, {"history": recent_turns, "state": state})

	    return response

	def is_gibberish_text(text: str) -> bool:
	    """
	    Heuristically flag keyboard mashing and other meaningless input.

	    Evaluated on the trimmed, lowercased text, in this order:
	      1. "1", "2" and "3" are menu choices and never gibberish.
	      2. Text shorter than three characters is gibberish unless it is one
	         of the whitelisted short tokens ("hi", "ai", "ok", ...).
	      3. Any text containing a keyboard-mash fragment ("asd", "qwe", ...)
	         is gibberish.
	      4. With three or more words, gibberish when fewer than half of the
	         words are distinct.
	    """
	    menu_choices = ("1", "2", "3")
	    short_whitelist = (
	        "hi", "hey", "hello", "ai", "ml", "ui", "ux", "vr",
	        "ar", "iot", "no", "la", "n", "y", "ok",
	    )
	    keyboard_mash = ("asd", "qwe", "zxc", "aaa", "bbb", "ccc", "xxx", "testtest")

	    candidate = text.strip().lower()

	    if candidate in menu_choices:
	        return False

	    if len(candidate) < 3:
	        return candidate not in short_whitelist

	    if any(chunk in candidate for chunk in keyboard_mash):
	        return True

	    tokens = candidate.split()
	    return len(tokens) >= 3 and len(set(tokens)) / len(tokens) < 0.5

	def is_project_related(text: str) -> bool:
	    """
	    Check whether a message mentions anything project-related.

	    Returns True as soon as any signal term occurs as a substring of the
	    lowercased, trimmed message; False when none does. Because matching is
	    by substring, short terms such as "ai" also hit inside longer words.
	    """
	    signals = (
	        # artefacts
	        "project", "system", "platform", "application", "app",
	        "website", "dashboard", "management",
	        # technologies
	        "ai", "ml", "machine learning", "deep learning", "computer vision",
	        "blockchain", "iot", "web", "mobile", "cloud", "security",
	        "database", "api",
	        # requests
	        "generate", "feature", "features", "idea", "ideas", "improve",
	        "description", "technologies", "architecture",
	        # sectors / activities
	        "healthcare", "education", "fintech", "smart", "attendance",
	        "monitoring", "tracking", "analysis", "recognition",
	    )

	    haystack = text.lower().strip()
	    for needle in signals:
	        if needle in haystack:
	            return True
	    return False

	def is_general_question_or_unrelated_chat(text: str) -> bool:
	    """
	    Detect messages phrased as a question or a request for explanation.

	    On the trimmed, lowercased message, returns True when it ends with "?",
	    begins with a question/request opener (each opener includes its
	    trailing space), or contains one of the question phrases anywhere.
	    """
	    openers = (
	        "how ", "what ", "why ", "when ", "where ", "can ", "could ", "should ",
	        "is ", "are ", "do ", "does ", "explain ", "tell me ", "show me ", "describe ",
	    )
	    embedded_phrases = (
	        "what is", "what's", "tell me about", "can you", "could you", "how to", "how do",
	    )

	    message = text.strip().lower()
	    return (
	        message.endswith("?")
	        or message.startswith(openers)
	        or any(phrase in message for phrase in embedded_phrases)
	    )


	def _resolve_top_k(number, user_input, default=5, cap=20):
	    """Pick how many items to generate: the router's number if usable, else the text's."""
	    try:
	        requested = int(number)
	    except (TypeError, ValueError):
	        requested = 0
	    if requested > 0:
	        # Apply the same upper bound extract_number uses for typed numbers.
	        return min(requested, cap)
	    return extract_number(user_input, default)


	def chatbot(user_id: str, user_input: str):
	    """
	    Handle one user message and return the assistant's reply.

	    Flow:
	      1. Commands (``is_command``) are delegated to ``handle_command`` and
	         returned directly; they are not added to the history.
	      2. The user's stored history/state is loaded and the message is sent to
	         ``analyze_user_input``, which returns an ``action`` plus optional
	         extracted fields (reply_text, domain, project_title, number,
	         abstract, description).
	      3. The action is dispatched: direct reply, idea generation, feature
	         generation, full project generation, yes/no confirmation, or
	         session reset.

	    Every non-command path ends in ``finalize_response``, which appends the
	    turn to the history and persists history + state, so no branch saves
	    separately before calling it.

	    Args:
	        user_id: Key under which the conversation memory is stored.
	        user_input: Raw message typed by the user.

	    Returns:
	        The reply text (or whatever ``handle_command`` returns for commands).
	    """
	    if is_command(user_input):
	        return handle_command(user_input)

	    memory = get_user_memory(user_id)
	    history = memory.get("history") or []
	    state = memory.get("state") or default_state()

	    # The Orchestrator handles all context and validation
	    analysis = analyze_user_input(user_input, state) or {}

	    action = analysis.get("action", "reply_directly")
	    reply_text = analysis.get("reply_text")
	    domain = analysis.get("domain")
	    project_title = analysis.get("project_title")
	    number = analysis.get("number")
	    abstract = analysis.get("abstract")
	    description = analysis.get("description")

	    if action == "reply_directly":
	        # Only fill in title/domain when nothing is stored yet.
	        if project_title and not state.get("project_title"):
	            state["project_title"] = project_title
	        if domain and not state.get("domain"):
	            state["domain"] = domain

	        custom_saved = False
	        if abstract:
	            state["abstract"] = abstract
	            state["custom_abstract"] = True
	            custom_saved = True
	        if description:
	            state["description"] = description
	            state["custom_description"] = True
	            custom_saved = True

	        final_reply = reply_text or "I didn't quite catch that. Can you clarify?"
	        if custom_saved:
	            final_reply = "✅ I have saved your custom project details!\n\n" + final_reply

	        return finalize_response(user_input, final_reply, history, state, user_id)

	    elif action == "trigger_idea_generation":
	        if domain:
	            domain_lower = domain.lower()
	            if domain_lower in ["other", "others", "general", "any"]:
	                state["domain"] = "general"
	                state["waiting_for_domain"] = False
	            elif domain_lower in ["domain", "domains", "list", "options", "help"]:
	                # The user asked to see the options, not for a specific domain.
	                state["domain"] = None
	            else:
	                state["domain"] = domain
	                state["waiting_for_domain"] = False
	        elif not any(w in user_input.lower() for w in FOLLOWUP_WORDS):
	            # A fresh request without a domain: forget the previous one.
	            state["domain"] = None

	        if not state.get("domain"):
	            state["waiting_for_domain"] = True
	            domain_list = "\n".join(f"- {d}" for d in DOMAIN_KEYWORDS.keys() if d != "Others")
	            response = (
	                f"Which domain is your project in? 📚\n\n"
	                f"{domain_list}\n\n"
	                f"💡 Just type one of the domains above (e.g. AI or Healthcare)\n"
	                f"If your domain isn't listed, type Others to see more options."
	            )
	            return finalize_response(user_input, response, history, state, user_id)

	        top_k = _resolve_top_k(number, user_input)

	        # Everything shown so far, so the generator can avoid repeats.
	        all_past_ideas = list(state.get("all_generated_ideas") or [])
	        for idea in state.get("ideas") or []:
	            if idea not in all_past_ideas:
	                all_past_ideas.append(idea)

	        result = generate_ideas(
	            domain=state.get("domain"),
	            top_k=top_k,
	            previous_generated_ideas=all_past_ideas
	        )

	        ideas = result.get("final_ideas") or []

	        state["all_generated_ideas"] = all_past_ideas + ideas
	        state["ideas"] = ideas
	        state["last_action"] = "idea"

	        # A new batch of ideas invalidates any project-specific details.
	        state["project_title"] = ""
	        state["features"] = []
	        state["all_generated_features"] = []
	        state["description"] = ""
	        state["abstract"] = ""
	        state["technologies"] = []

	        response = format_response("idea", "", state)
	        return finalize_response(user_input, response, history, state, user_id)

	    elif action == "trigger_feature_generation":
	        if project_title:
	            state["project_title"] = project_title

	        if not state.get("project_title"):
	            return finalize_response(
	                user_input,
	                "I need a project title to generate features! 📝\nJust type your project title.",
	                history,
	                state,
	                user_id
	            )

	        top_k = _resolve_top_k(number, user_input)

	        all_past_features = list(state.get("all_generated_features") or [])
	        for feature in state.get("features") or []:
	            if feature not in all_past_features:
	                all_past_features.append(feature)

	        result = generate_features(
	            title=state.get("project_title"),
	            description=state.get("description", ""),
	            features=[],
	            previous_generated_features=all_past_features,
	            top_k=top_k
	        )

	        new_features = result.get("recommended_features") or []

	        state["all_generated_features"] = all_past_features + new_features
	        state["features"] = new_features
	        state["last_action"] = "feature"

	        response = format_response("feature", "", state)

	        if state.get("custom_abstract") or state.get("custom_description"):
	            state["waiting_for_abstract_update"] = True
	            response += "\n\n✨ Would you like me to seamlessly weave these new features into your custom abstract and description? (Yes/No)"

	        return finalize_response(user_input, response, history, state, user_id)

	    elif action == "trigger_full_project_generation":
	        if project_title:
	            state["project_title"] = project_title

	        # Same guard as feature generation: without a title the generators
	        # would be called with title=None.
	        if not state.get("project_title"):
	            return finalize_response(
	                user_input,
	                "I need a project title to generate the full project! 📝\nJust type your project title.",
	                history,
	                state,
	                user_id
	            )

	        if not state.get("features"):
	            feature_result = generate_features(
	                title=state.get("project_title"),
	                description=state.get("description", ""),
	                features=[],
	                previous_generated_features=[],
	                top_k=8
	            )
	            state["features"] = feature_result.get("recommended_features") or []

	        custom_desc = state.get("custom_description", False)
	        custom_abs = state.get("custom_abstract", False)

	        # User-written abstract/description are passed through only when
	        # they were flagged as custom.
	        result = generate_full_project(
	            title=state.get("project_title"),
	            features=state.get("features", []),
	            description=state.get("description", "") if custom_desc else "",
	            abstract=state.get("abstract", "") if custom_abs else "",
	            custom_description=custom_desc,
	            custom_abstract=custom_abs
	        )

	        state = update_state(state, result, mode="merge")
	        if state.get("domain"):
	            state["category"] = state.get("domain")

	        # Tolerate missing / None lists when rendering.
	        technologies = state.get("technologies") or []
	        objectives = state.get("objectives") or []

	        response = f"""
	📦 Full Project Generated

	📌 Project Title:
	{state.get("project_title")}

	📂 Category:
	{state.get("category")}

	🛠 Technologies:
	{", ".join(technologies)}

	📄 Abstract:
	{state.get("abstract")}

	📄 Detailed Description:
	{state.get("description")}

	❗ Problem Statement:
	{state.get("problem_statement")}

	💡 Proposed Solution:
	{state.get("proposed_solution")}

	🎯 Objectives:
	{chr(10).join("- " + x for x in objectives)}

	━━━━━━━━━━━━━━━━━━━━━━
	👉 What's next? You can say "improve features", or tell me to "replace abstract with..." your own custom text!
	"""
	        return finalize_response(user_input, response, history, state, user_id)

	    elif action == "confirmation_yes":
	        if state.get("waiting_for_abstract_update"):
	            from src.recommendation_engine.full_project_generator import rewrite_custom_sections
	            state["waiting_for_abstract_update"] = False

	            rewritten = rewrite_custom_sections(
	                features=state.get("features", []),
	                abstract=state.get("abstract", "") if state.get("custom_abstract") else "",
	                description=state.get("description", "") if state.get("custom_description") else ""
	            )

	            # Only overwrite sections the user customised and the rewrite
	            # actually produced.
	            if state.get("custom_abstract") and rewritten.get("abstract"):
	                state["abstract"] = rewritten["abstract"]
	            if state.get("custom_description") and rewritten.get("description"):
	                state["description"] = rewritten["description"]

	            return finalize_response(
	                user_input,
	                "✅ Done! I've upgraded your custom abstract and description with the new features while keeping your original style intact.\n\nType '2' to generate and view your newly upgraded full project!",
	                history,
	                state,
	                user_id
	            )

	        state["waiting_for_project_idea_confirm"] = False
	        state["waiting_for_title_confirmation"] = False
	        return finalize_response(user_input, "Great! Confirmed. Let's move on.", history, state, user_id)

	    elif action == "confirmation_no":
	        if state.get("waiting_for_abstract_update"):
	            state["waiting_for_abstract_update"] = False
	            return finalize_response(
	                user_input,
	                "👍 Got it! I will leave your custom abstract and description exactly as you wrote them.\n\nType '2' whenever you're ready to view the full project.",
	                history,
	                state,
	                user_id
	            )

	        state["waiting_for_project_idea_confirm"] = False
	        state["waiting_for_title_confirmation"] = False
	        return finalize_response(user_input, "No problem! Let's try something else.", history, state, user_id)

	    elif action == "clear_session":
	        # Only the state is reset; the existing history list is kept.
	        state = default_state()
	        return finalize_response(
	            user_input,
	            "✅ Session cleared! We are starting fresh. How can I help you today?",
	            history,
	            state,
	            user_id
	        )

	    else:
	        return finalize_response(user_input, "I am not sure how to handle that.", history, state, user_id)