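# NOTE: the lines originally here were repository web-page chrome captured with
# the file, not Python source. Preserved for reference:
#   bat-6's picture
#   feat: implement full project generation module with automated documentation and prompt parsing
#   b09149c
#   Raw | History | Blame | Contribute | Delete | 21.5 kB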
from src.recommendation_engine.memory_store import (
get_user_memory,
save_user_memory,
default_state
)
from src.recommendation_engine.llm_router import analyze_user_input
from src.recommendation_engine.command_handler import (
is_command,
handle_command
)
from src.recommendation_engine.idea_generator import generate_ideas
from src.recommendation_engine.feature_generator import generate_features
from src.recommendation_engine.llm_client import generate_text, generate_list
from src.recommendation_engine.prompt_builder import build_chat_prompt, build_niche_domains_prompt
from src.recommendation_engine.response_formatter import format_response
from src.recommendation_engine.state_manager import update_state
from src.recommendation_engine.context_builder import extract_domain, DOMAIN_KEYWORDS
from src.recommendation_engine.full_project_generator import (
generate_full_project
)
import re
# ─────────────────────────────────────────────
# Project Idea Validator + Categorizer
# ─────────────────────────────────────────────
def validate_and_categorize_project(title: str, abstract: str = "") -> dict:
    """Ask the LLM whether *title* is a plausible graduation project and classify it.

    The prompt lists every key of ``DOMAIN_KEYWORDS`` except ``"Others"`` and
    asks for a ``VALID:`` / ``DOMAIN:`` (and optionally ``REASON:``) reply,
    which is parsed line by line as ``KEY: value`` pairs.

    Args:
        title: Candidate project title.
        abstract: Optional abstract; only its first 400 characters are sent.

    Returns:
        A dict of the form::

            {
                "is_valid": bool,
                "domain": str | None,   # always one of the known domains
                "reason": str
            }

        If the LLM call or the parsing raises, the function fails open and
        returns ``{"is_valid": True, "domain": None, "reason": ""}``.
    """
    known_domains = [d for d in DOMAIN_KEYWORDS if d != "Others"]
    domain_list_str = "\n".join(f"- {d}" for d in known_domains)
    abstract_line = "Abstract: " + abstract[:400] if abstract else ""
    prompt = f"""
You are an expert academic advisor evaluating graduation project ideas.
Project Title: "{title}"
{abstract_line}
Task 1 – Validity Check:
Is this a valid, feasible graduation project idea for a university student?
- It must be a technical or academic topic (not a random phrase, celebrity name, or nonsense)
- It should be specific enough to build something real
Answer: YES or NO
Task 2 – Domain Classification:
If valid, which ONE of the following domains best fits this project?
{domain_list_str}
Return your answer in this EXACT format (two lines only):
VALID: YES
DOMAIN: <domain name from the list above>
If invalid:
VALID: NO
DOMAIN: None
REASON: <one sentence why>
"""
    try:
        raw = generate_text(prompt, task="intent").strip()

        # Parse "KEY: value" lines. Markdown bold markers around the key or
        # value (e.g. "**VALID:** YES") are tolerated.
        fields = {}
        for line in raw.splitlines():
            key, sep, value = line.partition(":")
            if sep:
                fields[key.strip(" *").upper()] = value.strip(" *")

        is_valid = fields.get("VALID", "NO").rstrip(".").upper() == "YES"
        reason = fields.get("REASON", "")

        # Match the answer against the known domains case-insensitively and
        # return the canonical spelling; "None" or anything unknown -> None.
        canonical = {d.lower(): d for d in known_domains}
        domain = canonical.get(fields.get("DOMAIN", "").strip().lower())

        return {"is_valid": is_valid, "domain": domain, "reason": reason}
    except Exception:
        # Deliberate best-effort: a failed validation call must not block the user.
        return {"is_valid": True, "domain": None, "reason": ""}
def extract_number(text: str, default=5, max_value=20):
    """Return the first integer found in *text*, capped at *max_value*.

    A message that is exactly ``"1"`` or ``"2"`` (after stripping whitespace)
    is not treated as a count and yields *default* instead.

    Args:
        text: User message; non-string values are converted with ``str()``.
        default: Value returned when no usable number is present.
        max_value: Upper bound applied to the extracted number.

    Returns:
        The extracted (and capped) integer, or *default*.
    """
    cleaned = str(text).strip()
    if cleaned in ("1", "2"):
        return default
    # Search the normalised string so non-str input cannot raise TypeError.
    match = re.search(r"\d+", cleaned)
    if match is None:
        return default
    return min(int(match.group()), max_value)
def validate_and_format_domain(domain: str) -> str:
    """Validate a user-supplied domain and return a cleaned-up name.

    The input is first passed to ``extract_domain``; any result other than
    "others" (case-insensitive) is returned directly. Otherwise the LLM is
    asked to either return a corrected, formatted domain name or the sentinel
    ``INVALID``.

    Args:
        domain: Free-text domain typed by the user.

    Returns:
        The formatted domain name, or ``""`` when the input is blank, judged
        invalid, or the LLM call fails.
    """
    if not domain or not domain.strip():
        # Nothing to validate; avoid a pointless LLM round-trip.
        return ""

    # 1. Quick local validation for standard domains
    extracted = extract_domain(domain)
    if extracted and extracted.lower() != "others":
        return extracted

    # 2. Fall back to LLM validation
    prompt = f"""
Determine if the following domain/field is a valid academic, engineering, scientific, or technology domain suitable for a university graduation project (e.g., Computer Science, Engineering, Medicine, Business, Agriculture, Biology, etc.).
Also, correct any typos and format it cleanly (e.g., Title Case).
Domain to evaluate: "{domain}"
Rules:
- If it is a valid field of study, technology, or academic discipline (e.g., "artificial intelligence", "robotics", "bioinformatics", "educational games"), return ONLY the corrected and formatted domain name (e.g., "Artificial Intelligence").
- If it is unrelated to academic/technology graduation projects, or contains names of celebrities, sports teams, food, pop culture, or random questions (e.g., "messi", "fc barcelona", "pizza", "what is this"), return exactly "INVALID".
Return ONLY the formatted domain name or "INVALID". Do not include any other text.
"""
    try:
        res = generate_text(prompt, task="intent").strip()
    except Exception:
        # Best-effort: treat an LLM failure the same as an invalid domain.
        return ""

    # Strip surrounding quotes BEFORE testing the sentinel, so a reply of
    # '"INVALID"' is not mistaken for a domain literally named INVALID.
    cleaned = res.strip('"').strip("'").strip()
    if not cleaned or cleaned.rstrip(".").upper() == "INVALID":
        return ""
    return cleaned
def is_weak_project_title(title: str) -> bool:
    """Return True when *title* is too short or too generic to describe a project.

    A title is weak when it is empty, has fewer than four words, or has fewer
    than three words left once generic filler words ("system", "app", "smart",
    ...) are ignored.

    Args:
        title: Candidate project title.

    Returns:
        True if the title is considered weak, False otherwise.
    """
    if not title:
        return True

    words = title.strip().split()
    if len(words) < 4:
        return True

    weak_words = {
        "system",
        "platform",
        "app",
        "website",
        "application",
        "project",
        "ai",
        "smart",
        "tool",
    }
    meaningful = [w for w in words if w.lower() not in weak_words]

    # The filtered list is what matters here: counting all words again could
    # never be true after the four-word guard above.
    return len(meaningful) < 3
def is_generic_project_reference(text: str) -> bool:
    """Tell whether *text* is only a placeholder such as "my project" or "app".

    The comparison is exact after trimming surrounding whitespace and
    lower-casing; longer sentences that merely contain these phrases do not
    match.
    """
    bare_nouns = ("project", "system", "app", "idea")
    qualified = (
        "my project",
        "this project",
        "the project",
        "my system",
        "this system",
        "my app",
        "my application",
        "my idea",
    )
    candidate = text.strip().lower()
    if candidate in bare_nouns:
        return True
    return candidate in qualified
# Openers that mark the text as a question rather than a title.
_TITLE_QUESTION_STARTS = (
    "how ",
    "what ",
    "why ",
    "when ",
    "where ",
    "can ",
    "could ",
    "should ",
    "is ",
    "are ",
    "do ",
    "does ",
)

# Keyboard-mash fragments; matched as substrings.
_TITLE_NONSENSE_PATTERNS = (
    "asd",
    "qwe",
    "zxc",
    "testtest",
    "aaaa",
    "xxxxx",
)

# Short keywords must match a whole word (optionally plural). As substrings
# they fire on ordinary words: "app" in "happy", "ai" in "rain", "api" in
# "capital", "web" in "cobweb".
_TITLE_SHORT_KEYWORDS = frozenset({"ai", "iot", "app", "api", "web"})

# Longer keywords are still matched as substrings so that inflected forms
# ("systems", "chatbots", "e-learning") keep matching.
_TITLE_LONG_KEYWORDS = (
    "management",
    "analysis",
    "detection",
    "tracking",
    "recognition",
    "monitoring",
    "security",
    "attendance",
    "automation",
    "prediction",
    "dashboard",
    "diagnosis",
    "learning",
    "recommendation",
    "classification",
    "authentication",
    "optimization",
    "healthcare",
    "fintech",
    "education",
    "library",
    "hospital",
    "school",
    "medical",
    "industrial",
    "agriculture",
    "transport",
    "blockchain",
    "cloud",
    "robotics",
    "vision",
    "embedded",
    "system",
    "platform",
    "application",
    "website",
    "portal",
    "tool",
    "game",
    "generator",
    "engine",
    "software",
    "database",
    "model",
    "chatbot",
    "chat",
    "assistant",
    "network",
    "mobile",
    "smart",
)


def looks_like_real_project_title(title: str) -> bool:
    """Heuristically decide whether *title* reads like a real project title.

    A title is rejected when it is empty, has fewer than two words, repeats
    itself heavily (under 50% unique words), starts like a question, contains
    a keyboard-mash fragment, or mentions none of the known project keywords.

    Args:
        title: Candidate project title.

    Returns:
        True if the title passes every check, False otherwise.
    """
    if not title:
        return False

    stripped = title.strip()
    words = stripped.split()
    if len(words) < 2:
        return False

    if len(set(words)) / len(words) < 0.5:
        return False

    lowered = stripped.lower()
    if lowered.startswith(_TITLE_QUESTION_STARTS):
        return False
    if any(pattern in lowered for pattern in _TITLE_NONSENSE_PATTERNS):
        return False

    tokens = set(re.findall(r"[a-z0-9]+", lowered))
    for token in tokens:
        if token in _TITLE_SHORT_KEYWORDS:
            return True
        # Accept a simple plural such as "apps" or "apis".
        if token.endswith("s") and token[:-1] in _TITLE_SHORT_KEYWORDS:
            return True

    return any(keyword in lowered for keyword in _TITLE_LONG_KEYWORDS)
# Substrings that mark a message as a follow-up request ("more", "again", ...).
# chatbot() keeps the remembered domain when the router returns no domain but
# the message contains one of these.
FOLLOWUP_WORDS = [
    "another", "more", "again",
    "other ideas", "more ideas",
    "more features", "another features",
]
def finalize_response(user_input, response, history, state, user_id):
    """Record one conversation turn, persist memory, and return *response*.

    The user message and the assistant reply are appended to *history* in
    place. The last 20 history entries and *state* are then written through
    ``save_user_memory`` under *user_id*.
    """
    turn = (
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": response},
    )
    history.extend(turn)

    # Only the most recent 20 messages are persisted.
    snapshot = {"history": history[-20:], "state": state}
    save_user_memory(user_id, snapshot)
    return response
def is_gibberish_text(text: str) -> bool:
    """Heuristically flag keyboard mash, over-short or highly repetitive input.

    Rules, applied in order on the trimmed, lower-cased text:
      * the menu choices "1", "2" and "3" are never gibberish;
      * inputs shorter than three characters are gibberish unless allow-listed;
      * any known mash fragment ("asd", "qwe", ...) marks the text as gibberish;
      * three or more words with under 50% unique words are gibberish.
    """
    normalized = text.strip().lower()

    if normalized in ("1", "2", "3"):
        return False

    if len(normalized) < 3:
        short_ok = {
            "hi", "hey", "hello",
            "ai", "ml", "ui", "ux", "vr", "ar", "iot",
            "no", "la", "n", "y", "ok",
        }
        return normalized not in short_ok

    mash_fragments = ("asd", "qwe", "zxc", "aaa", "bbb", "ccc", "xxx", "testtest")
    if any(fragment in normalized for fragment in mash_fragments):
        return True

    tokens = normalized.split()
    return len(tokens) >= 3 and len(set(tokens)) / len(tokens) < 0.5
# Short keywords must match a whole word (optionally plural). As substrings
# they fire on unrelated text: "ai" in "said"/"again", "ml" in "html",
# "app" in "happy", "api" in "capital".
_PROJECT_SHORT_KEYWORDS = frozenset({"ai", "ml", "app", "iot", "web", "api"})

# Longer keywords and phrases are matched as substrings so inflected forms
# ("systems", "generated", "improving") still count.
_PROJECT_LONG_KEYWORDS = (
    "project",
    "system",
    "platform",
    "application",
    "website",
    "dashboard",
    "management",
    "machine learning",
    "deep learning",
    "computer vision",
    "blockchain",
    "mobile",
    "cloud",
    "security",
    "database",
    "generate",
    "feature",
    "features",
    "idea",
    "ideas",
    "improve",
    "description",
    "technologies",
    "architecture",
    "healthcare",
    "education",
    "fintech",
    "smart",
    "attendance",
    "monitoring",
    "tracking",
    "analysis",
    "recognition",
)


def is_project_related(text: str) -> bool:
    """Return True when *text* mentions at least one project-related keyword.

    Args:
        text: Raw user message.

    Returns:
        True if a short keyword appears as its own word (or simple plural), or
        a longer keyword/phrase appears anywhere in the lower-cased text.
    """
    lowered = text.lower().strip()

    tokens = set(re.findall(r"[a-z0-9]+", lowered))
    for token in tokens:
        if token in _PROJECT_SHORT_KEYWORDS:
            return True
        # Accept a simple plural such as "apps" or "apis".
        if token.endswith("s") and token[:-1] in _PROJECT_SHORT_KEYWORDS:
            return True

    return any(keyword in lowered for keyword in _PROJECT_LONG_KEYWORDS)
def is_general_question_or_unrelated_chat(text: str) -> bool:
    """Detect messages that read as a general question rather than a project request.

    After trimming and lower-casing, a message counts as a question when it
    ends with "?", begins with a question/request opener, or contains a common
    question phrase anywhere.
    """
    openers = (
        "how ", "what ", "why ", "when ", "where ", "can ", "could ", "should ",
        "is ", "are ", "do ", "does ", "explain ", "tell me ", "show me ", "describe "
    )
    embedded_phrases = (
        "what is", "what's", "tell me about", "can you", "could you", "how to", "how do"
    )

    message = text.strip().lower()
    return (
        message.endswith("?")
        or message.startswith(openers)
        or any(phrase in message for phrase in embedded_phrases)
    )
# NOTE(review): the emoji in the user-facing strings below were restored from
# mojibake in the source. Two of them were truncated there and are inferred
# from context (the memo in the "I need a project title" prompts and the
# thumbs-up in the "Got it!" reply) - please confirm.


def _merge_unique(previous, current):
    """Return *previous* plus the items of *current* not already present, in order."""
    merged = list(previous or [])
    for item in current or []:
        # List membership rather than a set: the items may be unhashable.
        if item not in merged:
            merged.append(item)
    return merged


def _render_full_project(state):
    """Build the full-project summary message from the fields stored in *state*."""
    technologies = ", ".join(state.get("technologies") or [])
    objectives = "\n".join("- " + x for x in (state.get("objectives") or []))
    return f"""
📦 Full Project Generated
📌 Project Title:
{state.get("project_title")}
📂 Category:
{state.get("category")}
🛠 Technologies:
{technologies}
📄 Abstract:
{state.get("abstract")}
📄 Detailed Description:
{state.get("description")}
❗ Problem Statement:
{state.get("problem_statement")}
💡 Proposed Solution:
{state.get("proposed_solution")}
🎯 Objectives:
{objectives}
━━━━━━━━━━━━━━━━━━━━━━
👉 What's next? You can say "improve features", or tell me to "replace abstract with..." your own custom text!
"""


def _handle_direct_reply(user_id, user_input, history, state, analysis):
    """Send the router's own reply and remember any details it extracted."""
    project_title = analysis.get("project_title")
    domain = analysis.get("domain")
    abstract = analysis.get("abstract")
    description = analysis.get("description")

    # Title and domain only fill gaps; remembered values are never overwritten.
    if project_title and not state.get("project_title"):
        state["project_title"] = project_title
    if domain and not state.get("domain"):
        state["domain"] = domain

    custom_saved = False
    if abstract:
        state["abstract"] = abstract
        state["custom_abstract"] = True
        custom_saved = True
    if description:
        state["description"] = description
        state["custom_description"] = True
        custom_saved = True

    final_reply = analysis.get("reply_text") or "I didn't quite catch that. Can you clarify?"
    if custom_saved:
        final_reply = "✅ I have saved your custom project details!\n\n" + final_reply
    return finalize_response(user_input, final_reply, history, state, user_id)


def _handle_idea_generation(user_id, user_input, history, state, analysis):
    """Resolve the domain (asking for it if unknown) and generate project ideas."""
    domain = analysis.get("domain")
    if domain:
        domain_lower = domain.lower()
        if domain_lower in ("other", "others", "general", "any"):
            state["domain"] = "general"
            state["waiting_for_domain"] = False
        elif domain_lower in ("domain", "domains", "list", "options", "help"):
            # The user asked for the list itself, not for a domain.
            state["domain"] = None
        else:
            state["domain"] = domain
            state["waiting_for_domain"] = False
    elif not any(w in user_input.lower() for w in FOLLOWUP_WORDS):
        # Not a follow-up ("more", "another", ...): forget the previous domain.
        state["domain"] = None

    if not state.get("domain"):
        state["waiting_for_domain"] = True
        domain_list = "\n".join(f"- {d}" for d in DOMAIN_KEYWORDS if d != "Others")
        response = (
            f"Which domain is your project in? 📚\n\n"
            f"{domain_list}\n\n"
            f"💡 Just type one of the domains above (e.g. **AI** or **Healthcare**)\n"
            f"If your domain isn't listed, type **Others** to see more options."
        )
        return finalize_response(user_input, response, history, state, user_id)

    top_k = analysis.get("number") or extract_number(user_input, 5)
    all_past_ideas = _merge_unique(state.get("all_generated_ideas"), state.get("ideas"))

    result = generate_ideas(
        domain=state.get("domain"),
        top_k=top_k,
        previous_generated_ideas=all_past_ideas
    )
    ideas = result.get("final_ideas", [])

    state["all_generated_ideas"] = all_past_ideas + ideas
    state["ideas"] = ideas
    state["last_action"] = "idea"
    # A fresh batch of ideas resets everything tied to the previous project.
    state["project_title"] = ""
    state["features"] = []
    state["all_generated_features"] = []
    state["description"] = ""
    state["abstract"] = ""
    state["technologies"] = []

    response = format_response("idea", "", state)
    return finalize_response(user_input, response, history, state, user_id)


def _handle_feature_generation(user_id, user_input, history, state, analysis):
    """Generate features for the current project title, avoiding repeats."""
    project_title = analysis.get("project_title")
    if project_title:
        state["project_title"] = project_title

    if not state.get("project_title"):
        return finalize_response(
            user_input,
            "I need a project title to generate features! 📝\nJust type your project title.",
            history,
            state,
            user_id
        )

    top_k = analysis.get("number") or extract_number(user_input, 5)
    all_past_features = _merge_unique(
        state.get("all_generated_features"), state.get("features")
    )

    result = generate_features(
        title=state.get("project_title"),
        description=state.get("description", ""),
        features=[],
        previous_generated_features=all_past_features,
        top_k=top_k
    )
    new_features = result.get("recommended_features", [])

    state["all_generated_features"] = all_past_features + new_features
    state["features"] = new_features
    state["last_action"] = "feature"

    response = format_response("feature", "", state)
    if state.get("custom_abstract") or state.get("custom_description"):
        # Offer to fold the new features into the user's own text; the answer
        # is handled by the confirmation_yes / confirmation_no actions.
        state["waiting_for_abstract_update"] = True
        response += "\n\n✨ **Would you like me to seamlessly weave these new features into your custom abstract and description? (Yes/No)**"
    return finalize_response(user_input, response, history, state, user_id)


def _handle_full_project_generation(user_id, user_input, history, state, analysis):
    """Generate the full project write-up, creating features first if none exist."""
    project_title = analysis.get("project_title")
    if project_title:
        state["project_title"] = project_title

    # Same guard as feature generation: without a title there is nothing to build.
    if not state.get("project_title"):
        return finalize_response(
            user_input,
            "I need a project title to generate the full project! 📝\nJust type your project title.",
            history,
            state,
            user_id
        )

    if not state.get("features"):
        feature_result = generate_features(
            title=state.get("project_title"),
            description=state.get("description", ""),
            features=[],
            previous_generated_features=[],
            top_k=8
        )
        state["features"] = feature_result.get("recommended_features", [])

    custom_desc = state.get("custom_description", False)
    custom_abs = state.get("custom_abstract", False)

    # Stored abstract/description are passed on only when the user wrote them.
    result = generate_full_project(
        title=state.get("project_title"),
        features=state.get("features", []),
        description=state.get("description", "") if custom_desc else "",
        abstract=state.get("abstract", "") if custom_abs else "",
        custom_description=custom_desc,
        custom_abstract=custom_abs
    )
    state = update_state(state, result, mode="merge")

    if state.get("domain"):
        state["category"] = state.get("domain")

    return finalize_response(
        user_input, _render_full_project(state), history, state, user_id
    )


def _handle_confirmation_yes(user_id, user_input, history, state, analysis):
    """Handle a "yes": rewrite custom sections if that was pending, else just confirm."""
    if state.get("waiting_for_abstract_update"):
        from src.recommendation_engine.full_project_generator import rewrite_custom_sections

        state["waiting_for_abstract_update"] = False
        rewritten = rewrite_custom_sections(
            features=state.get("features", []),
            abstract=state.get("abstract", "") if state.get("custom_abstract") else "",
            description=state.get("description", "") if state.get("custom_description") else ""
        )
        if state.get("custom_abstract") and rewritten.get("abstract"):
            state["abstract"] = rewritten["abstract"]
        if state.get("custom_description") and rewritten.get("description"):
            state["description"] = rewritten["description"]

        return finalize_response(
            user_input,
            "✅ **Done!** I've upgraded your custom abstract and description with the new features while keeping your original style intact.\n\nType **'2'** to generate and view your newly upgraded full project!",
            history,
            state,
            user_id
        )

    state["waiting_for_project_idea_confirm"] = False
    state["waiting_for_title_confirmation"] = False
    return finalize_response(user_input, "Great! Confirmed. Let's move on.", history, state, user_id)


def _handle_confirmation_no(user_id, user_input, history, state, analysis):
    """Handle a "no": keep custom sections untouched or clear pending confirmations."""
    if state.get("waiting_for_abstract_update"):
        state["waiting_for_abstract_update"] = False
        return finalize_response(
            user_input,
            "👍 **Got it!** I will leave your custom abstract and description exactly as you wrote them.\n\nType **'2'** whenever you're ready to view the full project.",
            history,
            state,
            user_id
        )

    state["waiting_for_project_idea_confirm"] = False
    state["waiting_for_title_confirmation"] = False
    return finalize_response(user_input, "No problem! Let's try something else.", history, state, user_id)


def _handle_clear_session(user_id, user_input, history, state, analysis):
    """Reset the session state to its defaults (history is kept)."""
    state = default_state()
    return finalize_response(
        user_input,
        "✅ Session cleared! We are starting fresh. How can I help you today?",
        history,
        state,
        user_id
    )


# Maps the router's "action" value to the function that handles it.
_ACTION_HANDLERS = {
    "reply_directly": _handle_direct_reply,
    "trigger_idea_generation": _handle_idea_generation,
    "trigger_feature_generation": _handle_feature_generation,
    "trigger_full_project_generation": _handle_full_project_generation,
    "confirmation_yes": _handle_confirmation_yes,
    "confirmation_no": _handle_confirmation_no,
    "clear_session": _handle_clear_session,
}


def chatbot(user_id: str, user_input: str):
    """Process one user message and return the assistant's reply.

    Commands (as recognised by ``is_command``) are delegated to
    ``handle_command``. Otherwise the user's stored history and state are
    loaded, ``analyze_user_input`` decides an ``action`` (default
    ``"reply_directly"``) and may supply ``reply_text``, ``domain``,
    ``project_title``, ``number``, ``abstract`` and ``description``; the
    matching handler then updates the state and builds the response.

    Every non-command path ends in ``finalize_response``, which appends the
    turn to the history and persists memory, so handlers do not save memory
    separately.

    Args:
        user_id: Key under which the user's memory is stored.
        user_input: Raw message typed by the user.

    Returns:
        The reply text (or whatever ``handle_command`` returns for commands).
    """
    if is_command(user_input):
        return handle_command(user_input)

    memory = get_user_memory(user_id)
    history = memory.get("history") or []
    state = memory.get("state") or default_state()

    # The Orchestrator handles all context and validation
    analysis = analyze_user_input(user_input, state)
    action = analysis.get("action", "reply_directly")

    handler = _ACTION_HANDLERS.get(action) if isinstance(action, str) else None
    if handler is None:
        return finalize_response(user_input, "I am not sure how to handle that.", history, state, user_id)
    return handler(user_id, user_input, history, state, analysis)