| import logging |
| from typing import Dict, Any, List, Set |
|
|
| from src.recommendation_engine.context_builder import build_domain_context |
| from src.recommendation_engine.prompt_builder import build_idea_prompt |
| from src.recommendation_engine.llm_client import generate_text |
| from src.recommendation_engine.validator import validate_generated_list |
|
|
| from src.similarity_model import compare_two_ideas, load_metadata |
| from src.recommendation_engine.novelty_checker import is_idea_novel |
| from src.recommendation_engine.config import DEFAULT_IDEA_COUNT |
|
|
| logger = logging.getLogger(__name__) |
|
|
| MAX_RETRIES = 5 |
| SIMILARITY_THRESHOLD_LOCAL = 0.60 |
|
|
| def normalize_idea(text: str) -> str: |
| return " ".join(text.lower().strip().split()) |
|
|
| def clean_ideas(ideas: List[str]) -> List[str]: |
| return [i.strip() for i in ideas if 2 <= len(i.split()) <= 8] |
|
|
| def is_duplicate_local(idea: str, existing: Set[str]) -> bool: |
| for old in existing: |
| if compare_two_ideas(idea, old) >= SIMILARITY_THRESHOLD_LOCAL: |
| return True |
| return False |
|
|
| def fallback_by_domain(domain: str) -> List[str]: |
| import random |
|
|
| domain = (domain or "general").lower() |
|
|
| fallback_map = { |
| "education": [ |
| "AI adaptive learning system", |
| "Student performance prediction platform", |
| "Gamified learning mobile application", |
| "Automated grading system", |
| "Virtual classroom engagement analyzer", |
| "AI-powered language tutor for kids", |
| "Adaptive curriculum recommendation engine", |
| "Dyslexia assistant mobile application" |
| ], |
| "healthcare": [ |
| "AI disease prediction system", |
| "Smart patient monitoring system", |
| "Medical diagnosis assistant", |
| "IoT health tracking device", |
| "Hospital resource optimization system", |
| "Wearable fall detection for elderly", |
| "Computer vision surgery tools tracker", |
| "Smart appointment booking and triage chat" |
| ], |
| "fintech": [ |
| "Fraud detection AI system", |
| "Smart expense tracking app", |
| "Blockchain payment system", |
| "Credit risk prediction model", |
| "AI investment advisor", |
| "Automated personal budgeting assistant", |
| "Cryptocurrency portfolio analyzer", |
| "AI invoice processing portal" |
| ], |
| "general": [ |
| "AI-powered agricultural crop disease detection mobile app", |
| "Blockchain-secured digital academic certificate verification portal", |
| "AR-based indoor navigation assistant for visually impaired students", |
| "Smart waste sorting bin using computer vision and mechanical sorting", |
| "Decentralized federated learning for collaborative medical image classification", |
| "Automated license plate recognition and parking management system", |
| "AI-driven smart library book recommendation portal", |
| "Smart home energy management and load prediction system", |
| "Real-time object detection for warehouse inventory tracking", |
| "Autonomous delivery robot path planning simulation" |
| ] |
| } |
|
|
| ideas = fallback_map.get(domain) or fallback_map.get("general") |
| shuffled_ideas = list(ideas) |
| random.shuffle(shuffled_ideas) |
| return shuffled_ideas |
|
|
| def dynamic_fallback_ideas( |
| domain: str, |
| count: int, |
| exclude_set: Set[str], |
| previous_ideas: List[str] |
| ) -> List[str]: |
| logger.info(f"Generating dynamic fallback ideas for domain={domain}") |
| prompt = f""" |
| You are an advanced AI research and innovation consultant. |
| The standard project generator has failed to find unique ideas for the selected domain because of strict originality rules. |
| |
| DOMAIN: |
| {domain} |
| |
| EXISTING CONTEXT: |
| Please suggest {count} highly creative, unique, yet practical graduation project ideas that do not exist in standard databases. |
| Avoid any standard topics (like simple prediction dashboards, basic chatbots, or generic automation). |
| Think of combining different domains to create implementable software/hardware systems (e.g., mobile health tracking + edge-inference, edge computing + precision agriculture, secure e-commerce + decentralized identity). |
| Ensure the ideas are completely feasible, realistic, and implementable within 6–9 months by a team of undergraduate students. Do NOT suggest highly theoretical, PhD-level, or impossible/sci-fi research topics (no quantum computing algorithms, non-existent sensors, etc.). |
| |
| PREVIOUS IDEAS TO AVOID: |
| {", ".join(exclude_set | set(previous_ideas))} |
| |
| FORMAT: |
| - One idea per line |
| - No numbering |
| - No explanations |
| - Keep them concise (4-12 words) |
| """ |
| raw_text = generate_text(prompt, task="idea", temperature=0.95) |
| if not raw_text: |
| return [] |
| generated = validate_generated_list(text=raw_text, top_k=count) |
| generated = clean_ideas(generated) |
| |
| valid_fallbacks = [] |
| for idea in generated: |
| if not is_idea_novel(idea): |
| continue |
| valid_fallbacks.append(idea) |
| return valid_fallbacks |
|
|
| def generate_ideas( |
| domain: str, |
| top_k: int = DEFAULT_IDEA_COUNT, |
| previous_generated_ideas: List[str] = None |
| ) -> Dict[str, Any]: |
|
|
| if previous_generated_ideas is None: |
| previous_generated_ideas = [] |
|
|
| top_k = max(1, min(top_k, 20)) |
| domain = domain or "general" |
|
|
| logger.info(f"Starting idea generation | domain={domain} | top_k={top_k}") |
|
|
| context = build_domain_context(domain) |
|
|
| final_ideas: List[str] = [] |
| final_set: Set[str] = set() |
| previous_norm = set(normalize_idea(i) for i in previous_generated_ideas) |
|
|
| all_generated: List[str] = [] |
| attempts = 0 |
|
|
| |
| |
| |
| while len(final_ideas) < top_k and attempts < MAX_RETRIES: |
|
|
| attempts += 1 |
| logger.info(f"Attempt #{attempts}") |
|
|
| generation_count = max(top_k * 3, 12) |
|
|
| prompt = build_idea_prompt( |
| context=context, |
| count=generation_count, |
| previous_ideas=previous_generated_ideas |
| ) |
|
|
| raw_text = generate_text(prompt, task="idea") |
|
|
| if not raw_text: |
| logger.warning("Empty LLM response") |
| continue |
|
|
| generated = validate_generated_list( |
| text=raw_text, |
| top_k=generation_count |
| ) |
|
|
| generated = clean_ideas(generated) |
|
|
| logger.info(f"Generated {len(generated)} ideas") |
| all_generated.extend(generated) |
|
|
| |
| |
| |
| for idea in generated: |
|
|
| normalized = normalize_idea(idea) |
|
|
| if not normalized: |
| continue |
|
|
| |
| if not is_idea_novel(idea): |
| logger.info(f"[SKIP DATASET SIMILAR] {idea}") |
| continue |
|
|
| |
| if normalized in final_set: |
| continue |
|
|
| |
| if normalized in previous_norm: |
| logger.info(f"[SKIP PREVIOUS] {idea}") |
| continue |
|
|
| |
| skip_similar_prev = False |
| for old in previous_generated_ideas: |
| if compare_two_ideas(idea, old) >= 0.85: |
| logger.info(f"[SKIP SIMILAR PREVIOUS] {idea}") |
| skip_similar_prev = True |
| break |
|
|
| if skip_similar_prev: |
| continue |
|
|
| |
| if is_duplicate_local(idea, final_set): |
| logger.info(f"[SKIP SIMILAR] {idea}") |
| continue |
|
|
| |
| logger.info(f"[NEW IDEA] {idea}") |
|
|
| final_ideas.append(idea) |
| final_set.add(normalized) |
|
|
| if len(final_ideas) >= top_k: |
| break |
|
|
| |
| |
| |
| if len(final_ideas) < top_k: |
|
|
| logger.warning("Using dynamic fallback ideas") |
| needed = top_k - len(final_ideas) |
| dynamic_fallback = dynamic_fallback_ideas( |
| domain=domain, |
| count=needed * 2, |
| exclude_set=final_set, |
| previous_ideas=previous_generated_ideas |
| ) |
|
|
| for f in dynamic_fallback: |
| normalized = normalize_idea(f) |
| if normalized not in final_set: |
| final_ideas.append(f) |
| final_set.add(normalized) |
| if len(final_ideas) >= top_k: |
| break |
|
|
| if len(final_ideas) < top_k: |
|
|
| logger.warning("Using static fallback ideas") |
|
|
| fallback = fallback_by_domain(domain) |
|
|
| for f in fallback: |
|
|
| normalized = normalize_idea(f) |
|
|
| if normalized not in final_set: |
| final_ideas.append(f) |
| final_set.add(normalized) |
|
|
| if len(final_ideas) >= top_k: |
| break |
|
|
| logger.info(f"Final ideas: {final_ideas}") |
|
|
| return { |
| "domain": domain, |
| "generated_ideas": all_generated, |
| "final_ideas": final_ideas |
| } |
|
|