# File: backend/gemini_utils.py
"""Gemini-backed helper that sorts files into 'ParentFolder / Category' paths."""

import os
import json
from typing import Optional

from fastapi import HTTPException
from dotenv import load_dotenv, find_dotenv
from google import genai
from google.genai import types

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)

load_dotenv(os.path.join(BASE_DIR, '.env'))  # Looks in backend/.env
load_dotenv(os.path.join(ROOT_DIR, '.env'))  # Looks in DocuSort/.env

api_key = os.environ.get("GEMINI_API_KEY")
client = None

if api_key:
    client = genai.Client(api_key=api_key)
else:
    print("⚠️ WARNING: GEMINI_API_KEY is STILL NOT FOUND. Check your .env file!")


def _build_taxonomy_context(existing_courses: Optional[list], existing_categories: Optional[list]) -> str:
    """Return the prompt section describing existing folders plus the sorting directives."""
    taxonomy_context = ""
    if existing_courses or existing_categories:
        taxonomy_context = "CONTEXT: The user has an existing folder structure:\n"
        if existing_courses:
            # str() each entry so a non-string course code cannot crash the join.
            taxonomy_context += f"- Existing Parent Folders (Course Codes): {', '.join(map(str, existing_courses))}\n"
        if existing_categories:
            taxonomy_context += f"- Existing Categories: {', '.join(map(str, existing_categories))}\n\n"

    # NOTE(review): the original indentation of this file was lost, so it is not
    # certain whether these directives were meant to be added only when existing
    # folders are supplied. They are appended unconditionally here - confirm.
    taxonomy_context += (
        "ORGANIZATION BALANCE DIRECTIVES:\n"
        "1. Be smart and specific. Look at keywords inside the filenames to discover courses and categories.\n"
        "2. If a filename contains a unique subject indicator (like 'HCI', 'MTH', 'Accounting'), extract that subject as the new custom Parent Folder. Do not dump them into 'General' if a specific subject folder can be created!\n"
        "3. If a filename matches an existing course code folder in the context above, reuse it. But if it doesn't match, create a fresh, accurate custom Parent Folder or use 'General' only as a last resort.\n"
        "4. Match categories specifically (e.g., 'Lab', 'Assignment', 'Exam', 'Syllabus', 'Project'). Use existing names if they fit, or invent clean new ones if needed.\n\n"
    )
    return taxonomy_context


def _build_system_instruction(existing_courses: Optional[list], existing_categories: Optional[list]) -> str:
    """Return the full system instruction sent to the model, including the output contract."""
    return (
        "You are an expert file sorting assistant. Your job is to organize an array of file names "
        "based strictly on user instructions and filename patterns.\n\n"
        + _build_taxonomy_context(existing_courses, existing_categories) +
        "CRITICAL: You must return a valid JSON object containing a list named 'sorted_files'. "
        "Each item in the list must have exactly two fields:\n"
        "- 'file_id': The integer ID of the file.\n"
        "- 'custom_path': A string representing the generated path STRICTLY in the format 'ParentFolder / Category'. "
        "(Example: 'HCI / Lab', 'CS-123 / Assignments', 'General / Taxes'). "
        "If a file should be ignored, set its custom_path to 'Unsorted'.\n\n"
        "Do not write any markdown code blocks, conversational text, or explanations. Return ONLY raw JSON."
    )


def _ai_failure(reason: str) -> HTTPException:
    """Log *reason* and build the 500 error that reports an AI processing failure."""
    print(f"❌ Gemini Sort Generation Failed: {reason}")
    return HTTPException(status_code=500, detail=f"AI processing failed: {reason}")


def _parse_sort_response(raw_text: Optional[str]) -> dict:
    """Decode the model output and check it against the requested shape.

    Raises:
        HTTPException: 500 if the text is empty, is not valid JSON, or is not
            an object holding a 'sorted_files' list.
    """
    if not raw_text:
        # No text came back; fail with a clear message instead of letting
        # json.loads raise an opaque TypeError on None.
        raise _ai_failure("Gemini returned an empty response.")

    try:
        parsed = json.loads(raw_text)
    except (TypeError, ValueError) as exc:  # JSONDecodeError subclasses ValueError
        raise _ai_failure(str(exc)) from exc

    if not isinstance(parsed, dict) or not isinstance(parsed.get("sorted_files"), list):
        raise _ai_failure("Gemini response is missing the 'sorted_files' list.")
    return parsed


def process_natural_language_sort(files: list, user_prompt: str, existing_courses: Optional[list] = None, existing_categories: Optional[list] = None) -> dict:
    """Ask Gemini to assign each file a 'ParentFolder / Category' path.

    Args:
        files: Objects exposing ``id`` and ``name`` attributes; only those two
            values are sent to the model.
        user_prompt: Free-form sorting instructions from the user.
        existing_courses: Optional existing parent-folder names to offer for reuse.
        existing_categories: Optional existing category names to offer for reuse.

    Returns:
        The decoded JSON object from the model, holding a ``sorted_files`` list.
        An empty ``files`` input returns ``{"sorted_files": []}`` without
        calling the model.

    Raises:
        HTTPException: 500 if the API key is not configured, the Gemini call
            fails, or the response is not the expected JSON object.
    """
    if not client:
        raise HTTPException(status_code=500, detail="Gemini API Key is missing on the server. Python cannot find your .env file.")

    if not files:
        # Nothing to sort, so skip the remote call entirely.
        return {"sorted_files": []}

    file_list_input = [{"id": f.id, "name": f.name} for f in files]

    # --- REBALANCED TAXONOMY SYSTEM ---
    system_instruction = _build_system_instruction(existing_courses, existing_categories)

    # ensure_ascii=False keeps non-ASCII filenames readable so the model can
    # pick up keywords from them instead of seeing \uXXXX escapes.
    prompt = f"User Instructions: {user_prompt}\n\nFiles to process:\n{json.dumps(file_list_input, ensure_ascii=False)}"

    try:
        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                system_instruction=system_instruction,
                temperature=0.3  # Restored slight flexibility to ensure logical, specific folder structure generation
            )
        )
        raw_text = response.text
    except Exception as e:
        # Boundary with the remote service: turn any SDK/network failure into
        # an HTTP 500 while keeping the original cause chained.
        raise _ai_failure(str(e)) from e

    return _parse_sort_response(raw_text)