Spaces:
Sleeping
Sleeping
| # File: backend/gemini_utils.py | |
| import os | |
| import json | |
| from fastapi import HTTPException | |
| from dotenv import load_dotenv, find_dotenv | |
| from google import genai | |
| from google.genai import types | |
# Directory containing this module, and its parent directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)

# Load environment variables from a .env file next to this module first,
# then from one in the parent directory.
load_dotenv(os.path.join(BASE_DIR, '.env'))
load_dotenv(os.path.join(ROOT_DIR, '.env'))

# The Gemini client is created only when an API key is configured; otherwise
# `client` stays None and a warning is printed at import time.
api_key = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=api_key) if api_key else None
if not api_key:
    print("⚠️ WARNING: GEMINI_API_KEY is STILL NOT FOUND. Check your .env file!")
def _build_taxonomy_context(existing_courses, existing_categories) -> str:
    """Return the prompt section describing the user's existing folder taxonomy.

    Returns an empty string when neither list contains any entries.
    """
    if not (existing_courses or existing_categories):
        return ""

    lines = ["CONTEXT: The user has an existing folder structure:"]
    if existing_courses:
        # str() guards against non-string entries, which would make join() raise.
        joined_courses = ', '.join(map(str, existing_courses))
        lines.append(f"- Existing Parent Folders (Course Codes): {joined_courses}")
    if existing_categories:
        joined_categories = ', '.join(map(str, existing_categories))
        lines.append(f"- Existing Categories: {joined_categories}")

    # NOTE(review): the directives are attached only when an existing taxonomy
    # is supplied (directive 3 refers to "the context above") — confirm this
    # matches the intended nesting.
    directives = (
        "ORGANIZATION BALANCE DIRECTIVES:\n"
        "1. Be smart and specific. Look at keywords inside the filenames to discover courses and categories.\n"
        "2. If a filename contains a unique subject indicator (like 'HCI', 'MTH', 'Accounting'), extract that subject as the new custom Parent Folder. Do not dump them into 'General' if a specific subject folder can be created!\n"
        "3. If a filename matches an existing course code folder in the context above, reuse it. But if it doesn't match, create a fresh, accurate custom Parent Folder or use 'General' only as a last resort.\n"
        "4. Match categories specifically (e.g., 'Lab', 'Assignment', 'Exam', 'Syllabus', 'Project'). Use existing names if they fit, or invent clean new ones if needed.\n\n"
    )
    # Always separate the context listing from the directives with one blank
    # line, regardless of which of the two lists was provided.
    return "\n".join(lines) + "\n\n" + directives


def process_natural_language_sort(files: list, user_prompt: str, existing_courses: list = None, existing_categories: list = None) -> dict:
    """Ask Gemini to assign each file a 'ParentFolder / Category' path.

    Args:
        files: Items to sort; each must expose ``id`` and ``name`` attributes.
        user_prompt: Free-form sorting instructions from the user.
        existing_courses: Optional names of existing parent folders, passed to
            the model as context.
        existing_categories: Optional names of existing categories, passed to
            the model as context.

    Returns:
        The JSON object decoded from the model response. The system prompt
        asks for a ``sorted_files`` list of ``file_id`` / ``custom_path``
        items, but only the top-level object type is verified here.

    Raises:
        HTTPException: With status 500 when the Gemini client is not
            configured, when the API call fails, or when the response is
            empty, is not valid JSON, or is not a JSON object.
    """
    if not client:
        raise HTTPException(status_code=500, detail="Gemini API Key is missing on the server. Python cannot find your .env file.")

    file_list_input = [{"id": f.id, "name": f.name} for f in files]

    taxonomy_context = _build_taxonomy_context(existing_courses, existing_categories)

    system_instruction = (
        "You are an expert file sorting assistant. Your job is to organize an array of file names "
        "based strictly on user instructions and filename patterns.\n\n"
        + taxonomy_context +
        "CRITICAL: You must return a valid JSON object containing a list named 'sorted_files'. "
        "Each item in the list must have exactly two fields:\n"
        "- 'file_id': The integer ID of the file.\n"
        "- 'custom_path': A string representing the generated path STRICTLY in the format 'ParentFolder / Category'. "
        "(Example: 'HCI / Lab', 'CS-123 / Assignments', 'General / Taxes'). "
        "If a file should be ignored, set its custom_path to 'Unsorted'.\n\n"
        "Do not write any markdown code blocks, conversational text, or explanations. Return ONLY raw JSON."
    )

    prompt = f"User Instructions: {user_prompt}\n\nFiles to process:\n{json.dumps(file_list_input)}"

    try:
        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                system_instruction=system_instruction,
                # Low but non-zero temperature: mostly deterministic output with
                # a little flexibility when inventing folder names.
                temperature=0.3,
            ),
        )

        # The SDK may yield no text (e.g. a blocked or empty candidate); fail
        # with a clear message instead of an opaque json.loads(None) TypeError.
        raw_text = response.text
        if not raw_text:
            raise ValueError("Gemini returned an empty response.")

        result = json.loads(raw_text)
        # Callers are promised a dict; reject bare lists/scalars up front.
        if not isinstance(result, dict):
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(result).__name__}."
            )
        return result
    except Exception as e:
        print(f"❌ Gemini Sort Generation Failed: {e}")
        # Chain the cause so the original traceback is preserved in server logs.
        raise HTTPException(status_code=500, detail=f"AI processing failed: {str(e)}") from e