docusort-api / backend /gemini_utils.py
Mohib
Clean backend API push
8ddf321
Raw
History Blame Contribute Delete
3.87 kB
# File: backend/gemini_utils.py
import os
import json
from fastapi import HTTPException
from dotenv import load_dotenv, find_dotenv
from google import genai
from google.genai import types
# Directory that contains this module, and the directory one level above it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)

# Try an env file next to this module first, then one in the parent directory.
# NOTE(review): presumably values from the first file win because load_dotenv
# is called without override — confirm against the python-dotenv docs.
load_dotenv(os.path.join(BASE_DIR, '.env'))
load_dotenv(os.path.join(ROOT_DIR, '.env'))

api_key = os.getenv("GEMINI_API_KEY")

# `client` stays None when no key is configured; code that needs the API is
# expected to check for that before using it.
client = genai.Client(api_key=api_key) if api_key else None
if client is None:
    print("⚠️ WARNING: GEMINI_API_KEY is STILL NOT FOUND. Check your .env file!")
def _join_names(names) -> str:
    """Return *names* as a comma-separated string, skipping None/blank entries."""
    # str() keeps str.join from raising TypeError on non-string entries.
    cleaned = (str(name).strip() for name in (names or ()) if name is not None)
    return ", ".join(name for name in cleaned if name)


def _build_taxonomy_context(existing_courses=None, existing_categories=None) -> str:
    """Build the taxonomy part of the system instruction.

    The result optionally starts with a CONTEXT section listing the folders
    the user already has, followed by the organization directives. Sections
    are always separated by one blank line, and the text ends with a blank
    line so further instructions can be appended directly.
    """
    directives = (
        "ORGANIZATION BALANCE DIRECTIVES:\n"
        "1. Be smart and specific. Look at keywords inside the filenames to discover courses and categories.\n"
        "2. If a filename contains a unique subject indicator (like 'HCI', 'MTH', 'Accounting'), extract that subject as the new custom Parent Folder. Do not dump them into 'General' if a specific subject folder can be created!\n"
        "3. If a filename matches an existing course code folder in the context above, reuse it. But if it doesn't match, create a fresh, accurate custom Parent Folder or use 'General' only as a last resort.\n"
        "4. Match categories specifically (e.g., 'Lab', 'Assignment', 'Exam', 'Syllabus', 'Project'). Use existing names if they fit, or invent clean new ones if needed."
    )

    context_lines = []
    courses = _join_names(existing_courses)
    if courses:
        context_lines.append(f"- Existing Parent Folders (Course Codes): {courses}")
    categories = _join_names(existing_categories)
    if categories:
        context_lines.append(f"- Existing Categories: {categories}")

    sections = []
    if context_lines:
        header = "CONTEXT: The user has an existing folder structure:"
        sections.append("\n".join([header, *context_lines]))
    # NOTE(review): the directives are included even when no existing folders
    # are supplied — confirm this matches the intended prompt.
    sections.append(directives)
    return "\n\n".join(sections) + "\n\n"


def process_natural_language_sort(files: list, user_prompt: str, existing_courses: list = None, existing_categories: list = None) -> dict:
    """Ask Gemini to assign each file a 'ParentFolder / Category' path.

    Args:
        files: Objects exposing ``id`` and ``name`` attributes; only those two
            values are sent to the model.
        user_prompt: Free-form sorting instructions from the user.
        existing_courses: Optional names of parent folders the user already
            has, offered to the model for reuse.
        existing_categories: Optional names of categories the user already
            has, offered to the model for reuse.

    Returns:
        The JSON object produced by the model. The system instruction asks
        for a ``sorted_files`` list whose items carry ``file_id`` and
        ``custom_path``. An empty ``files`` list yields
        ``{"sorted_files": []}`` without calling the model.

    Raises:
        HTTPException: With status 500 when no Gemini client is configured,
            or when the model call fails or returns an empty, invalid, or
            non-object JSON reply.
    """
    if not client:
        raise HTTPException(status_code=500, detail="Gemini API Key is missing on the server. Python cannot find your .env file.")

    # Nothing to sort: skip the model round-trip entirely.
    if not files:
        return {"sorted_files": []}

    file_list_input = [{"id": f.id, "name": f.name} for f in files]

    system_instruction = (
        "You are an expert file sorting assistant. Your job is to organize an array of file names "
        "based strictly on user instructions and filename patterns.\n\n"
        + _build_taxonomy_context(existing_courses, existing_categories) +
        "CRITICAL: You must return a valid JSON object containing a list named 'sorted_files'. "
        "Each item in the list must have exactly two fields:\n"
        "- 'file_id': The integer ID of the file.\n"
        "- 'custom_path': A string representing the generated path STRICTLY in the format 'ParentFolder / Category'. "
        "(Example: 'HCI / Lab', 'CS-123 / Assignments', 'General / Taxes'). "
        "If a file should be ignored, set its custom_path to 'Unsorted'.\n\n"
        "Do not write any markdown code blocks, conversational text, or explanations. Return ONLY raw JSON."
    )

    prompt = f"User Instructions: {user_prompt}\n\nFiles to process:\n{json.dumps(file_list_input)}"

    try:
        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                system_instruction=system_instruction,
                temperature=0.3,  # Slight flexibility for logical, specific folder structures
            ),
        )
        raw_text = response.text
        if not raw_text:
            # Report a clear reason instead of json.loads' opaque TypeError.
            raise ValueError("Gemini returned an empty response.")
        result = json.loads(raw_text)
        if not isinstance(result, dict):
            # Enforce the declared return type before handing data to callers.
            raise ValueError(f"Expected a JSON object from Gemini, got {type(result).__name__}.")
    except Exception as e:
        # Boundary handler: every failure of the model round-trip becomes a 500.
        print(f"❌ Gemini Sort Generation Failed: {e}")
        raise HTTPException(status_code=500, detail=f"AI processing failed: {str(e)}") from e

    return result