Spaces:

CosmoLABHub
/

AkomptaBackend

Sleeping

File size: 9,124 Bytes

ef287e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e138bb
ef287e1
26c0cd1
ef287e1
 
 
 
26c0cd1
 
 
 
ef287e1
 
 
 
 
 
 
26c0cd1
 
 
 
 
ef287e1
 
 
 
 
26c0cd1
ef287e1
 
26c0cd1
ef287e1
 
 
 
26c0cd1
ef287e1
26c0cd1
 
 
 
ef287e1
 
26c0cd1
ef287e1
 
 
26c0cd1
ef287e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e138bb
ef287e1
 
 
 
 
 
 
 
 
4e138bb
26c0cd1
4e138bb
 
 
26c0cd1
 
 
 
4e138bb
 
 
 
 
 
 
 
26c0cd1
 
 
 
 
 
4e138bb
 
 
 
 
 
26c0cd1
4e138bb
 
26c0cd1
4e138bb
 
 
 
26c0cd1
4e138bb
26c0cd1
 
 
 
4e138bb
 
26c0cd1
4e138bb
 
26c0cd1
4e138bb
 
 
 
 
 
26c0cd1
 
4e138bb
 
26c0cd1
4e138bb
 
 
26c0cd1
4e138bb

import os
import json
from pathlib import Path
from google import genai
from google.genai import types
from django.conf import settings

class GeminiService:
    """Wrapper around the Gemini client used by Akompta to interpret commands.

    The service sends audio or text commands (plus the user's product list)
    to a Gemini model and asks it to answer with a JSON object describing the
    detected intent.  It can also ask the model for three short financial
    insights.  Network/model failures never propagate from the ``process_*``
    methods: they are reported through an ``"intent": "error"`` dictionary or
    through placeholder insights.
    """

    # Placeholder insights returned when Gemini fails or answers with nothing.
    _FALLBACK_INSIGHTS = (
        "Analyse des ventes en cours...",
        "Vérification des stocks...",
        "Calcul des marges...",
    )

    def __init__(self):
        """Resolve the API key and create the Gemini client.

        The key is read from the ``GEMINI_API_KEY`` environment variable; when
        that is unset or empty, the ``.env`` file located two directories above
        this module is scanned for a ``GEMINI_API_KEY=...`` line.

        Raises:
            ValueError: if no key is found or the key is still the template
                placeholder value.
        """
        # Try to load from environment first
        self.api_key = os.environ.get("GEMINI_API_KEY")

        # If not in environment, try loading from .env file directly
        if not self.api_key:
            env_path = Path(__file__).resolve().parent.parent / '.env'
            self.api_key = self._read_api_key_from_env_file(env_path)

        if not self.api_key or self.api_key == 'your-gemini-api-key-here':
            raise ValueError("GEMINI_API_KEY not found or invalid. Please set it in backend/.env file")

        self.client = genai.Client(api_key=self.api_key)
        # Model alias chosen for its free tier.  The original author noted
        # quotas of 15 RPM / 1M tokens per day -- verify against current limits.
        self.model = "gemini-flash-latest"

    @staticmethod
    def _read_api_key_from_env_file(env_path):
        """Return the GEMINI_API_KEY value found in *env_path*, or None.

        Only the first ``GEMINI_API_KEY=`` line is considered.  Surrounding
        whitespace and one pair of matching quotes around the value are
        removed, so ``GEMINI_API_KEY="abc"`` yields ``abc``.  A missing or
        unreadable file is treated the same as a file without the key.
        """
        try:
            with open(env_path, 'r', encoding='utf-8') as env_file:
                for raw_line in env_file:
                    line = raw_line.strip()
                    if not line.startswith('GEMINI_API_KEY='):
                        continue
                    value = line.split('=', 1)[1].strip()
                    # .env files commonly quote values; the quotes are not
                    # part of the key itself.
                    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                        value = value[1:-1]
                    return value or None
        except (OSError, UnicodeDecodeError):
            return None
        return None

    @staticmethod
    def _to_prompt_json(data):
        """Serialize *data* to JSON text suitable for embedding in a prompt.

        ``ensure_ascii=False`` keeps accented characters readable instead of
        ``\\uXXXX`` escapes.  ``default=str`` is a deliberate choice: values
        the json module cannot encode (for example ``Decimal`` or ``date``
        objects) are rendered with ``str()`` because the output is only read
        by the model, never parsed back.
        """
        return json.dumps(data, ensure_ascii=False, default=str)

    @staticmethod
    def _parse_json_payload(raw_text):
        """Decode the model's answer and make sure it is a JSON object.

        Raises:
            ValueError: if the answer is empty, is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``), or is valid
                JSON but not an object.
        """
        if not raw_text:
            raise ValueError("Empty response from Gemini")
        payload = json.loads(raw_text)
        if not isinstance(payload, dict):
            raise ValueError("Unexpected response from Gemini: expected a JSON object")
        return payload

    @staticmethod
    def _voice_prompt(inventory_json):
        """Build the instruction prompt sent alongside an audio command."""
        return f"""
        You are an AI assistant for a financial app called Akompta.
        Your task is to listen to the user's voice command and extract transaction details.
        
        The user might say things like:
        - "J'ai vendu la tomate pour 500FCFA le Kilo" (Income)
        - "J'ai payé un ordinateur à 300000FCFA" (Expense)
        
        Here is the list of products currently in the user's inventory:
        {inventory_json}
        
        If the user mentions a product from this list but doesn't specify the price, use the price from the list to calculate the total amount (amount = quantity * price).
        
        Please perform the following:
        1. Transcribe the audio exactly as spoken (in French).
        2. Analyze the intent and extract structured data.
        
        Return ONLY a JSON object with the following structure:
        {{
            "transcription": "The exact transcription",
            "intent": "create_transaction",
            "data": {{
                "type": "income" or "expense",
                "amount": number (e.g. 500),
                "currency": "FCFA" or other,
                "category": "Category name (e.g. Vente, Alimentation, Transport, Technologie)",
                "name": "Description of the item or service (e.g. 'Vente de 3 Mangues')",
                "date": "YYYY-MM-DD" or null if not specified (assume today if null)
            }}
        }}
        
        Important: If a product price is found in the inventory list, ALWAYS calculate: amount = quantity * unit_price.
        
        If the audio is not clear or not related to a transaction, return:
        {{
            "transcription": "...",
            "intent": "unknown",
            "error": "Reason"
        }}
        """

    @staticmethod
    def _text_prompt(inventory_json):
        """Build the instruction prompt sent alongside a typed command."""
        return f"""
        You are an AI assistant for a financial app called Akompta.
        Your task is to analyze the user's text command and extract transaction details.
        
        The user might say things like:
        - "J'ai vendu la tomate pour 500FCFA le Kilo" (Income)
        - "J'ai payé un ordinateur à 300000FCFA" (Expense)
        - "Ajoute un produit Tomate à 200FCFA le bol, j'en ai 30 en stock" (Create Product)
        
        Here is the list of products currently in the user's inventory:
        {inventory_json}
        
        If the user mentions a product from this list but doesn't specify the price, use the price from the list to calculate the total amount (amount = quantity * price).
        Example: If "Mangue" is in the list at 100 FCFA and the user says "vendu 3 mangues", the amount should be 300.
        
        Please perform the following:
        1. Analyze the intent and extract structured data.
        
        Return ONLY a JSON object with the following structure:
        
        For transactions:
        {{
            "transcription": "The input text",
            "intent": "create_transaction",
            "data": {{
                "type": "income" or "expense",
                "amount": number,
                "currency": "FCFA",
                "category": "Category name",
                "name": "Description (e.g. 'Vente de 3 Mangues')",
                "date": "YYYY-MM-DD"
            }}
        }}
        
        Important: If a product price is found in the inventory list, ALWAYS calculate: amount = quantity * unit_price.
        
        For products/inventory:
        {{
            "transcription": "The input text",
            "intent": "create_product",
            "data": {{
                "name": "Product name",
                "price": number,
                "unit": "Unit (e.g. bol, kg, unit)",
                "description": "Short description",
                "category": "vente" or "depense" or "stock",
                "stock_status": "ok" or "low" or "rupture"
            }}
        }}
        
        If the text is not clear, return:
        {{
            "transcription": "...",
            "intent": "unknown",
            "error": "Reason"
        }}
        """

    @staticmethod
    def _insights_prompt(data_json):
        """Build the (French) prompt asking for three one-sentence insights."""
        return f"""
        Tu es un analyste financier expert pour l'application Akompta.
        Analyse les données suivantes (JSON) :
        {data_json}

        Génère exactement 3 insights courts et percutants (max 1 phrase chacun) en Français :
        1. Une observation sur les ventes ou revenus.
        2. Une observation sur les dépenses.
        3. Une alerte sur le stock ou une recommandation.

        Format de réponse attendu : Une liste simple de 3 phrases séparées par des sauts de ligne. Pas de markdown complexe, pas de titres.
        """

    def process_voice_command(self, audio_bytes, mime_type="audio/mp3", context_products=None):
        """Send an audio command to Gemini and return the decoded JSON answer.

        Args:
            audio_bytes: Raw audio content forwarded to the model.
            mime_type: MIME type declared for ``audio_bytes``.
            context_products: Optional JSON-serializable product list embedded
                in the prompt so the model can look up prices; defaults to an
                empty list.

        Returns:
            The JSON object produced by the model (the prompt asks for
            ``transcription``, ``intent`` and ``data``/``error`` keys).  On any
            failure -- serialization, API call, empty or non-object answer --
            a dict with an empty ``transcription``, ``"intent": "error"`` and
            the error message is returned instead of raising.
        """
        if context_products is None:
            context_products = []

        try:
            # Prompt construction lives inside the try so that a product list
            # that cannot be serialized is reported like any other failure.
            prompt = self._voice_prompt(self._to_prompt_json(context_products))
            response = self.client.models.generate_content(
                model=self.model,
                contents=[
                    types.Content(
                        parts=[
                            types.Part.from_bytes(data=audio_bytes, mime_type=mime_type),
                            types.Part.from_text(text=prompt)
                        ]
                    )
                ],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json"
                )
            )
            result = self._parse_json_payload(response.text)
        except Exception as e:
            print(f"Error calling Gemini: {e}")
            return {
                "transcription": "",
                "intent": "error",
                "error": str(e)
            }

        print(f"Gemini AI Voice Result: {result}")
        return result

    def process_text_command(self, text, context_products=None):
        """Send a typed command to Gemini and return the decoded JSON answer.

        Args:
            text: The user's command.
            context_products: Optional JSON-serializable product list embedded
                in the prompt; defaults to an empty list.

        Returns:
            The JSON object produced by the model, with ``transcription``
            filled with ``text`` when the model left it empty.  On any failure
            a dict with ``transcription`` set to ``text``,
            ``"intent": "error"`` and the error message is returned instead of
            raising.
        """
        if context_products is None:
            context_products = []

        try:
            prompt = self._text_prompt(self._to_prompt_json(context_products))
            response = self.client.models.generate_content(
                model=self.model,
                contents=[
                    types.Content(
                        parts=[
                            types.Part.from_text(text=f"User command: {text}"),
                            types.Part.from_text(text=prompt)
                        ]
                    )
                ],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json"
                )
            )
            result = self._parse_json_payload(response.text)
        except Exception as e:
            print(f"Error calling Gemini: {e}")
            return {
                "transcription": text,
                "intent": "error",
                "error": str(e)
            }

        print(f"Gemini AI Result: {result}")
        # Ensure transcription is the input text if not provided by AI
        if not result.get('transcription'):
            result['transcription'] = text
        return result

    def process_insights(self, context_data):
        """Ask Gemini for up to three short insights about *context_data*.

        Args:
            context_data: JSON-serializable data describing the user's
                finances; values the json module cannot encode are rendered
                with ``str()``.

        Returns:
            A list of at most three non-empty, stripped lines taken from the
            model's answer.  If the call fails or the answer contains no
            text, three placeholder sentences are returned instead.
        """
        try:
            prompt = self._insights_prompt(self._to_prompt_json(context_data))
            response = self.client.models.generate_content(
                model=self.model,
                contents=prompt
            )
            text = response.text or ""
        except Exception as e:
            print(f"Error calling Gemini for insights: {e}")
            return list(self._FALLBACK_INSIGHTS)

        items = [line.strip() for line in text.split('\n') if line.strip()]
        # An empty answer is as unusable as a failed call: fall back to the
        # placeholders rather than handing the caller an empty list.
        return items[:3] if items else list(self._FALLBACK_INSIGHTS)