File size: 4,116 Bytes

8223b74

import json
import os
import shutil
from datetime import datetime

import docx
import pytesseract
from pdf2image import convert_from_path
from PIL import Image
from sqlalchemy.orm import Session

from app.db.models import Document
from app.agent import create_agent

# Supported file types: maps an upload's MIME content type to the extractor
# family ('pdf', 'image' or 'docx') used to pull text out of it.
SUPPORTED_CONTENT_TYPES = {
    'application/pdf': 'pdf',
    'image/jpeg': 'image',
    'image/png': 'image',
    'image/tiff': 'image',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx'
}

# File upload directory: an 'uploads' folder under the current working
# directory, created at import time if it does not already exist.
UPLOAD_DIR = os.path.join(os.getcwd(), 'uploads')
os.makedirs(UPLOAD_DIR, exist_ok=True)

def save_uploaded_file(file, filename):
    """Save an uploaded file into UPLOAD_DIR and return the path written.

    Args:
        file: Upload object exposing a binary file-like ``.file`` attribute,
            which is the stream that gets copied to disk.
        filename: Name to store the file under. Only its final path
            component is used; any directory part is discarded.

    Returns:
        The path of the saved file, always directly inside UPLOAD_DIR.
    """
    # The name may come straight from an upload request, so strip directory
    # components: otherwise "../x" or an absolute path would make
    # os.path.join write outside UPLOAD_DIR.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(file_path, "wb") as buffer:
        # Copy in chunks instead of loading the whole upload into memory.
        shutil.copyfileobj(file.file, buffer)
    return file_path

def extract_text_from_pdf(file_path, lang='tur'):
    """Extract text from a PDF by converting its pages to images and OCR-ing them.

    Args:
        file_path: Path of the PDF file to read.
        lang: Language code handed to ``pytesseract.image_to_string``.
            Defaults to 'tur', the value that used to be hard-coded.

    Returns:
        The OCR text of every page, each page followed by a newline, or an
        empty string if conversion or OCR fails for any reason.
    """
    try:
        # Convert the PDF into one image per page.
        pages = convert_from_path(file_path)

        # OCR each page; join once instead of growing a string in a loop.
        return "".join(
            pytesseract.image_to_string(page, lang=lang) + "\n"
            for page in pages
        )
    except Exception as e:
        # Best-effort extraction: report the failure and hand back no text.
        print(f"PDF işleme hatası: {str(e)}")
        return ""

def extract_text_from_image(file_path, lang='tur'):
    """Extract text from an image file using OCR.

    Args:
        file_path: Path of the image file to read.
        lang: Language code handed to ``pytesseract.image_to_string``.
            Defaults to 'tur', the value that used to be hard-coded.

    Returns:
        The OCR text, or an empty string if the image cannot be opened or
        OCR fails for any reason.
    """
    try:
        # Open via a context manager so the underlying file handle is
        # released as soon as OCR is done, even if OCR raises.
        with Image.open(file_path) as img:
            return pytesseract.image_to_string(img, lang=lang)
    except Exception as e:
        # Best-effort extraction: report the failure and hand back no text.
        print(f"Görüntü işleme hatası: {str(e)}")
        return ""

def extract_text_from_docx(file_path):
    """Extract the text of a DOCX file.

    Returns the text of all paragraphs joined with newlines, or an empty
    string if the document cannot be read.
    """
    try:
        paragraphs = docx.Document(file_path).paragraphs
        return "\n".join(para.text for para in paragraphs)
    except Exception as e:
        # Best-effort extraction: report the failure and hand back no text.
        print(f"DOCX işleme hatası: {str(e)}")
        return ""

def process_document(file_path, content_type):
    """Extract text from a document according to its content type.

    Looks the content type up in SUPPORTED_CONTENT_TYPES and calls the
    matching extractor. Unsupported content types yield a fixed message
    string rather than an exception; a supported type with no matching
    extractor yields an empty string.
    """
    kind = SUPPORTED_CONTENT_TYPES.get(content_type)
    if not kind:
        return "Desteklenmeyen dosya türü"

    # One extractor per file family.
    extractors = {
        'pdf': extract_text_from_pdf,
        'image': extract_text_from_image,
        'docx': extract_text_from_docx,
    }
    extractor = extractors.get(kind)
    if extractor is None:
        return ""
    return extractor(file_path)

def analyze_document_content(content, db: Session):
    """Analyse document text with the agent and return the outcome as JSON.

    Builds an agent via ``create_agent(db)``, sends it a prompt containing
    ``content`` and serialises the agent's "output" value.

    Returns:
        A JSON string with an "analyzed_at" ISO timestamp and either an
        "analysis" key (success) or an "error" key (any exception raised
        while creating, running or serialising the analysis).
    """
    try:
        # Create the agent.
        executor = create_agent(db)

        # Analysis prompt (in Turkish): asks for cost calculations, labour
        # costs, material costs, profit margin and total cost.
        prompt = f"""Bu belgeyi analiz et ve aşağıdaki bilgileri çıkar:
        1. Belgedeki maliyet hesaplamalarıyla ilgili tüm bilgiler
        2. İşçilik maliyetleri
        3. Malzeme maliyetleri
        4. Kar marjı bilgileri
        5. Toplam maliyet
        
        Belge içeriği:
        {content}
        """

        # Run the agent and wrap its output in a JSON document.
        response = executor.invoke({"input": prompt})
        report = {
            "analysis": response["output"],
            "analyzed_at": datetime.now().isoformat(),
        }
        return json.dumps(report, ensure_ascii=False)
    except Exception as e:
        # Failures are reported in the returned JSON instead of propagating.
        print(f"Analiz hatası: {str(e)}")
        failure = {
            "error": f"Analiz sırasında hata oluştu: {str(e)}",
            "analyzed_at": datetime.now().isoformat(),
        }
        return json.dumps(failure, ensure_ascii=False)

def save_document_to_db(db: Session, filename, content_type, file_path, file_size, content_text=None, analysis_result=None):
    """Store a document's details in the database and return the saved row.

    Creates a ``Document`` from the given values, stamps ``analyzed_at``
    with the current time when ``analysis_result`` is truthy, then adds,
    commits and refreshes it on ``db``.
    """
    fields = {
        "filename": filename,
        "content_type": content_type,
        "file_path": file_path,
        "file_size": file_size,
        "content_text": content_text,
        "analysis_result": analysis_result,
    }
    record = Document(**fields)

    # Only documents that already carry an analysis get an analysis time.
    if analysis_result:
        record.analyzed_at = datetime.now()

    db.add(record)
    db.commit()
    # Reload the instance's state from the database after the commit.
    db.refresh(record)
    return record