File size: 5,434 Bytes
9a000fe
bc8a612
e769917
bc8a612
9a000fe
bc8a612
 
 
 
9a000fe
 
 
 
 
 
6b9a057
 
bc8a612
6fca0b0
61e6651
 
 
 
 
bc8a612
 
e769917
 
 
bc8a612
61e6651
bc8a612
e769917
bc8a612
9a000fe
e769917
 
61e6651
d1af85f
61e6651
6fca0b0
61e6651
 
9a000fe
6fca0b0
 
 
 
 
 
9a000fe
6fca0b0
bc8a612
d1af85f
bc8a612
 
9a000fe
d1af85f
e769917
6b9a057
bc8a612
6fca0b0
 
 
 
 
 
 
 
 
 
 
bc8a612
61e6651
bc8a612
6fca0b0
 
61e6651
bc8a612
6fca0b0
 
bc8a612
 
 
 
6fca0b0
 
bc8a612
 
e769917
6fca0b0
 
61e6651
e769917
6b9a057
bc8a612
61e6651
6fca0b0
 
 
 
 
 
 
 
 
 
 
61e6651
9a000fe
6fca0b0
 
61e6651
9a000fe
 
6fca0b0
 
61e6651
9a000fe
bc8a612
 
 
 
 
9a000fe
bc8a612
9a000fe
bc8a612
 
 
 
9a000fe
 
61e6651
9a000fe
6fca0b0
 
61e6651
9a000fe
bc8a612
9a000fe
bc8a612
61e6651
 
 
9a000fe
 
 
61e6651
 
 
9a000fe
61e6651
9a000fe
 
 
61e6651
 
 
9a000fe
 
6fca0b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# app.py

import logging
import uuid
import io
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Import from our core modules
from core.chunking import semantic_chunker
from core.vector_store import create_faiss_index, deserialize_faiss_index

# Parsing and AI libraries
import fitz
from PIL import Image
import pytesseract
from sentence_transformers import SentenceTransformer
from ctransformers import AutoModelForCausalLM  # ✅ FIXED import

# --- THIS IS THE FIX FOR TESSERACT ---
# Explicitly tell pytesseract where to find the Tesseract OCR engine.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
# ------------------------------------

# --- 1. INITIAL SETUP & MODEL LOADING ---

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Optimized Universal Data AI", version="3.1.0")

# NOTE(review): CORS is wide open (any origin, with credentials) — fine for a
# demo, but should be restricted to known origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
)

# --- Load Optimized Models ---
# Models are loaded once at import time. On any failure both globals are set
# to None, and each endpoint checks for None and answers 503 instead of
# crashing the whole app at startup.
try:
    logger.info("Loading optimized AI models...")

    # Using a smaller, but still powerful, BGE model
    embedding_model = SentenceTransformer('BAAI/bge-base-en-v1.5')
    
    # Load TinyLlama in GGUF format using ctransformers
    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        model_type="llama",   # Tell ctransformers the model family
        gpu_layers=0          # For CPU-only environment
    )

    logger.info("AI models loaded successfully.")
except Exception as e:
    logger.critical(f"Fatal error: Could not load AI models. {e}")
    embedding_model = None
    llm = None

# In-memory session store: session_id -> {"chunks": [...], "index": bytes}.
# Sessions are lost on process restart and are never evicted (grows unbounded).
SESSION_DATA = {}

# --- 2. DATA MODELS ---
class QueryRequest(BaseModel):
    """Request body for /query/{session_id}: the user's question."""
    question: str

class UploadResponse(BaseModel):
    """Response for /upload: the new session id and chunking stats."""
    session_id: str
    filename: str
    chunks_created: int

class QueryResponse(BaseModel):
    """Response for /query: the generated answer plus the retrieved context."""
    answer: str
    context: str

# --- 3. HELPER FUNCTIONS ---
def parse_pdf(content: bytes) -> str:
    """Extract plain text from a PDF given as raw bytes.

    Opens the document from an in-memory stream (no temp file) and
    concatenates the text of every page in order.
    """
    # FIX: the original never closed the Document, leaking the native
    # MuPDF handle on every upload. Document supports the context-manager
    # protocol, so `with` guarantees cleanup even if get_text() raises.
    with fitz.open(stream=content, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

def parse_image(content: bytes) -> str:
    """Run OCR on an image supplied as raw bytes and return the extracted text."""
    buffer = io.BytesIO(content)
    return pytesseract.image_to_string(Image.open(buffer))

# --- 4. API ENDPOINTS ---

@app.get("/")
def read_root():
    """Health-check / landing endpoint: confirms the API is up."""
    payload = {
        "status": "ok",
        "message": "Welcome to the Optimized Universal Data AI",
    }
    return payload

@app.post("/upload", response_model=UploadResponse)
async def upload_file(file: UploadFile = File(...)):
    """Ingest a document, chunk and embed it, and open a query session.

    Accepts PDFs, images (via OCR), and .txt/.md plain-text files.
    Returns a session id that /query/{session_id} uses for retrieval.

    Raises:
        HTTPException 503: embedding model failed to load at startup.
        HTTPException 400: unsupported/undecodable file or no extractable text.
        HTTPException 500: FAISS index creation failed.
    """
    if not embedding_model:
        raise HTTPException(status_code=503, detail="Embedding model not available.")

    session_id = str(uuid.uuid4())
    content = await file.read()
    content_type = file.content_type
    # FIX: file.filename may be None depending on the client; normalize it so
    # the .endswith() check below cannot raise AttributeError (which would
    # surface as a 500 instead of the intended 400).
    filename = file.filename or ""

    if content_type == "application/pdf":
        text = parse_pdf(content)
    elif content_type and content_type.startswith("image/"):
        text = parse_image(content)
    elif filename.endswith(('.txt', '.md')):
        try:
            text = content.decode("utf-8")
        except UnicodeDecodeError:
            # FIX: a non-UTF-8 upload is a client error, not a server crash.
            raise HTTPException(status_code=400, detail="File is not valid UTF-8 text.")
    else:
        raise HTTPException(status_code=400, detail=f"Unsupported file type: {content_type}")

    if not text.strip():
        raise HTTPException(status_code=400, detail="No text could be extracted.")

    text_chunks = semantic_chunker(text, embedding_model)
    if not text_chunks:
        raise HTTPException(status_code=400, detail="Document too short to be processed.")

    embeddings = embedding_model.encode(text_chunks, convert_to_numpy=True)
    serialized_index = create_faiss_index(embeddings)
    if not serialized_index:
        raise HTTPException(status_code=500, detail="Failed to create document index.")

    # Sessions live only in process memory (SESSION_DATA) — lost on restart.
    SESSION_DATA[session_id] = {"chunks": text_chunks, "index": serialized_index}
    logger.info(f"Session {session_id} created with {len(text_chunks)} chunks.")
    return {"session_id": session_id, "filename": filename, "chunks_created": len(text_chunks)}

@app.post("/query/{session_id}", response_model=QueryResponse)
async def query_session(session_id: str, request: QueryRequest):
    """Answer a question against a previously uploaded document.

    Embeds the question (with the BGE retrieval prefix), retrieves the top-k
    chunks from the session's FAISS index, and asks the LLM to answer using
    only that context.

    Raises:
        HTTPException 503: models failed to load at startup.
        HTTPException 404: unknown session id.
        HTTPException 500: session index could not be deserialized.
    """
    if not llm or not embedding_model:
        raise HTTPException(status_code=503, detail="AI models are not available.")
        
    session = SESSION_DATA.get(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found.")
    
    # BGE models expect this instruction prefix on queries (not on passages)
    # for best retrieval quality.
    query_with_prefix = f"Represent this sentence for searching relevant passages: {request.question}"
    question_embedding = embedding_model.encode([query_with_prefix], convert_to_numpy=True).astype('float32')
    
    # The index is stored serialized in SESSION_DATA; rebuild it per request.
    index = deserialize_faiss_index(session["index"])
    if not index:
        raise HTTPException(status_code=500, detail="Could not load session index.")
    
    # Never ask FAISS for more neighbors than the index contains.
    k = min(5, index.ntotal)
    distances, indices = index.search(question_embedding, k)
    context = "\n".join([session["chunks"][i] for i in indices[0]])

    # Correct prompt format for TinyLlama Chat
    # NOTE(review): this uses ChatML (<|im_start|>) markers; TinyLlama-Chat
    # v1.0's published template is the Zephyr style (<|user|>/<|assistant|>) —
    # confirm which format this GGUF build was trained with.
    prompt = f"""<|im_start|>user
Use the following context to answer the question.
Context:
{context}

Question: {request.question}<|im_end|>
<|im_start|>assistant
"""

    logger.info("Generating answer with TinyLlama...")
    
    # ctransformers model is directly callable; stop sequence trims the
    # end-of-turn marker from the generated text.
    answer = llm(
        prompt,
        max_new_tokens=256,
        temperature=0.3,
        stop=["<|im_end|>"]
    )
    
    return {"answer": answer.strip(), "context": context}