updated app.py, included Phi-2
Browse files
- app.py +76 -122
- core/__init__.py +0 -0
- core/chunking.py +46 -0
- core/vector_store.py +38 -0
- requirements.txt +7 -4
app.py CHANGED

@@ -1,181 +1,135 @@

Old version (unrecoverable removed lines are collapsed to `-…`):

```diff
-#
-# ---------------- Universal Data AI ----------------
-#
-# Final app.py script (v3) with robust FAISS I/O
-# Corrects previous serialization errors.
-#
-# Last updated: August 8, 2025
-#
 
 import logging
 import uuid
-import io
-
-# FastAPI & Pydantic
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 
-#
-import …
 from PIL import Image
 import pytesseract
-
-# AI & Search Libraries
-import numpy as np
-import faiss
 from sentence_transformers import SentenceTransformer
-from …
 
 # --- 1. INITIAL SETUP & MODEL LOADING ---
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-app = FastAPI(
-    title="Universal Data AI",
-    description="Ephemeral data analysis tool with in-memory vector search.",
-    version="1.0.1",  # Version bump
-)
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
 
 try:
     logger.info("Loading AI models...")
-…
     logger.info("AI models loaded successfully.")
 except Exception as e:
     logger.critical(f"Fatal error: Could not load AI models. {e}")
     embedding_model = None
-
 
 SESSION_DATA = {}
 
 # --- 2. DATA MODELS ---
 
-class QueryRequest(BaseModel):
-    question: str
-
-class UploadResponse(BaseModel):
-    session_id: str
-    filename: str
-    chunks_created: int
-
-class QueryResponse(BaseModel):
-    answer: str
-    score: float
-    context: str
-
-# --- 3. HELPER FUNCTIONS ---
-
 def parse_pdf(content: bytes) -> str:
-    doc = fitz.open(stream=content, filetype="pdf")
-    return "".join(page.get_text() for page in doc)
-
 def parse_image(content: bytes) -> str:
-    image = Image.open(io.BytesIO(content))
-    return pytesseract.image_to_string(image)
-
-def chunk_text(text: str, chunk_size: int = 256, overlap: int = 32) -> list[str]:
-    words = text.split()
-    if not words: return []
-    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size - overlap)]
-
-# --- THIS FUNCTION IS CORRECTED ---
-def deserialize_index(serialized_index: bytes) -> faiss.Index:
-    """
-    Loads a FAISS index from its byte representation using a robust method.
-    """
-    try:
-        bio = io.BytesIO(serialized_index)
-        # Use PyCallbackIOReader to read from the in-memory binary stream
-        reader = faiss.PyCallbackIOReader(bio.read)
-        return faiss.read_index(reader)
-    except Exception as e:
-        logger.error(f"Failed to deserialize FAISS index: {e}")
-        raise
 
 # --- 4. API ENDPOINTS ---
 
 @app.get("/")
-def read_root():
-    return {"status": "ok", "message": "Welcome to Universal Data AI"}
 
 @app.post("/upload", response_model=UploadResponse)
 async def upload_file(file: UploadFile = File(...)):
-…
     session_id = str(uuid.uuid4())
-    logger.info(f"Upload received for session {session_id}: {file.filename}")
     content = await file.read()
-
     content_type = file.content_type
     if content_type == "application/pdf": text = parse_pdf(content)
     elif content_type and content_type.startswith("image/"): text = parse_image(content)
-…
-    if not …
-…
-        raise HTTPException(status_code=400, detail="Document too short to be processed.")
-
-    embeddings = embedding_model.encode(text_chunks, convert_to_numpy=True).astype('float32')
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(embeddings)
-
-    # --- THIS SECTION IS CORRECTED ---
-    try:
-        # Use PyCallbackIOWriter to write the index to an in-memory binary stream
-        bio = io.BytesIO()
-        writer = faiss.PyCallbackIOWriter(bio.write)
-        faiss.write_index(index, writer)
-        serialized_index = bio.getvalue()
-    except Exception as e:
-        logger.error(f"Failed to serialize FAISS index: {e}")
-        raise HTTPException(status_code=500, detail="Failed to create document index.")
-
-    SESSION_DATA[session_id] = {
-        "chunks": text_chunks,
-        "index": serialized_index,  # Store the index as bytes
-    }
-
     logger.info(f"Session {session_id} created with {len(text_chunks)} chunks.")
     return {"session_id": session_id, "filename": file.filename, "chunks_created": len(text_chunks)}
 
 @app.post("/query/{session_id}", response_model=QueryResponse)
 async def query_session(session_id: str, request: QueryRequest):
-…
         raise HTTPException(status_code=503, detail="AI models are not available.")
-
     session = SESSION_DATA.get(session_id)
     if not session:
         raise HTTPException(status_code=404, detail="Session not found.")
 
-…
     distances, indices = index.search(question_embedding, k)
-
-    relevant_chunks = [session["chunks"][i] for i in indices[0]]
-    context = " ".join(relevant_chunks)
 
-…
 
-…
```
New version (note: ctransformers exports `AutoModelForCausalLM`, not `AutoModel`; corrected below):

```diff
+# app.py
 
 import logging
 import uuid
+import io
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 
+# Import from our core modules
+from core.chunking import semantic_chunker
+from core.vector_store import create_faiss_index, deserialize_faiss_index
+
+# Parsing and AI libraries
+import fitz
 from PIL import Image
 import pytesseract
 from sentence_transformers import SentenceTransformer
+from ctransformers import AutoModelForCausalLM  # NEW: For running quantized GGUF models
 
 # --- 1. INITIAL SETUP & MODEL LOADING ---
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+app = FastAPI(title="Generative Universal Data AI", version="3.0.0")
 
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 )
 
+# --- Load Models ---
 try:
     logger.info("Loading AI models...")
+    # Model for creating vector embeddings (remains the same)
+    embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
+
+    # NEW: Loading the quantized Phi-2 model using ctransformers.
+    # This downloads a GGUF model file, optimized for CPU inference.
+    # Q4_K_M is a good balance of quality and performance.
+    llm = AutoModelForCausalLM.from_pretrained(
+        "TheBloke/phi-2-GGUF",
+        model_file="phi-2.Q4_K_M.gguf"
+    )
     logger.info("AI models loaded successfully.")
 except Exception as e:
     logger.critical(f"Fatal error: Could not load AI models. {e}")
     embedding_model = None
+    llm = None
 
 SESSION_DATA = {}
```
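The object ctransformers returns is directly callable, so the loading path can be smoke-tested outside FastAPI. A minimal sketch, assuming the same GGUF repo and quant file; the explicit `model_type="phi"` hint is an assumption, for builds that cannot infer the architecture from the repo config:

```python
# Standalone smoke test for the quantized Phi-2 load (sketch, not part of this commit).
# Assumption: model_type="phi" is passed explicitly in case the installed
# ctransformers build cannot infer the architecture on its own.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/phi-2-GGUF",
    model_file="phi-2.Q4_K_M.gguf",
    model_type="phi",
)

# The model instance is callable; generation kwargs mirror the endpoint's.
print(llm("Instruct: What does FAISS do?\nOutput:", max_new_tokens=64, temperature=0.2))
```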
```diff
 
 # --- 2. DATA MODELS ---
+class QueryRequest(BaseModel): question: str
+class UploadResponse(BaseModel): session_id: str; filename: str; chunks_created: int
+# Modified response to reflect generative model output
+class QueryResponse(BaseModel): answer: str; context: str
 
+# --- 3. HELPER FUNCTIONS --- (No changes here)
 def parse_pdf(content: bytes) -> str:
+    doc = fitz.open(stream=content, filetype="pdf"); return "".join(page.get_text() for page in doc)
 def parse_image(content: bytes) -> str:
+    image = Image.open(io.BytesIO(content)); return pytesseract.image_to_string(image)
 
 # --- 4. API ENDPOINTS ---
 
 @app.get("/")
+def read_root(): return {"status": "ok", "message": "Welcome to the Generative Universal Data AI"}
 
 @app.post("/upload", response_model=UploadResponse)
 async def upload_file(file: UploadFile = File(...)):
+    # This endpoint remains largely the same, using the BGE model and semantic chunking
+    if not embedding_model: raise HTTPException(status_code=503, detail="Embedding model not available.")
+    # ... (the rest of the upload logic is identical to the previous version)
     session_id = str(uuid.uuid4())
     content = await file.read()
     content_type = file.content_type
     if content_type == "application/pdf": text = parse_pdf(content)
     elif content_type and content_type.startswith("image/"): text = parse_image(content)
+    else: text = content.decode("utf-8")
+    if not text.strip(): raise HTTPException(status_code=400, detail="No text could be extracted.")
+    text_chunks = semantic_chunker(text, embedding_model)
+    if not text_chunks: raise HTTPException(status_code=400, detail="Document too short to be processed.")
+    embeddings = embedding_model.encode(text_chunks, convert_to_numpy=True)
+    serialized_index = create_faiss_index(embeddings)
+    if not serialized_index: raise HTTPException(status_code=500, detail="Failed to create document index.")
+    SESSION_DATA[session_id] = {"chunks": text_chunks, "index": serialized_index}
     logger.info(f"Session {session_id} created with {len(text_chunks)} chunks.")
     return {"session_id": session_id, "filename": file.filename, "chunks_created": len(text_chunks)}
 
 @app.post("/query/{session_id}", response_model=QueryResponse)
 async def query_session(session_id: str, request: QueryRequest):
+    # --- THIS ENDPOINT IS COMPLETELY REWORKED FOR PHI-2 ---
+    if not llm or not embedding_model:
         raise HTTPException(status_code=503, detail="AI models are not available.")
+
     session = SESSION_DATA.get(session_id)
     if not session:
         raise HTTPException(status_code=404, detail="Session not found.")
 
+    # Step 1: Retrieve relevant context (same as before)
+    query_with_prefix = f"Represent this sentence for searching relevant passages: {request.question}"
+    question_embedding = embedding_model.encode([query_with_prefix], convert_to_numpy=True).astype('float32')
+    index = deserialize_faiss_index(session["index"])
+    if not index: raise HTTPException(status_code=500, detail="Could not load session index.")
+    k = min(5, index.ntotal)
     distances, indices = index.search(question_embedding, k)
+    context = "\n".join([session["chunks"][i] for i in indices[0]])
 
+    # Step 2: Create a specific prompt for the generative model.
+    # This template instructs the model on how to behave.
+    prompt = f"""
+Instruct: Use the following context to answer the question accurately. If the answer is not present in the context, say "The answer is not available in the provided document."
+
+Context:
+{context}
+
+Question: {request.question}
+
+Answer:"""
+
+    logger.info("Generating answer with Phi-2...")
+
+    # Step 3: Generate the answer
+    answer = llm(
+        prompt,
+        max_new_tokens=256,  # Max length of the answer
+        temperature=0.2,     # Lower temperature for more factual answers
+        stop=["\n", "Instruct:", "Question:"]  # Stop generation at these tokens
+    )
+
+    # Generative models don't give a confidence 'score' like extractive ones.
+    # We simply return the generated text.
+    return {"answer": answer.strip(), "context": context}
```
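Taken together, the two endpoints give an upload-then-query flow. A hypothetical client session against a local dev server; the host/port (`uvicorn app:app` on 8000) and the sample file name are assumptions, not part of this commit:

```python
# Hypothetical client walkthrough; assumes the API is served locally on :8000
# and that report.pdf exists next to this script.
import requests

BASE = "http://localhost:8000"

# 1. Upload a document; the server parses, chunks, embeds, and indexes it.
with open("report.pdf", "rb") as f:
    up = requests.post(f"{BASE}/upload",
                       files={"file": ("report.pdf", f, "application/pdf")})
session_id = up.json()["session_id"]

# 2. Ask a question against that session's index.
resp = requests.post(f"{BASE}/query/{session_id}",
                     json={"question": "What is the main finding?"})
print(resp.json()["answer"])
```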
core/__init__.py ADDED

File without changes
core/chunking.py ADDED

@@ -0,0 +1,46 @@

```python
# core/chunking.py

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import logging

logger = logging.getLogger(__name__)

def semantic_chunker(text: str, model: SentenceTransformer, similarity_threshold: float = 0.55):
    """
    Splits text into chunks based on semantic similarity of sentences.
    """
    logger.info("Starting semantic chunking...")
    # First, split the document into sentences. A simple split by newline and period.
    sentences = [s.strip() for s in text.replace("\n", ". ").split(".") if s.strip()]
    if not sentences:
        return []

    # Generate embeddings for each sentence
    embeddings = model.encode(sentences, convert_to_numpy=True)

    chunks = []
    current_chunk_sentences = [sentences[0]]

    for i in range(1, len(sentences)):
        # Calculate similarity between the current sentence and the previous one
        similarity = cosine_similarity(
            embeddings[i].reshape(1, -1),
            embeddings[i-1].reshape(1, -1)
        )[0, 0]

        # If similarity is below the threshold, it's a semantic break.
        # Finalize the current chunk and start a new one.
        if similarity < similarity_threshold:
            chunks.append(" ".join(current_chunk_sentences))
            current_chunk_sentences = []

        current_chunk_sentences.append(sentences[i])

    # Add the last remaining chunk
    if current_chunk_sentences:
        chunks.append(" ".join(current_chunk_sentences))

    logger.info(f"Semantic chunking resulted in {len(chunks)} chunks.")
    return chunks
```
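A quick way to see where the 0.55 threshold breaks a document is to run the chunker on toy text. A sketch, with `all-MiniLM-L6-v2` swapped in purely to keep the example light (the app itself uses `BAAI/bge-large-en-v1.5`); the sample sentences are illustrative:

```python
# Illustrative use of semantic_chunker on toy text (sketch only).
from sentence_transformers import SentenceTransformer
from core.chunking import semantic_chunker

model = SentenceTransformer("all-MiniLM-L6-v2")  # small model, example only
text = (
    "FAISS builds vector indexes. Vector search finds nearest neighbours. "
    "My cat sleeps all day. Cats love warm windowsills."
)
for n, chunk in enumerate(semantic_chunker(text, model), start=1):
    print(n, chunk)
# The two search sentences and the two cat sentences should land in
# separate chunks once adjacent-sentence similarity dips below 0.55.
```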
core/vector_store.py ADDED

@@ -0,0 +1,38 @@

```python
# core/vector_store.py

import faiss
import io
import logging
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

def create_faiss_index(embeddings):
    """Creates a FAISS index from a list of embeddings."""
    if embeddings is None or len(embeddings) == 0:
        logger.warning("No embeddings provided to create FAISS index.")
        return None

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings.astype('float32'))

    # Serialize the index to bytes for in-memory storage
    try:
        bio = io.BytesIO()
        writer = faiss.PyCallbackIOWriter(bio.write)
        faiss.write_index(index, writer)
        return bio.getvalue()
    except Exception as e:
        logger.error(f"Failed to serialize FAISS index: {e}")
        return None

def deserialize_faiss_index(index_bytes: bytes) -> faiss.Index:
    """Deserializes a FAISS index from bytes."""
    try:
        bio = io.BytesIO(index_bytes)
        reader = faiss.PyCallbackIOReader(bio.read)
        return faiss.read_index(reader)
    except Exception as e:
        logger.error(f"Failed to deserialize FAISS index: {e}")
        return None
```
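Since the index only ever lives in `SESSION_DATA` as bytes, the serialize/deserialize pair is worth a round-trip check. A sketch with random vectors; the 1024 dimension matches `bge-large-en-v1.5` embeddings, the counts are arbitrary test values:

```python
# Round-trip check for the byte-serialized FAISS index (sketch only).
import numpy as np
from core.vector_store import create_faiss_index, deserialize_faiss_index

vecs = np.random.rand(100, 1024).astype("float32")  # 1024-d, like bge-large
blob = create_faiss_index(vecs)        # bytes, as stored in SESSION_DATA
index = deserialize_faiss_index(blob)

distances, indices = index.search(vecs[:1], 5)
assert indices[0][0] == 0  # a vector's nearest neighbour is itself
```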
requirements.txt CHANGED

@@ -1,10 +1,13 @@

```diff
+ctransformers>=0.2.27
 fastapi
-uvicorn
+uvicorn
 python-multipart
-
+pydantic
 PyMuPDF
 Pillow
 pytesseract
-
 sentence-transformers
-faiss-cpu
+faiss-cpu
+transformers
+torch
+scikit-learn
```
|