Sabithulla committed on
Commit
2a72045
·
1 Parent(s): 6e4922a

Add FastAPI backend with Docker for HuggingFace Spaces

Browse files
Files changed (6) hide show
  1. Dockerfile +29 -0
  2. database.py +50 -0
  3. main.py +127 -0
  4. model_manager.py +192 -0
  5. ocr_engine.py +30 -0
  6. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for llama-cpp and image processing.
# --no-install-recommends keeps the image slim.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libopenblas-dev \
    tesseract-ocr \
    libtesseract-dev \
    && rm -rf /var/lib/apt/lists/*

# Flush Python logs straight to stdout so the Spaces log viewer shows them live.
ENV PYTHONUNBUFFERED=1

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create models directory
RUN mkdir -p models

# Expose port 7860 (HuggingFace Spaces default)
EXPOSE 7860

# Run Uvicorn on port 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "75"]
database.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from supabase import create_client, Client
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
class DatabaseManager:
    """Thin wrapper around the Supabase client for chat-message persistence.

    If SUPABASE_URL / SUPABASE_KEY are not configured, every method degrades
    to a no-op so the API can still run without a database.
    """

    def __init__(self):
        url = os.environ.get("SUPABASE_URL")
        key = os.environ.get("SUPABASE_KEY")
        if url and key:
            self.supabase: Client = create_client(url, key)
        else:
            # Degraded mode: all methods below short-circuit on None.
            self.supabase = None
            print("Warning: Supabase credentials missing. Database functionality will be disabled.")

    def store_message(self, user_id: str, role: str, content: str, model_used: str):
        """Insert one chat message row; returns the Supabase response, or None
        when the database is disabled."""
        if not self.supabase:
            return None

        data = {
            "user_id": user_id,
            "role": role,
            "content": content,
            "model_used": model_used,
        }
        return self.supabase.table("messages").insert(data).execute()

    def get_history(self, user_id: str):
        """Return the user's messages from the last 24 hours, oldest first."""
        if not self.supabase:
            return []

        from datetime import datetime, timedelta, timezone

        # Enforce the documented 24-hour window (previously the comment
        # claimed it but no filter was applied).
        cutoff = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat()
        return (
            self.supabase.table("messages")
            .select("*")
            .eq("user_id", user_id)
            .gte("created_at", cutoff)
            .order("created_at", desc=False)
            .execute()
        )

    def cleanup_old_messages(self):
        """Delete messages older than 24 hours (intended for a cron job).

        Equivalent SQL: DELETE FROM messages WHERE created_at < NOW() - INTERVAL '1 day';
        NOTE(review): previously a stub even though /cleanup calls it —
        confirm the service role key permits deletes.
        """
        if not self.supabase:
            return None

        from datetime import datetime, timedelta, timezone

        cutoff = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat()
        return (
            self.supabase.table("messages")
            .delete()
            .lt("created_at", cutoff)
            .execute()
        )

db_manager = DatabaseManager()
main.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Body, HTTPException, Request
2
+ from fastapi.responses import StreamingResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ import uvicorn
6
+ import os
7
+ import json
8
+ import sys
9
+ from dotenv import load_dotenv
10
+ from typing import Optional, List
11
+ import logging
12
+
13
+ from model_manager import model_manager
14
+ from ocr_engine import ocr_engine
15
+ from database import db_manager
16
+
17
# Setup logging
# Logs go to stdout so the container/platform log viewer picks them up.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)
logger = logging.getLogger(__name__)

# Load .env values (Supabase credentials, PORT, ...) before anything reads them.
load_dotenv()

app = FastAPI(title="AI Platform API")

# Configure CORS
# Only the deployed frontend and local dev servers may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "https://frontend-one-gamma-14.vercel.app",
        "http://localhost:3000",  # For local development
        "http://localhost:8000"
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
41
+
42
@app.get("/")
async def root():
    """Landing route: advertises the service name, version and endpoint map."""
    endpoint_map = {
        "health": "/health",
        "chat": "/chat",
        "upload": "/upload-image",
        "cleanup": "/cleanup",
    }
    return {
        "name": "Alpha Core AI API",
        "version": "1.0.0",
        "status": "online",
        "endpoints": endpoint_map,
    }
55
+
56
@app.get("/health")
async def health_check():
    """Liveness probe used by the hosting platform."""
    payload = {"status": "healthy", "version": "1.0.0"}
    return payload
59
+
60
class ChatRequest(BaseModel):
    # Request payload for POST /chat.
    message: str                          # user prompt text
    model: str = "tinyllama"              # key into ModelManager.model_configs
    user_id: str = "default_user"         # identifies the conversation owner in storage
    context: Optional[List[dict]] = None  # prior turns, presumably [{"role": ..., "content": ...}] — see format_prompt
    # Sampling parameters forwarded to llama-cpp.
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.95
    max_tokens: Optional[int] = 2048
    repeat_penalty: Optional[float] = 1.1
69
+
70
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """Stream model tokens to the client as Server-Sent Events.

    Persists both the user prompt and the finished assistant reply once the
    stream completes. Errors raised inside the generator are reported as an
    SSE error event (HTTP headers are already sent by then); errors before
    streaming starts become a 500.
    """
    try:
        logger.info(f"Chat request: model={request.model}, user={request.user_id}")

        def stream_response():
            full_response = ""
            try:
                # Pass context and sampling settings to the model manager.
                params = {
                    "temperature": request.temperature,
                    "top_p": request.top_p,
                    "max_tokens": request.max_tokens,
                    "repeat_penalty": request.repeat_penalty
                }
                for token in model_manager.generate_stream(request.model, request.message, request.context, **params):
                    full_response += token
                    yield f"data: {json.dumps({'token': token})}\n\n"

                logger.info(f"Response generated: {len(full_response)} tokens")

                # Store both sides of the exchange.
                # BUGFIX: DatabaseManager.store_message signature is
                # (user_id, role, content, model_used) — role must come before
                # content; the old call swapped them.
                db_manager.store_message(request.user_id, "user", request.message, request.model)
                db_manager.store_message(request.user_id, "assistant", full_response, request.model)

                yield "data: [DONE]\n\n"
            except Exception as e:
                logger.error(f"Stream error: {str(e)}", exc_info=True)
                yield f"data: {json.dumps({'error': str(e)})}\n\n"

        return StreamingResponse(stream_response(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Chat endpoint error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
104
+
105
@app.post("/upload-image")
async def upload_image(file: UploadFile = File(...)):
    """OCR an uploaded image and return the extracted text as {"text": ...}."""
    # content_type may be absent entirely (None); the old check would raise
    # AttributeError instead of a clean 400 in that case.
    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        content = await file.read()
        extracted_text = ocr_engine.extract_text(content)
        return {"text": extracted_text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
116
+
117
@app.get("/cleanup")
async def cleanup_chats():
    """Trigger deletion of stale chat history (intended for a cron ping)."""
    try:
        db_manager.cleanup_old_messages()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"message": "Cleanup successful"}
124
+
125
if __name__ == "__main__":
    # Local/dev entry point; the Docker image launches Uvicorn via CMD instead.
    port = int(os.getenv("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
model_manager.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from llama_cpp import Llama
3
+ import requests
4
+ from typing import Generator
5
+
6
class ModelManager:
    """Downloads, caches and serves GGUF chat models via llama-cpp.

    NOTE(review): the constructor eagerly downloads EVERY configured model
    (tens of GB for the 7B / 8x7B entries) — confirm the host has the disk
    and bandwidth for this, or switch to lazy download via load_model().
    """

    def __init__(self):
        # model_id -> loaded Llama instance (populated lazily by load_model).
        self.models = {}
        # Download location and prompt-template family for each model.
        # NOTE(review): several entries are labelled "chatml" (e.g. mistral,
        # zephyr) whose upstream cards document other native templates —
        # kept as configured; verify output quality per model.
        self.model_configs = {
            "tinyllama": {
                "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                "file": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                "format": "tinyllama"
            },
            "phi": {
                "repo": "TheBloke/phi-2-GGUF",
                "file": "phi-2.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf",
                "format": "phi"
            },
            "coder": {
                "repo": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
                "file": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
                "format": "chatml"
            },
            "orca": {
                "repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
                "file": "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
                "url": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
                "format": "llama3"
            },
            "fast-chat": {
                "repo": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
                "file": "qwen2.5-0.5b-instruct-q4_k_m.gguf",
                "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf",
                "format": "chatml"
            },
            "mistral": {
                "repo": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
                "file": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
                "format": "chatml"
            },
            "neural": {
                "repo": "TheBloke/neural-chat-7B-v3-1-GGUF",
                "file": "neural-chat-7b-v3-1.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/neural-chat-7B-v3-1-GGUF/resolve/main/neural-chat-7b-v3-1.Q4_K_M.gguf",
                "format": "chatml"
            },
            "zephyr": {
                "repo": "TheBloke/zephyr-7B-beta-GGUF",
                "file": "zephyr-7b-beta.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_M.gguf",
                "format": "chatml"
            },
            "openhermes": {
                "repo": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
                "file": "openhermes-2.5-mistral-7b.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF/resolve/main/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
                "format": "chatml"
            },
            "starling": {
                "repo": "TheBloke/Starling-LM-7B-alpha-GGUF",
                "file": "starling-lm-7b-alpha.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf",
                "format": "chatml"
            },
            "dolphin": {
                "repo": "TheBloke/dolphin-2.5-mixtral-8x7b-GGUF",
                "file": "dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf",
                "url": "https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/resolve/main/dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf",
                "format": "chatml"
            }
        }
        self.models_dir = os.path.join(os.getcwd(), "models")
        os.makedirs(self.models_dir, exist_ok=True)
        # Proactively download all models
        self.auto_download_all()

    def auto_download_all(self):
        """Best-effort download of every configured model; failures are logged
        and skipped so one bad URL doesn't block startup."""
        print("Starting proactive model download (Auto-Download Phase)...")
        for model_id in self.model_configs:
            try:
                self.download_model(model_id)
            except Exception as e:
                print(f"Failed to auto-download {model_id}: {e}")

    def download_model(self, model_id: str):
        """Ensure the GGUF file for model_id exists locally; return its path.

        Raises ValueError for unknown model ids, and re-raises any download
        error after removing the partial file.
        """
        config = self.model_configs.get(model_id)
        if not config:
            raise ValueError(f"Model {model_id} not configured")

        target_path = os.path.join(self.models_dir, config["file"])
        # Treat tiny files as failed/partial downloads and re-fetch.
        if os.path.exists(target_path) and os.path.getsize(target_path) > 50000000:  # Min 50MB
            return target_path

        print(f"Downloading {model_id} from {config['url']}...")
        try:
            response = requests.get(config["url"], stream=True, timeout=60)
            response.raise_for_status()
            with open(target_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):  # 1MB chunks
                    if chunk:
                        f.write(chunk)
            print(f"Successfully downloaded {model_id}")
            return target_path
        except Exception as e:
            # Remove partial files so the size check above re-triggers a fetch.
            if os.path.exists(target_path):
                os.remove(target_path)
            print(f"Download failed for {model_id}: {e}")
            raise  # bare raise preserves the original traceback (was `raise e`)

    def load_model(self, model_id: str):
        """Return a cached Llama instance, downloading/loading it on first use."""
        if model_id in self.models:
            return self.models[model_id]

        path = self.download_model(model_id)
        self.models[model_id] = Llama(
            model_path=path,
            n_ctx=2048,  # Standard context
            n_threads=4,
            verbose=False
        )
        return self.models[model_id]

    def format_prompt(self, model_id: str, system: str, history: list, prompt: str):
        """Build the model-family-specific prompt string.

        Returns (full_prompt, stop_tokens). History entries are dicts with
        "role" and "content" keys; any role other than "user" is treated as
        "assistant". Unknown formats fall back to the raw prompt.
        """
        fmt = self.model_configs[model_id]["format"]

        if fmt == "chatml":
            full = f"<|im_start|>system\n{system}<|im_end|>\n"
            for msg in history:
                role = "user" if msg["role"] == "user" else "assistant"
                full += f"<|im_start|>{role}\n{msg['content']}<|im_end|>\n"
            full += f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
            return full, ["<|im_end|>", "###", "<|im_start|>", "</s>"]

        elif fmt == "tinyllama":
            full = f"<|system|>\n{system}</s>\n"
            for msg in history:
                role = "user" if msg["role"] == "user" else "assistant"
                full += f"<|{role}|>\n{msg['content']}</s>\n"
            full += f"<|user|>\n{prompt}</s>\n<|assistant|>\n"
            return full, ["</s>", "<|user|>", "<|assistant|>"]

        elif fmt == "llama3":
            # Llama 3.2 header-tag template
            full = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>"
            for msg in history:
                role = "user" if msg["role"] == "user" else "assistant"
                full += f"<|start_header_id|>{role}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            full += f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
            return full, ["<|eot_id|>", "<|start_header_id|>", "</s>"]

        elif fmt == "phi":
            # Phi-2 Instruct/Output prompt (this template carries no history)
            full = f"Instruct: {system}\n{prompt}\nOutput:"
            return full, ["Instruct:", "Output:", "<|endoftext|>", "</s>"]

        # Fallback for unknown formats. (A duplicated, unreachable copy of
        # this return was removed.)
        return prompt, ["</s>"]

    def generate_stream(self, model_id: str, prompt: str, context: list = None, **kwargs) -> Generator[str, None, None]:
        """Yield generated text tokens one at a time for the given prompt."""
        llm = self.load_model(model_id)

        system_text = (
            "You are a highly accurate AI assistant. "
            "For math, ALWAYS use LaTeX wrapping display equations in [ ] and inline in ( )."
        )

        full_prompt, stop_tokens = self.format_prompt(model_id, system_text, context or [], prompt)

        # Use provided kwargs or defaults (mirrors ChatRequest's defaults).
        params = {
            "max_tokens": kwargs.get("max_tokens", 2048),
            "stop": stop_tokens,
            "stream": True,
            "temperature": kwargs.get("temperature", 0.7),
            "top_p": kwargs.get("top_p", 0.95),
            "repeat_penalty": kwargs.get("repeat_penalty", 1.1)
        }

        for output in llm(full_prompt, **params):
            token = output["choices"][0]["text"]
            yield token

model_manager = ModelManager()
ocr_engine.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from PIL import Image
3
+ import io
4
+ import os
5
+
6
class OCREngine:
    """Wraps pytesseract to pull text out of uploaded image bytes."""

    def __init__(self):
        # Linux containers are expected to have tesseract on PATH; on Windows
        # we must point pytesseract at the default install location explicitly.
        if os.name == 'nt':
            pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    def extract_text(self, image_content: bytes) -> str:
        """Run OCR on raw image bytes.

        Returns the stripped recognized text, or an "Error extracting text:"
        message string if anything fails (never raises).
        """
        try:
            img = Image.open(io.BytesIO(image_content))

            # Basic preprocessing: cap very large images to bound OCR time.
            if max(img.width, img.height) > 2000:
                img.thumbnail((2000, 2000))

            # Grayscale conversion before running tesseract.
            grayscale = img.convert('L')
            return pytesseract.image_to_string(grayscale).strip()
        except Exception as e:
            print(f"OCR Error: {e}")
            return f"Error extracting text: {str(e)}"

ocr_engine = OCREngine()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
llama-cpp-python
supabase
python-multipart
pytesseract
pillow
python-dotenv
aiohttp
requests