Spaces:

Peterase
/

rag-api-node-1

Running

File size: 2,704 Bytes

a63c61f

import json
import logging
import requests
import aiohttp
from typing import AsyncGenerator

from src.core.ports.llm_port import LlmPort
from src.core.config import settings

logger = logging.getLogger(__name__)

class OllamaAdapter(LlmPort):
    """LlmPort adapter that sends prompts to an Ollama server over HTTP.

    The host and model come from ``settings.OLLAMA_HOST`` and
    ``settings.OLLAMA_MODEL``; every request is POSTed to
    ``<host>/api/generate``.
    """

    # Seconds to wait for a blocking (non-streaming) completion.
    REQUEST_TIMEOUT_SECONDS = 180

    def __init__(self):
        """Read connection settings and derive the generate endpoint URL."""
        # Strip a trailing slash so the joined URL never contains "//api".
        self.host = settings.OLLAMA_HOST.rstrip('/')
        self.model = settings.OLLAMA_MODEL
        self.api_url = f"{self.host}/api/generate"

    def _build_payload(self, prompt: str, stream: bool) -> dict:
        """Return the JSON body for a generate request."""
        return {
            "model": self.model,
            "prompt": prompt,
            "stream": stream,
        }

    @staticmethod
    def _sse_token(token: str) -> str:
        """Format *token* as one Server-Sent Events ``data:`` frame."""
        return f"data: {json.dumps({'token': token})}\n\n"

    def generate(self, prompt: str) -> str:
        """Return the model's full completion for *prompt*.

        This call blocks until the server answers or the timeout expires.
        Failures are not raised: they are logged and an error message
        string is returned in place of the completion.
        """
        logger.debug(
            "Ollama request: model=%s url=%s prompt_chars=%d",
            self.model, self.api_url, len(prompt),
        )

        try:
            response = requests.post(
                self.api_url,
                json=self._build_payload(prompt, stream=False),
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            logger.debug("Ollama response status: %s", response.status_code)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error does not derive from RequestException.
            logger.error("Error communicating with Ollama: %s", e)
            return f"Error communicating with local LLM: {e}"

        result = data.get("response", "")
        logger.debug("Ollama generated response length: %d chars", len(result))
        return result

    async def generate_stream(self, prompt: str) -> AsyncGenerator[str, None]:
        """Stream the completion for *prompt* as Server-Sent Events frames.

        Each yielded string is a ``data: {"token": ...}`` frame. The final
        frame is always ``data: [DONE]``, including after a failure, in
        which case an ``[Error: ...]`` token frame precedes it.
        """
        payload = self._build_payload(prompt, stream=True)

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_url, json=payload) as response:
                    response.raise_for_status()
                    # The body is consumed line by line, one JSON object each.
                    async for raw_line in response.content:
                        line = raw_line.strip()
                        if not line:
                            # Blank separator line: nothing to parse.
                            continue

                        try:
                            data = json.loads(line.decode('utf-8'))
                        except json.JSONDecodeError:
                            # Keep the stream alive, but leave a trace
                            # instead of dropping the line silently.
                            logger.warning(
                                "Skipping malformed line from Ollama stream: %r",
                                line[:200],
                            )
                            continue

                        if "error" in data:
                            # Ollama reports mid-stream failures as an
                            # {"error": ...} object inside a 200 response.
                            logger.error(
                                "Ollama reported a streaming error: %s",
                                data["error"],
                            )
                            yield self._sse_token(f"[Error: {data['error']}]")
                            break

                        # Format as Server-Sent Events (SSE) for the frontend
                        yield self._sse_token(data.get("response", ""))

                        if data.get("done", False):
                            break
        except Exception as e:
            # Boundary handler: the consumer is mid-stream, so report the
            # failure in-band rather than raising.
            logger.error("Streaming error from Ollama: %s", e)
            yield self._sse_token(f"[Error: {e}]")

        yield "data: [DONE]\n\n"