# Fellowship/fellowship/llm/client.py

"""
LLM client — the only module that communicates with the LLM backend.
Uses the OpenAI-compatible chat completions API via httpx.
All other modules call this; nothing else touches the LLM directly.
"""

import logging
from typing import AsyncIterator, Optional

import httpx

from fellowship.config import settings

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Retry config
# NOTE(review): neither constant is referenced by the code visible in this
# module yet; presumably they are meant for LLMClient.chat, whose docstring
# promises a single retry — confirm once that method is implemented.
# Number of retries after a failed request.
MAX_RETRIES = 1
# Presumably the pause between attempts — TODO confirm.
RETRY_DELAY = 2.0  # seconds


class LLMClient:
    """
    Client for an OpenAI-compatible chat completions backend.

    Holds the backend base URL and API key and builds the request headers.
    The request methods (``chat`` and ``chat_stream``) are stubs that
    currently raise ``NotImplementedError``.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> None:
        """
        Store connection parameters for the LLM backend.

        Args:
            base_url: Root URL of the backend. When falsy (``None`` or ``""``)
                ``settings.llm_base_url`` is used instead. Trailing slashes
                are stripped.
            api_key: Bearer token for the backend. When falsy,
                ``settings.llm_api_key`` is used instead.
        """
        # Strip trailing slashes so the stored URL has one canonical form.
        self.base_url = (base_url or settings.llm_base_url).rstrip("/")
        self.api_key = api_key or settings.llm_api_key

    async def chat(
        self,
        model: str,
        messages: list[dict],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        tools: Optional[list[dict]] = None,
    ) -> dict:
        """
        Send a chat completion request. Returns the full response dict.
        Retries once on failure before raising.

        Not implemented yet: awaiting this coroutine currently always fails.

        Raises:
            NotImplementedError: always, until the request logic is written.
        """
        raise NotImplementedError

    async def chat_stream(
        self,
        model: str,
        messages: list[dict],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> AsyncIterator[str]:
        """
        Send a streaming chat completion request.
        Yields content tokens as they arrive.
        Only used when stream_tokens is enabled.

        Not implemented yet. The method is an async generator, so it is
        consumed with ``async for``; the error below surfaces on the first
        iteration step rather than at call time.

        Raises:
            NotImplementedError: always, until the streaming logic is written.
        """
        raise NotImplementedError
        # Unreachable on purpose: the mere presence of ``yield`` makes this an
        # async generator function, matching the ``AsyncIterator[str]``
        # annotation. Without it the stub is a plain coroutine, and
        # ``async for`` over it fails with a TypeError instead of the
        # intended NotImplementedError.
        yield ""  # pragma: no cover

    def _headers(self) -> dict[str, str]:
        """Return the HTTP headers for a JSON request with bearer auth."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }