""" LLM client — the only module that communicates with the LLM backend. Uses the OpenAI-compatible chat completions API via httpx. All other modules call this; nothing else touches the LLM directly. """ import logging from typing import AsyncIterator, Optional import httpx from fellowship.config import settings logger = logging.getLogger(__name__) # Retry config MAX_RETRIES = 1 RETRY_DELAY = 2.0 # seconds class LLMClient: def __init__( self, base_url: Optional[str] = None, api_key: Optional[str] = None, ) -> None: self.base_url = (base_url or settings.llm_base_url).rstrip("/") self.api_key = api_key or settings.llm_api_key async def chat( self, model: str, messages: list[dict], temperature: Optional[float] = None, max_tokens: Optional[int] = None, tools: Optional[list[dict]] = None, ) -> dict: """ Send a chat completion request. Returns the full response dict. Retries once on failure before raising. """ raise NotImplementedError async def chat_stream( self, model: str, messages: list[dict], temperature: Optional[float] = None, max_tokens: Optional[int] = None, ) -> AsyncIterator[str]: """ Send a streaming chat completion request. Yields content tokens as they arrive. Only used when stream_tokens is enabled. """ raise NotImplementedError def _headers(self) -> dict[str, str]: return { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", }