Scaffold all modules, route stubs, data models, and config. No logic implemented yet — all core methods raise NotImplementedError. Establishes the full directory layout matching the architecture in CLAUDE.md. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
63 lines
1.6 KiB
Python
63 lines
1.6 KiB
Python
"""
|
|
LLM client — the only module that communicates with the LLM backend.
|
|
Uses the OpenAI-compatible chat completions API via httpx.
|
|
All other modules call this; nothing else touches the LLM directly.
|
|
"""
|
|
|
|
import logging
|
|
from typing import AsyncIterator, Optional
|
|
|
|
import httpx
|
|
|
|
from fellowship.config import settings
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Retry config. NOTE(review): presumably consumed by LLMClient.chat once it is
# implemented; nothing in the current scaffold reads these values yet.
MAX_RETRIES: int = 1
RETRY_DELAY: float = 2.0  # seconds
|
|
|
|
|
|
class LLMClient:
    """Client for an OpenAI-compatible chat completions backend.

    Scaffold only: ``chat`` and ``chat_stream`` are stubs that raise
    ``NotImplementedError``. The constructor and ``_headers`` are functional.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> None:
        """Store connection settings, falling back to the project config.

        Args:
            base_url: Root URL of the backend. When omitted or empty,
                ``settings.llm_base_url`` is used. Trailing slashes are
                stripped.
            api_key: Bearer token. When omitted or empty,
                ``settings.llm_api_key`` is used.
        """
        self.base_url = (base_url or settings.llm_base_url).rstrip("/")
        self.api_key = api_key or settings.llm_api_key

    async def chat(
        self,
        model: str,
        messages: list[dict],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        tools: Optional[list[dict]] = None,
    ) -> dict:
        """Send a chat completion request and return the full response dict.

        Intended contract: retry once on failure before raising.

        Raises:
            NotImplementedError: Always — not implemented in this scaffold.
        """
        raise NotImplementedError

    async def chat_stream(
        self,
        model: str,
        messages: list[dict],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> AsyncIterator[str]:
        """Send a streaming chat completion request.

        Intended contract: yield content tokens as they arrive. Only used
        when stream_tokens is enabled.

        Raises:
            NotImplementedError: On first iteration — not implemented in this
                scaffold.
        """
        raise NotImplementedError
        # Unreachable on purpose: the mere presence of ``yield`` makes this an
        # async generator, so calling it returns an AsyncIterator as annotated
        # and ``async for`` surfaces NotImplementedError rather than a
        # TypeError about iterating over a coroutine.
        yield ""  # pragma: no cover

    def _headers(self) -> dict[str, str]:
        """Build the HTTP headers for a backend request.

        Returns:
            A dict with ``Content-Type: application/json`` and, when an API
            key is set, ``Authorization: Bearer <key>``.
        """
        headers: dict[str, str] = {}
        # Skip the header when no key is configured; otherwise the request
        # would carry the literal string "Bearer None" (or "Bearer ").
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        headers["Content-Type"] = "application/json"
        return headers
|