Import: from gaia.llm.llm_client import LLMClient
Detailed Spec: spec/llm-client
Purpose: Unified client for local and cloud LLM providers.
LLM Client
from gaia.llm.llm_client import LLMClient
# Local LLM (Lemonade server)
llm = LLMClient(
base_url="http://localhost:8000/api/v1",
system_prompt="You are a helpful assistant"
)
# Generate response
response = llm.generate(
prompt="What is AI?",
model="Qwen2.5-0.5B-Instruct-CPU",
max_tokens=200,
temperature=0.7
)
print(response)
# Streaming response: with stream=True, generate() yields text chunks incrementally
for chunk in llm.generate(prompt="Tell me a story", stream=True):
print(chunk, end="", flush=True)
# Chat completions format
messages = [
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Hi! How can I help?"},
{"role": "user", "content": "Tell me about Python"}
]
response = llm.chat_completions(
messages=messages,
model="Qwen2.5-0.5B-Instruct-CPU"
)
print(response)
# List available models
models = llm.get_available_models()
for model in models:
print(f"- {model}")
Cloud Providers
from gaia.llm.llm_client import LLMClient
# Claude API
llm_claude = LLMClient(
use_claude=True,
claude_model="claude-sonnet-4-20250514",
system_prompt="You are an expert coder"
)
# OpenAI API
llm_openai = LLMClient(
use_openai=True,
system_prompt="You are a helpful assistant"
)
# Cloud clients expose the same generate() interface as the local client
response = llm_claude.generate("Explain Python decorators")
Lemonade Client (AMD-Optimized)
Import: from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME, DEFAULT_LEMONADE_URL
from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME, DEFAULT_LEMONADE_URL
# Default configuration
MODEL = DEFAULT_MODEL_NAME # "Qwen2.5-0.5B-Instruct-CPU"
URL = DEFAULT_LEMONADE_URL # "http://localhost:8000/api/v1"
# These defaults are used internally by LLMClient when no cloud provider (use_claude / use_openai) is specified