This guide covers everything you need to integrate Muxx with your OpenAI-powered application.
## Choose Your Integration
### Option 1: Gateway Only (Quickest)
Point your client at the gateway and pass your Muxx API key as a header:
```python
from openai import OpenAI

client = OpenAI(
    base_url="https://gateway.muxx.dev/v1",
    default_headers={
        "X-Muxx-Api-Key": "muxx_sk_live_xxxxxxxxxxxx"
    }
)

# Use normally - all requests are logged
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
```
**Pros:** caching and rate limiting, with no changes to your call sites
**Cons:** request-level logging only (no trace/span hierarchy)
### Option 2: SDK Only (Deep Tracing)
Wrap your client:
```python
from muxx import Muxx
from openai import OpenAI

muxx = Muxx()
client = muxx.wrap(OpenAI())

# Use normally - all requests are traced
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
```
**Pros:** trace/span hierarchy, decorators, async batching
**Cons:** no caching or rate limiting
### Option 3: SDK + Gateway (Full Power)
Combine both:
```python
from muxx import Muxx
from openai import OpenAI

muxx = Muxx()
client = muxx.wrap(OpenAI(
    base_url="https://gateway.muxx.dev/v1",
    default_headers={"X-Muxx-Api-Key": muxx.api_key}
))
```
**Pros:** everything above (tracing, caching, rate limiting)
**Cons:** slightly more setup
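
Once configured, the combined client is used exactly like the previous two options; this sketch just repeats the earlier call to show that nothing else changes:

```python
# Requests now pass through the gateway (caching, rate limiting)
# and are traced by the SDK wrapper.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
```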
## Common Patterns
### Chat Application
```python
from muxx import Muxx, trace
from openai import OpenAI

muxx = Muxx()
client = muxx.wrap(OpenAI())

@trace("chat-turn")
def handle_message(user_id: str, message: str, history: list) -> str:
    messages = history + [{"role": "user", "content": message}]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )
    return response.choices[0].message.content
```
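
A typical call site keeps the history on the caller's side. A usage sketch (the history bookkeeping is an assumption about your app, not part of the Muxx API):

```python
history: list = []
reply = handle_message("user-123", "Hello!", history)

# Persist both sides of the turn so the next call has context.
history.append({"role": "user", "content": "Hello!"})
history.append({"role": "assistant", "content": reply})
```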
### RAG Application
```python
from muxx import Muxx, trace
from openai import OpenAI

muxx = Muxx()
client = muxx.wrap(OpenAI())

@trace("rag-query")
def answer_question(question: str) -> str:
    # Get embedding
    with muxx.span("embed-query"):
        embedding = client.embeddings.create(
            model="text-embedding-3-small",
            input=question
        )

    # Search (your vector DB)
    with muxx.span("search"):
        documents = search_similar(embedding.data[0].embedding)

    # Generate answer
    with muxx.span("generate"):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": f"Context: {documents}"},
                {"role": "user", "content": question}
            ]
        )
    return response.choices[0].message.content
```
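
`search_similar` above stands in for your own retrieval layer. A hypothetical stub (not part of Muxx; replace it with a real vector database query) keeps the snippet runnable for experimentation:

```python
def search_similar(query_embedding: list[float]) -> list[str]:
    # Hypothetical placeholder: a real implementation would query
    # your vector database with the embedding and return the top hits.
    return ["Paris is the capital of France."]
```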
### Function Calling
```python
@trace("function-call")
def handle_with_tools(message: str) -> str:
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": {"type": "object", "properties": {...}}
            }
        }
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}],
        tools=tools
    )
    # Tool calls are logged automatically
    if response.choices[0].message.tool_calls:
        # Handle tool calls
        pass
    return response.choices[0].message.content
```
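
The `# Handle tool calls` branch above is where you execute the function and send its result back for a second model turn. A sketch of that round trip (`run_tool_calls` and `get_weather` are hypothetical helpers you would define yourself):

```python
import json

def run_tool_calls(response, messages: list, tools: list) -> str:
    msg = response.choices[0].message
    # Feed the assistant's tool-call message back into the history.
    messages.append(msg)
    for call in msg.tool_calls:
        args = json.loads(call.function.arguments)
        result = get_weather(**args)  # hypothetical local implementation
        messages.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": json.dumps(result),
        })
    # Second model turn: it reads the tool output and produces the answer.
    follow_up = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tools,
    )
    return follow_up.choices[0].message.content
```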
### Streaming
Both the gateway and the SDK support streaming:
```python
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a story"}],
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")

# Complete response is logged when stream ends
```
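
Muxx logs the complete response once the stream ends, per the comment above; if your own code also needs the full text, accumulate the deltas as you go (replacing the print loop above):

```python
parts: list[str] = []
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        parts.append(delta)
        print(delta, end="")

full_text = "".join(parts)
```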
### Error Handling
```python
from openai import OpenAIError, RateLimitError

@trace("api-call")
def safe_call(message: str) -> str:
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": message}]
        )
        return response.choices[0].message.content
    except RateLimitError:
        # Logged as error in trace
        raise
    except OpenAIError:
        # Logged with error details
        raise
```
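
If you would rather retry rate limits than re-raise them, a plain-Python backoff wrapper works on top of `safe_call` (the retry policy here is an assumption about your app, not a Muxx feature):

```python
import time

def call_with_backoff(message: str, max_attempts: int = 3) -> str:
    for attempt in range(max_attempts):
        try:
            return safe_call(message)
        except RateLimitError:
            if attempt == max_attempts - 1:
                raise
            # Exponential backoff: 1s, 2s, 4s, ...
            time.sleep(2 ** attempt)
```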
## Cost Optimization
- Use `gpt-4o-mini` for simple tasks (roughly 20x cheaper than `gpt-4o`; see the sketch below)
- Enable caching for repeated queries
- Set `max_tokens` to limit output length
- Monitor the dashboard to identify expensive patterns
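
Combining the first and third points, a cheap, bounded call looks like this (the 150-token cap is an arbitrary example value):

```python
response = client.chat.completions.create(
    model="gpt-4o-mini",  # far cheaper than gpt-4o for simple tasks
    messages=[{"role": "user", "content": "Summarize this in one line: ..."}],
    max_tokens=150        # hard cap on output length, and thus output cost
)
```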