Sandboxed Tool Execution
Step-by-step walkthrough for running LLM tool calls in isolated sandboxes, covering container-based isolation, resource limits, network restrictions, and output sanitization.
When LLMs invoke tools -- executing code, querying databases, calling APIs -- the actions happen in the real world with real consequences. Without sandboxing, a prompt injection that hijacks tool calls can read sensitive files, exfiltrate data, or modify systems. Sandboxed execution isolates tool calls in restricted environments where damage is contained even if the tool call is malicious.
Step 1: Design the Sandbox Architecture
# sandbox/architecture.py
"""
Sandbox architecture for LLM tool execution.
"""
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class SandboxConfig:
    """Resource and isolation limits for one sandboxed tool execution."""
    max_cpu_seconds: float = 10.0  # NOTE(review): not enforced by DockerSandbox in this file -- confirm enforcement
    max_memory_mb: int = 256  # container memory cap (docker --memory)
    max_disk_mb: int = 100  # NOTE(review): not enforced here (tmpfs is hard-coded to 50m) -- confirm
    network_enabled: bool = False  # when False the container runs with --network=none
    allowed_network_hosts: list[str] = field(default_factory=list)  # NOTE(review): unused in this file -- confirm intended use
    max_output_bytes: int = 10_000  # stdout/stderr truncation limit (characters, since text=True)
    timeout_seconds: float = 30.0  # wall-clock limit for the whole docker run
    read_only_filesystem: bool = True  # mount the container root filesystem read-only
    allowed_commands: list[str] = field(default_factory=list)  # NOTE(review): unused in this file -- confirm intended use
@dataclass
class SandboxResult:
    """Outcome of one sandboxed execution.

    Carries the captured (possibly truncated) output streams plus
    bookkeeping about how the run ended.
    """
    success: bool  # True iff the sandboxed process exited with code 0
    output: str  # captured stdout, truncated by the sandbox
    error: str = ""  # captured stderr, or an executor-level error message
    exit_code: int = 0
    execution_time_ms: float = 0.0  # wall-clock duration of the run
    resource_usage: dict = field(default_factory=dict)  # NOTE(review): never populated in this file -- confirm intended use
    was_killed: bool = False  # True when the run was forcibly terminated
    kill_reason: str = ""  # e.g. "timeout"; empty when was_killed is False

# Step 2: Build the Docker-Based Sandbox
# sandbox/docker_sandbox.py
"""
Docker-based sandbox for code execution.
"""
import subprocess
import tempfile
import time
import json
from pathlib import Path
from sandbox.architecture import SandboxConfig, SandboxResult
class DockerSandbox:
    """Executes untrusted Python code inside a short-lived Docker container.

    Isolation comes from Docker flags: a memory cap, a 1-CPU share, no
    network (unless enabled), a read-only root filesystem with a small
    tmpfs /tmp, an unprivileged user, and no-new-privileges to block
    privilege escalation.
    """

    def __init__(self, config: SandboxConfig = None):
        self.config = config or SandboxConfig()
        self.image = "python:3.11-slim"

    def execute_code(self, code: str) -> SandboxResult:
        """Run `code` in a fresh container and return a SandboxResult.

        The script is mounted read-only into the container. stdout/stderr
        are truncated to config.max_output_bytes characters (text mode,
        so characters rather than bytes). On timeout the container is
        explicitly killed: subprocess's timeout only terminates the local
        docker CLI client, not the container it started.
        """
        start = time.monotonic()
        # Unique name so a timed-out container can be killed by name.
        name = f"sandbox-{time.time_ns()}"
        with tempfile.TemporaryDirectory() as tmpdir:
            code_path = Path(tmpdir) / "script.py"
            code_path.write_text(code)
            cmd = ["docker", "run", "--rm", "--name", name]
            cmd.append(f"--memory={self.config.max_memory_mb}m")
            # NOTE(review): config.max_cpu_seconds is not enforced here --
            # this is only a CPU-share cap, not a CPU-time limit. Confirm
            # whether a time limit (e.g. ulimit) is also needed.
            cmd.append("--cpus=1")
            if not self.config.network_enabled:
                cmd.append("--network=none")
            if self.config.read_only_filesystem:
                cmd.append("--read-only")
            cmd += [
                "--tmpfs", "/tmp:size=50m",
                "-v", f"{code_path}:/sandbox/script.py:ro",
                "-w", "/sandbox",
                "--user", "nobody",
                "--security-opt", "no-new-privileges",
                self.image,
                "python", "script.py",
            ]
            try:
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=self.config.timeout_seconds,
                )
                return SandboxResult(
                    success=result.returncode == 0,
                    output=result.stdout[:self.config.max_output_bytes],
                    error=result.stderr[:self.config.max_output_bytes],
                    exit_code=result.returncode,
                    execution_time_ms=(time.monotonic() - start) * 1000,
                )
            except subprocess.TimeoutExpired:
                # Best-effort cleanup: the timeout above killed only the
                # docker client process; the container keeps running
                # unless we kill it by name.
                subprocess.run(["docker", "kill", name], capture_output=True)
                return SandboxResult(
                    success=False,
                    output="",
                    error="Execution timed out",
                    was_killed=True,
                    kill_reason="timeout",
                    execution_time_ms=(time.monotonic() - start) * 1000,
                )

# Step 3: Implement Code Pre-Screening
# sandbox/pre_screen.py
"""
Pre-screen code before sandbox execution to catch obvious threats.
"""
import re
from dataclasses import dataclass
@dataclass
class ScreenResult:
    """Verdict from CodePreScreener.screen."""
    allowed: bool  # False when a blocked pattern matched
    reason: str = ""  # description of the blocked pattern; "" when allowed
class CodePreScreener:
    """Regex denylist applied to code before it reaches the sandbox.

    NOTE(review): pattern matching is defense-in-depth only -- it is
    trivially bypassed (string concatenation, getattr on builtins,
    encodings). The sandbox itself must remain the real security
    boundary.
    """

    BLOCKED_PATTERNS = [
        (r"subprocess\.(run|call|Popen|check_output)", "subprocess execution"),
        (r"os\.(system|exec|spawn|popen)", "OS command execution"),
        (r"shutil\.(rmtree|move)", "filesystem modification"),
        (r"socket\.", "network socket access"),
        (r"requests\.(get|post|put|delete)", "HTTP requests"),
        (r"urllib", "URL access"),
        (r"__import__\(", "dynamic import"),
        (r"eval\(|exec\(", "dynamic code execution"),
        # Match only an explicit write-ish mode argument ('w', 'a', 'x',
        # or '+'). The previous pattern, open\(.*(w|a|x).*\), fired on ANY
        # open() line containing those letters anywhere -- including
        # read-only calls like open('data.csv').
        (r"open\([^)]*,\s*(?:mode\s*=\s*)?['\"][^'\"]*[wax+]", "file write access"),
        (r"/etc/|/proc/|/sys/", "system file access"),
    ]

    def screen(self, code: str) -> ScreenResult:
        """Return a deny verdict for the first blocked pattern found in
        `code`, or an allow verdict if nothing matches."""
        for pattern, description in self.BLOCKED_PATTERNS:
            if re.search(pattern, code):
                return ScreenResult(
                    allowed=False,
                    reason=f"Blocked pattern: {description}",
                )
        return ScreenResult(allowed=True)

# Step 4: Build Output Sanitization
# sandbox/output_sanitizer.py
"""
Sanitize sandbox output before returning to the LLM.
"""
import re
class OutputSanitizer:
    """Scrubs sandbox output before it is returned to the LLM.

    Masks values that could leak host details (home paths, IP addresses,
    temp directories) or credentials, then truncates overly long output.
    """

    SENSITIVE_PATTERNS = [
        (r"/home/\w+", "[HOME_DIR]"),
        (r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", "[IP_ADDR]"),
        (r"(?i)(password|secret|token|key)\s*[:=]\s*\S+", "[REDACTED_SECRET]"),
        (r"/tmp/tmp[\w]+", "[TEMP_DIR]"),
    ]

    # The source had this suffix split across a raw newline inside the
    # string literal (a syntax error); reconstructed with an escaped \n.
    TRUNCATION_SUFFIX = "\n... [output truncated]"

    def sanitize(self, output: str, max_len: int = 5000) -> str:
        """Return `output` with sensitive substrings masked, truncated to
        `max_len` characters plus a truncation marker when too long."""
        result = output
        for pattern, replacement in self.SENSITIVE_PATTERNS:
            result = re.sub(pattern, replacement, result)
        if len(result) > max_len:
            result = result[:max_len] + self.TRUNCATION_SUFFIX
        return result

# Step 5: Integrate with the LLM Tool Pipeline
# sandbox/tool_executor.py
"""
Sandboxed tool execution for LLM function calls.
"""
from sandbox.docker_sandbox import DockerSandbox
from sandbox.pre_screen import CodePreScreener
from sandbox.output_sanitizer import OutputSanitizer
from sandbox.architecture import SandboxConfig
class SandboxedToolExecutor:
    """Pipeline for LLM tool calls: pre-screen -> sandbox -> sanitize."""

    def __init__(self, config: SandboxConfig = None):
        self.sandbox = DockerSandbox(config)
        self.screener = CodePreScreener()
        self.sanitizer = OutputSanitizer()

    def execute(self, tool_name: str, code: str) -> dict:
        """Run `code` for tool `tool_name` through the full pipeline.

        Always returns a dict with all four keys: success, output, error,
        execution_time_ms. (The previous blocked-code path omitted
        "error" and "execution_time_ms", which broke consumers that
        unpack the dict -- e.g. ExecuteResponse(**result) in the API.)
        """
        # Pre-screen: cheap denylist before spending a container.
        screen = self.screener.screen(code)
        if not screen.allowed:
            return {
                "success": False,
                "output": f"Code blocked: {screen.reason}",
                "error": "",
                "execution_time_ms": 0.0,
            }
        # Execute in sandbox.
        result = self.sandbox.execute_code(code)
        # Sanitize both streams before they reach the LLM.
        return {
            "success": result.success,
            "output": self.sanitizer.sanitize(result.output),
            "error": self.sanitizer.sanitize(result.error),
            "execution_time_ms": result.execution_time_ms,
        }

# Step 6: Deploy as a Service
# sandbox/api.py
from fastapi import FastAPI
from pydantic import BaseModel
from sandbox.tool_executor import SandboxedToolExecutor
# Module-level singletons: one FastAPI app and one shared executor reused
# across requests.
app = FastAPI(title="Sandboxed Tool Execution")
executor = SandboxedToolExecutor()
class ExecuteRequest(BaseModel):
    """Request body for POST /execute."""
    tool_name: str  # tool identifier, forwarded to the executor
    code: str  # source code to run inside the sandbox
class ExecuteResponse(BaseModel):
    """Response body for POST /execute; mirrors the dict returned by
    SandboxedToolExecutor.execute."""
    success: bool
    output: str  # sanitized stdout, or a "Code blocked: ..." message
    error: str  # sanitized stderr
    execution_time_ms: float
@app.post("/execute", response_model=ExecuteResponse)
async def execute_tool(request: ExecuteRequest):
    """Run one tool call through the sandboxed executor pipeline."""
    result = executor.execute(request.tool_name, request.code)
    # .get defaults guard against executor responses that omit optional
    # keys (e.g. pre-screen rejections); ExecuteResponse(**result) would
    # otherwise raise a validation error and turn a rejection into a 500.
    return ExecuteResponse(
        success=result["success"],
        output=result.get("output", ""),
        error=result.get("error", ""),
        execution_time_ms=result.get("execution_time_ms", 0.0),
    )

# Run with: uvicorn sandbox.api:app --port 8640

# Step 7: Test Sandbox Isolation
# tests/test_sandbox.py
import pytest
from sandbox.pre_screen import CodePreScreener
from sandbox.output_sanitizer import OutputSanitizer
def test_safe_code_allowed():
    # Plain arithmetic contains no blocked patterns.
    verdict = CodePreScreener().screen("print(2 + 2)")
    assert verdict.allowed
def test_subprocess_blocked():
    # subprocess.run is on the denylist and must be rejected.
    verdict = CodePreScreener().screen("import subprocess; subprocess.run(['ls'])")
    assert verdict.allowed is False
def test_network_blocked():
    # Outbound HTTP via requests must be rejected.
    verdict = CodePreScreener().screen("import requests; requests.get('http://evil.com')")
    assert not verdict.allowed
def test_output_sanitization():
    """Home directories and IP addresses must be masked before output is
    handed back to the LLM."""
    sanitizer = OutputSanitizer()
    output = "File at /home/user/data.txt, server at 192.168.1.1"
    cleaned = sanitizer.sanitize(output)
    assert "/home/user" not in cleaned
    assert "192.168.1.1" not in cleaned
    # The replacement placeholders should appear instead.
    assert "[HOME_DIR]" in cleaned
    assert "[IP_ADDR]" in cleaned

# Run with: pytest tests/test_sandbox.py -v

# Related Topics
- Capability-Based Access Control -- Permission control for tools
- Tool Permission Scoping -- Least-privilege for agent tools
- Monitoring Agent Tool Calls -- Observability for tool execution
- Agent Action Allowlists -- Restricting permitted actions
Why is --network=none important when sandboxing LLM-generated code execution? Because it removes the container's network stack entirely: even if a prompt injection smuggles malicious code past pre-screening, that code cannot exfiltrate data, contact attacker-controlled servers, or pivot to internal services -- network-based damage is cut off at the isolation boundary.