Unicode Normalization Defense
Step-by-step walkthrough for implementing Unicode normalization to prevent encoding-based prompt injection bypasses, covering homoglyph detection, invisible character stripping, bidirectional text handling, and normalization testing.
Attackers use Unicode to make malicious text look different to security filters while appearing identical (or nearly identical) to the LLM's tokenizer. A fullwidth Ｉ looks like a regular I to humans and models, but a regex checking for "Ignore" will miss "Ｉgnore". Invisible characters can break up keywords, and bidirectional overrides can make text render in a different order than it is stored. This walkthrough builds a Unicode normalization layer that collapses these tricks before any security filter runs.
Step 1: Map the Unicode Attack Surface
Understand the categories of Unicode-based attacks before building defenses.
# unicode_defense/attack_surface.py
"""
Catalog of Unicode-based attack techniques against LLM filters.
Used for testing and documentation.
"""

# Keyed by attack family. Each entry carries a human-readable description
# plus concrete payload examples that the test suite reuses as fixtures.
ATTACK_CATEGORIES = {
    "homoglyphs": {
        "description": "Characters from different scripts that look identical to ASCII",
        "examples": [
            {"attack": "\u0406gnore", "looks_like": "Ignore", "script": "Cyrillic І"},
            {"attack": "\u0410ll", "looks_like": "All", "script": "Cyrillic А"},
            {"attack": "\uff29gnore", "looks_like": "Ignore", "script": "Fullwidth I"},
            {"attack": "pr\u043evious", "looks_like": "previous", "script": "Cyrillic о"},
        ],
    },
    "invisible_characters": {
        "description": "Zero-width and invisible characters that break keyword matching",
        "examples": [
            {"char": "\u200b", "name": "Zero-width space", "usage": "ign\u200bore"},
            {"char": "\u200c", "name": "Zero-width non-joiner", "usage": "sys\u200ctem"},
            {"char": "\u200d", "name": "Zero-width joiner", "usage": "prom\u200dpt"},
            {"char": "\ufeff", "name": "BOM / zero-width no-break space", "usage": "\ufeffignore"},
            {"char": "\u00ad", "name": "Soft hyphen", "usage": "in\u00adstructions"},
        ],
    },
    "bidirectional": {
        "description": "Right-to-left overrides that change text rendering order",
        "examples": [
            {"char": "\u202e", "name": "Right-to-left override"},
            {"char": "\u202d", "name": "Left-to-right override"},
            {"char": "\u200f", "name": "Right-to-left mark"},
        ],
    },
    "combining_characters": {
        "description": "Diacritics and combining marks that modify base characters",
        "examples": [
            {"attack": "i\u0307gnore", "looks_like": "ignore with dot above i"},
            {"attack": "syste\u0308m", "looks_like": "system with diaeresis"},
        ],
    },
    "encoding_variants": {
        "description": "Same character in different Unicode forms",
        "examples": [
            {"nfc": "\u00e9", "nfd": "e\u0301", "display": "e with acute"},
            {"nfc": "\u00f1", "nfd": "n\u0303", "display": "n with tilde"},
        ],
    },
}

# Step 2: Implement Core Normalization
# unicode_defense/normalizer.py
"""
Unicode normalization engine for LLM input defense.
"""
import unicodedata
import re
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class NormalizationResult:
    """Outcome of normalizing one input string."""

    original: str    # text exactly as received
    normalized: str  # text after all normalization passes
    # Human-readable audit log of what each pass changed.
    changes: list[str] = field(default_factory=list)
    # Signals that downstream risk scoring consumes.
    risk_indicators: list[str] = field(default_factory=list)


class UnicodeNormalizer:
    """Collapse Unicode obfuscation tricks before security filters run.

    Applies, in order: Unicode normalization (NFKC by default), bidi
    control stripping, invisible/control character stripping, a cap on
    consecutive combining marks, and whitespace normalization.
    """

    # Unicode general categories that should always be stripped.
    STRIP_CATEGORIES = {
        "Cf",  # Format characters (zero-width, bidirectional)
        "Cc",  # Control characters (except common whitespace)
        "Co",  # Private use
        "Cn",  # Unassigned
    }

    # Code points preserved even though they fall in the Cc category.
    PRESERVE_CHARS = {
        0x0009,  # Tab
        0x000A,  # Line feed
        0x000D,  # Carriage return
        0x0020,  # Space
    }

    def __init__(
        self,
        normalization_form: str = "NFKC",
        strip_invisibles: bool = True,
        strip_bidi: bool = True,
        max_combining_marks: int = 3,
    ):
        self.form = normalization_form
        self.strip_invisibles = strip_invisibles
        self.strip_bidi = strip_bidi
        self.max_combining_marks = max_combining_marks

    def normalize(self, text: str) -> NormalizationResult:
        """Run all passes over *text* and return an audited result."""
        result = NormalizationResult(original=text, normalized=text)
        current = text
        # Step 1: Unicode normalization (NFKC folds fullwidth/compatibility forms)
        normalized = unicodedata.normalize(self.form, current)
        if normalized != current:
            # BUG FIX: the old message reported len(before) - len(after) as a
            # "changed chars" count, which can be zero or negative even when
            # the text changed. Report the honest length transition instead.
            result.changes.append(
                f"Applied {self.form} normalization "
                f"(length {len(current)} -> {len(normalized)})"
            )
            current = normalized
        # Step 2: Handle bidirectional overrides.
        # BUG FIX: this must run BEFORE the generic invisible-character pass.
        # Every bidi control has general category Cf, so when invisibles were
        # stripped first the bidi pass never saw them and the
        # "Bidirectional overrides detected" risk indicator could never fire.
        if self.strip_bidi:
            current = self._strip_bidi(current, result)
        # Step 3: Strip invisible and control characters
        if self.strip_invisibles:
            current = self._strip_invisibles(current, result)
        # Step 4: Limit combining marks
        current = self._limit_combining(current, result)
        # Step 5: Normalize whitespace
        current = self._normalize_whitespace(current, result)
        result.normalized = current
        return result

    def _strip_invisibles(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Drop characters in STRIP_CATEGORIES, keeping common whitespace."""
        stripped = []
        count = 0
        for char in text:
            cp = ord(char)
            cat = unicodedata.category(char)
            if cat in self.STRIP_CATEGORIES and cp not in self.PRESERVE_CHARS:
                count += 1
                continue
            stripped.append(char)
        if count > 0:
            result.changes.append(f"Stripped {count} invisible characters")
            # A handful may be legitimate (e.g. ZWJ in emoji); many is a tell.
            if count > 5:
                result.risk_indicators.append(
                    f"High invisible character count: {count}"
                )
        return "".join(stripped)

    def _strip_bidi(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Remove bidirectional control characters and flag their presence."""
        bidi_chars = {
            "\u200e", "\u200f",  # LRM, RLM
            "\u202a", "\u202b", "\u202c",  # LRE, RLE, PDF
            "\u202d", "\u202e",  # LRO, RLO
            "\u2066", "\u2067", "\u2068", "\u2069",  # Isolates
        }
        cleaned = []
        bidi_count = 0
        for char in text:
            if char in bidi_chars:
                bidi_count += 1
                continue
            cleaned.append(char)
        if bidi_count > 0:
            result.changes.append(
                f"Stripped {bidi_count} bidirectional control characters"
            )
            # Any bidi override in prompt text is suspicious on its own.
            result.risk_indicators.append(
                "Bidirectional overrides detected"
            )
        return "".join(cleaned)

    def _limit_combining(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Keep at most max_combining_marks consecutive combining marks."""
        output = []
        combining_count = 0
        for char in text:
            if unicodedata.category(char).startswith("M"):
                combining_count += 1
                # Marks beyond the cap are silently dropped.
                if combining_count <= self.max_combining_marks:
                    output.append(char)
            else:
                # A base character resets the run counter.
                combining_count = 0
                output.append(char)
        cleaned = "".join(output)
        if len(cleaned) != len(text):
            result.changes.append("Excessive combining marks removed")
        return cleaned

    def _normalize_whitespace(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Tame runs of spaces, line endings, and blank lines.

        Intentionally not recorded in result.changes: whitespace tweaks
        are cosmetic and carry no security signal.
        """
        # Collapse runs of 3+ spaces to a single space
        cleaned = re.sub(r" {3,}", " ", text)
        # Normalize line endings to \n
        cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
        # Collapse runs of 4+ newlines to at most three
        cleaned = re.sub(r"\n{4,}", "\n\n\n", cleaned)
        return cleaned

# Step 3: Build a Homoglyph Detector
Detect and replace characters from other scripts that look like ASCII.
# unicode_defense/homoglyph.py
"""
Homoglyph detection and normalization.
Maps confusable characters to their ASCII equivalents.
"""
import unicodedata
from typing import Optional
class HomoglyphDetector:
    """Find characters from non-Latin scripts that render like ASCII and
    map them back to their ASCII equivalents."""

    # Common confusable mappings (script-specific -> ASCII)
    CONFUSABLES = {
        # Cyrillic confusables
        "\u0410": "A", "\u0412": "B", "\u0421": "C",
        "\u0415": "E", "\u041d": "H", "\u041a": "K",
        "\u041c": "M", "\u041e": "O", "\u0420": "P",
        "\u0422": "T", "\u0425": "X",
        "\u0430": "a", "\u0435": "e", "\u043e": "o",
        "\u0440": "p", "\u0441": "c", "\u0443": "y",
        "\u0445": "x", "\u0456": "i",
        # Greek confusables
        "\u0391": "A", "\u0392": "B", "\u0395": "E",
        "\u0397": "H", "\u0399": "I", "\u039a": "K",
        "\u039c": "M", "\u039d": "N", "\u039f": "O",
        "\u03a1": "P", "\u03a4": "T", "\u03a7": "X",
        "\u03b1": "a", "\u03bf": "o",
        # Mathematical symbols
        "\u2133": "M", "\u2134": "o",
        # Fullwidth Latin (handled by NFKC but listed for completeness)
        "\uff21": "A", "\uff22": "B", "\uff23": "C",
    }

    def __init__(self, strict: bool = False):
        # NOTE(review): `strict` is stored but not yet consulted anywhere in
        # this class — presumably reserved for a tighter mapping policy.
        self.strict = strict
        self._build_lookup()

    def _build_lookup(self) -> None:
        # Copy so per-instance additions never mutate the class constant.
        self._lookup = dict(self.CONFUSABLES)

    def detect_and_replace(self, text: str) -> tuple[str, list[dict]]:
        """Replace confusable characters with ASCII equivalents."""
        replacements = []
        out_chars = []
        for idx, ch in enumerate(text):
            ascii_equiv = self._lookup.get(ch)
            if ascii_equiv is None:
                out_chars.append(ch)
                continue
            # Record an audit entry for every substitution made.
            replacements.append({
                "position": idx,
                "original": ch,
                "original_name": unicodedata.name(ch, "UNKNOWN"),
                "replacement": ascii_equiv,
            })
            out_chars.append(ascii_equiv)
        return "".join(out_chars), replacements

    def has_mixed_scripts(self, text: str) -> bool:
        """Detect if text mixes multiple Unicode scripts suspiciously."""
        seen_scripts = set()
        for ch in text:
            if not ch.isalpha():
                continue
            try:
                # First word of the character name approximates its script,
                # e.g. "LATIN SMALL LETTER A" -> "LATIN".
                seen_scripts.add(unicodedata.name(ch).split()[0])
            except ValueError:
                # Unnamed code points carry no script information.
                pass
        # Mixing Latin with Cyrillic or Greek is the classic homoglyph tell.
        suspicious_mixes = (
            {"LATIN", "CYRILLIC"},
            {"LATIN", "GREEK"},
        )
        return any(mix.issubset(seen_scripts) for mix in suspicious_mixes)

# Step 4: Create the Unified Defense Layer
Combine normalization and homoglyph detection into a single preprocessing step.
# unicode_defense/defense.py
"""
Unified Unicode defense layer for LLM input pipelines.
"""
from dataclasses import dataclass, field
from unicode_defense.normalizer import UnicodeNormalizer
from unicode_defense.homoglyph import HomoglyphDetector
@dataclass
class DefenseResult:
    """Summary of one pass through the Unicode defense layer."""

    original: str    # raw input text as received
    cleaned: str     # text after normalization and homoglyph replacement
    was_modified: bool   # True when cleaned differs from original
    risk_score: float    # 0.0 (benign) .. 1.0 (highly suspicious)
    # Reasons the score is non-zero (mixed scripts, bidi overrides, ...).
    indicators: list[str] = field(default_factory=list)
    homoglyph_replacements: int = 0   # number of confusables replaced
    invisible_chars_removed: int = 0  # number of invisible characters stripped
class UnicodeDefenseLayer:
    """Combine Unicode normalization and homoglyph replacement into one
    preprocessing step that also produces a risk score in [0.0, 1.0]."""

    def __init__(self):
        self.normalizer = UnicodeNormalizer()
        self.homoglyph = HomoglyphDetector()

    def process(self, text: str) -> DefenseResult:
        """Normalize *text* and score how suspicious its encoding was."""
        indicators = []
        risk_score = 0.0
        # Step 1: mixed Latin/Cyrillic or Latin/Greek text is a homoglyph tell
        if self.homoglyph.has_mixed_scripts(text):
            indicators.append("Mixed Unicode scripts detected")
            risk_score += 0.3
        # Step 2: Normalize Unicode (NFKC, bidi, invisibles, combining marks)
        norm_result = self.normalizer.normalize(text)
        current = norm_result.normalized
        indicators.extend(norm_result.risk_indicators)
        if norm_result.risk_indicators:
            risk_score += 0.2 * len(norm_result.risk_indicators)
        # BUG FIX: invisible_chars_removed was declared on DefenseResult but
        # never populated (always 0). Recover the count from the normalizer's
        # audit log entry "Stripped N invisible characters".
        invisible_removed = 0
        for change in norm_result.changes:
            if change.startswith("Stripped ") and change.endswith(" invisible characters"):
                invisible_removed = int(change.split()[1])
        # Step 3: Replace confusable characters with ASCII equivalents
        cleaned, replacements = self.homoglyph.detect_and_replace(current)
        if replacements:
            indicators.append(
                f"{len(replacements)} homoglyph replacements"
            )
            # Cap the homoglyph contribution so it cannot dominate alone.
            risk_score += min(0.1 * len(replacements), 0.5)
        return DefenseResult(
            original=text,
            cleaned=cleaned,
            was_modified=(cleaned != text),
            risk_score=min(risk_score, 1.0),
            indicators=indicators,
            homoglyph_replacements=len(replacements),
            invisible_chars_removed=invisible_removed,
        )

# Step 5: Build Encoding Attack Tests
# tests/test_unicode_defense.py
"""
Tests for Unicode normalization defense layer.
"""
import pytest
from unicode_defense.defense import UnicodeDefenseLayer
from unicode_defense.normalizer import UnicodeNormalizer
@pytest.fixture
def defense():
    """Fresh defense layer for each test."""
    return UnicodeDefenseLayer()


class TestHomoglyphAttacks:
    def test_cyrillic_homoglyph(self, defense):
        # Cyrillic "А" (U+0410) looks like Latin "A"
        payload = "\u0410ll previous instructions"
        outcome = defense.process(payload)
        assert "All previous instructions" in outcome.cleaned

    def test_mixed_script_detection(self, defense):
        payload = "\u0406gnore previous"  # Cyrillic І + Latin
        outcome = defense.process(payload)
        assert outcome.risk_score > 0
        assert any("Mixed" in i for i in outcome.indicators)

    def test_fullwidth_normalization(self, defense):
        # Fullwidth IGNORE -> ASCII IGNORE via NFKC
        payload = "\uff29\uff27\uff2e\uff2f\uff32\uff25"
        outcome = defense.process(payload)
        assert outcome.cleaned == "IGNORE"


class TestInvisibleCharacters:
    def test_zero_width_space(self, defense):
        payload = "ign\u200bore all"
        outcome = defense.process(payload)
        assert "\u200b" not in outcome.cleaned
        assert "ignore all" in outcome.cleaned

    def test_zero_width_joiner(self, defense):
        payload = "sys\u200dtem prompt"
        outcome = defense.process(payload)
        assert "\u200d" not in outcome.cleaned

    def test_bom_stripping(self, defense):
        payload = "\ufeffignore instructions"
        outcome = defense.process(payload)
        assert "\ufeff" not in outcome.cleaned


class TestBidirectional:
    def test_rtl_override_stripped(self, defense):
        payload = "\u202eignore\u202c instructions"
        outcome = defense.process(payload)
        assert "\u202e" not in outcome.cleaned
        assert "Bidirectional" in str(outcome.indicators)


class TestBenignInput:
    def test_ascii_unchanged(self, defense):
        message = "What is the weather today?"
        outcome = defense.process(message)
        assert outcome.cleaned == message
        assert not outcome.was_modified

    def test_legitimate_unicode_preserved(self, defense):
        message = "Qu'est-ce que c'est?"
        outcome = defense.process(message)
        assert "'" in outcome.cleaned

    def test_emoji_handled(self, defense):
        message = "Great job! \U0001f44d"
        outcome = defense.process(message)
        assert "\U0001f44d" in outcome.cleaned

# Run with: pytest tests/test_unicode_defense.py -v
# Step 6: Deploy as a Pipeline Preprocessor
# unicode_defense/middleware.py
"""
ASGI middleware for Unicode normalization.
Runs before all other security filters.
"""
import json
import logging
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from unicode_defense.defense import UnicodeDefenseLayer
logger = logging.getLogger("unicode_defense")
class UnicodeDefenseMiddleware(BaseHTTPMiddleware):
    """Normalize Unicode in inbound JSON prompt fields before any other
    security filter runs; reject requests whose encoding tricks push the
    risk score past `risk_threshold` with HTTP 422.
    """

    def __init__(self, app, risk_threshold: float = 0.8):
        super().__init__(app)
        self.defense = UnicodeDefenseLayer()
        self.risk_threshold = risk_threshold

    async def dispatch(self, request: Request, call_next):
        # Only POST bodies carry user prompts in this API.
        if request.method != "POST":
            return await call_next(request)
        try:
            body = await request.body()
            data = json.loads(body)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Not JSON; let downstream handlers reject or process it.
            return await call_next(request)
        # BUG FIX: json.loads may return a list, string, or number. With a
        # list body, `key in data` can succeed and `data[key]` then raises
        # TypeError. Only dict payloads have named fields to normalize.
        if not isinstance(data, dict):
            return await call_next(request)
        # Normalize all string fields that may carry prompt text
        modified = False
        for key in ["prompt", "message", "input", "query"]:
            if key in data and isinstance(data[key], str):
                result = self.defense.process(data[key])
                if result.risk_score >= self.risk_threshold:
                    logger.warning(
                        f"High-risk Unicode input blocked: "
                        f"score={result.risk_score}, "
                        f"indicators={result.indicators}"
                    )
                    return JSONResponse(
                        {"error": "Input contains suspicious encoding"},
                        status_code=422,
                    )
                if result.was_modified:
                    data[key] = result.cleaned
                    modified = True
                    logger.info(
                        f"Unicode normalized: {result.indicators}"
                    )
        if modified:
            # Replace request body with normalized content.
            # NOTE(review): this relies on Starlette's private `_body` cache
            # populated by request.body(); confirm it still holds on Starlette
            # upgrades, or rebuild the Request with the new body instead.
            request._body = json.dumps(data).encode()
        return await call_next(request)

# Step 7: Maintain the Confusables Database
# unicode_defense/update_confusables.py
"""
Update the homoglyph database from Unicode Consortium data.
"""
from pathlib import Path
import urllib.request
def download_confusables(output_path: Path) -> None:
    """Download the official Unicode confusables list to *output_path*.

    Uses urlopen with an explicit timeout instead of the legacy
    urllib.request.urlretrieve helper, so a hung connection cannot stall
    the update job indefinitely.
    """
    url = "https://www.unicode.org/Public/security/latest/confusables.txt"
    with urllib.request.urlopen(url, timeout=30) as response:
        output_path.write_bytes(response.read())
def parse_confusables(confusables_path: Path) -> dict[str, str]:
    """Parse the Unicode confusables file into a source -> ASCII lookup.

    Data lines have the form ``<source hex> ; <target hex sequence> ; <type>``.

    BUG FIX: the target field is a *sequence* of code points (e.g. U+2033
    DOUBLE PRIME maps to two apostrophes); the previous version kept only
    the first code point, producing wrong replacements. The full target
    sequence is now joined, and a mapping is kept only when every target
    character is ASCII. The file is also read as utf-8-sig because the
    published confusables.txt starts with a BOM.
    """
    mappings: dict[str, str] = {}
    for line in confusables_path.read_text(encoding="utf-8-sig").splitlines():
        line = line.strip()
        # Skip blank lines and comments.
        if not line or line.startswith("#"):
            continue
        parts = line.split(";")
        if len(parts) < 2:
            continue
        source_hex = parts[0].strip()
        target_hexes = parts[1].strip().split()
        try:
            # Multi-code-point *sources* raise ValueError here and are
            # skipped, matching the original behavior.
            source = chr(int(source_hex, 16))
            target = "".join(chr(int(h, 16)) for h in target_hexes)
        except (ValueError, OverflowError):
            continue
        # Only map to ASCII targets for security use
        if target and target.isascii():
            mappings[source] = target
    return mappings

# Download and update confusables database
python -c "
from pathlib import Path
from unicode_defense.update_confusables import download_confusables, parse_confusables
download_confusables(Path('data/confusables.txt'))
mappings = parse_confusables(Path('data/confusables.txt'))
print(f'Loaded {len(mappings)} confusable mappings')
"Related Topics
- Building a Production Input Sanitizer -- Full sanitization pipeline
- Regex-Based Prompt Filter -- Filters that depend on normalization
- Multi-Layer Input Validation -- Where normalization fits in defense-in-depth
- Prompt Classifier Training -- Classifiers that benefit from normalized input
An attacker submits 'ign\u200bore all previous instructions' where \u200b is a zero-width space. Why does this bypass a regex filter checking for 'ignore'?