Unicode Normalization Defense
Step-by-step walkthrough for implementing Unicode normalization to prevent encoding-based prompt injection bypasses, covering homoglyph detection, invisible character stripping, bidirectional text handling, and normalization testing.
Attackers use Unicode to make malicious text look different to security filters while appearing identical (or nearly identical) to the LLM's tokenizer. A fullwidth Ｉ looks like a regular I to humans and models, but a regex checking for "Ignore" will miss "Ｉgnore". Invisible characters can break up keywords, and bidirectional overrides can make text render in a different order than it is stored. This walkthrough builds a Unicode normalization layer that collapses these tricks before any security filter runs.
Step 1: Map the Unicode Attack Surface
Understand the categories of Unicode-based attacks before building defenses.
# unicode_defense/attack_surface.py
"""
Catalog of Unicode-based attack techniques against LLM filters.
Used for testing and documentation.
"""

# Keyed by attack family. Each entry carries a human-readable description
# plus concrete payload examples that the test suite reuses as fixtures.
ATTACK_CATEGORIES = {
    "homoglyphs": {
        "description": "Characters from different scripts that look identical to ASCII",
        "examples": [
            {"attack": "\u0406gnore", "looks_like": "Ignore", "script": "Cyrillic І"},
            {"attack": "\u0410ll", "looks_like": "All", "script": "Cyrillic А"},
            {"attack": "\uff29gnore", "looks_like": "Ignore", "script": "Fullwidth I"},
            {"attack": "pr\u043evious", "looks_like": "previous", "script": "Cyrillic о"},
        ],
    },
    "invisible_characters": {
        "description": "Zero-width and invisible characters that break keyword matching",
        "examples": [
            {"char": "\u200b", "name": "Zero-width space", "usage": "ign\u200bore"},
            {"char": "\u200c", "name": "Zero-width non-joiner", "usage": "sys\u200ctem"},
            {"char": "\u200d", "name": "Zero-width joiner", "usage": "prom\u200dpt"},
            {"char": "\ufeff", "name": "BOM / zero-width no-break space", "usage": "\ufeffignore"},
            {"char": "\u00ad", "name": "Soft hyphen", "usage": "in\u00adstructions"},
        ],
    },
    "bidirectional": {
        "description": "Right-to-left overrides that change text rendering order",
        "examples": [
            {"char": "\u202e", "name": "Right-to-left override"},
            {"char": "\u202d", "name": "Left-to-right override"},
            {"char": "\u200f", "name": "Right-to-left mark"},
        ],
    },
    "combining_characters": {
        "description": "Diacritics and combining marks that modify base characters",
        "examples": [
            {"attack": "i\u0307gnore", "looks_like": "ignore with dot above i"},
            {"attack": "syste\u0308m", "looks_like": "system with diaeresis"},
        ],
    },
    "encoding_variants": {
        "description": "Same character in different Unicode forms",
        "examples": [
            {"nfc": "\u00e9", "nfd": "e\u0301", "display": "e with acute"},
            {"nfc": "\u00f1", "nfd": "n\u0303", "display": "n with tilde"},
        ],
    },
}

# Step 2: Implement Core Normalization
# unicode_defense/normalizer.py
"""
Unicode normalization engine for LLM input defense.
"""
import unicodedata
import re
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class NormalizationResult:
    """Outcome of normalizing one input string."""

    original: str    # text exactly as received
    normalized: str  # text after all normalization passes
    # Human-readable audit log of what each pass changed.
    changes: list[str] = field(default_factory=list)
    # Signals that downstream risk scoring consumes.
    risk_indicators: list[str] = field(default_factory=list)


class UnicodeNormalizer:
    """Collapse Unicode obfuscation tricks before security filters run.

    Applies, in order: Unicode normalization (NFKC by default), bidi
    control stripping, invisible/control character stripping, a cap on
    consecutive combining marks, and whitespace normalization.
    """

    # Unicode general categories that should always be stripped.
    STRIP_CATEGORIES = {
        "Cf",  # Format characters (zero-width, bidirectional)
        "Cc",  # Control characters (except common whitespace)
        "Co",  # Private use
        "Cn",  # Unassigned
    }

    # Code points preserved even though they fall in the Cc category.
    PRESERVE_CHARS = {
        0x0009,  # Tab
        0x000A,  # Line feed
        0x000D,  # Carriage return
        0x0020,  # Space
    }

    def __init__(
        self,
        normalization_form: str = "NFKC",
        strip_invisibles: bool = True,
        strip_bidi: bool = True,
        max_combining_marks: int = 3,
    ):
        self.form = normalization_form
        self.strip_invisibles = strip_invisibles
        self.strip_bidi = strip_bidi
        self.max_combining_marks = max_combining_marks

    def normalize(self, text: str) -> NormalizationResult:
        """Run all passes over *text* and return an audited result."""
        result = NormalizationResult(original=text, normalized=text)
        current = text
        # Step 1: Unicode normalization (NFKC folds fullwidth/compatibility forms)
        normalized = unicodedata.normalize(self.form, current)
        if normalized != current:
            # BUG FIX: the old message reported len(before) - len(after) as a
            # "changed chars" count, which can be zero or negative even when
            # the text changed. Report the honest length transition instead.
            result.changes.append(
                f"Applied {self.form} normalization "
                f"(length {len(current)} -> {len(normalized)})"
            )
            current = normalized
        # Step 2: Handle bidirectional overrides.
        # BUG FIX: this must run BEFORE the generic invisible-character pass.
        # Every bidi control has general category Cf, so when invisibles were
        # stripped first the bidi pass never saw them and the
        # "Bidirectional overrides detected" risk indicator could never fire.
        if self.strip_bidi:
            current = self._strip_bidi(current, result)
        # Step 3: Strip invisible and control characters
        if self.strip_invisibles:
            current = self._strip_invisibles(current, result)
        # Step 4: Limit combining marks
        current = self._limit_combining(current, result)
        # Step 5: Normalize whitespace
        current = self._normalize_whitespace(current, result)
        result.normalized = current
        return result

    def _strip_invisibles(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Drop characters in STRIP_CATEGORIES, keeping common whitespace."""
        stripped = []
        count = 0
        for char in text:
            cp = ord(char)
            cat = unicodedata.category(char)
            if cat in self.STRIP_CATEGORIES and cp not in self.PRESERVE_CHARS:
                count += 1
                continue
            stripped.append(char)
        if count > 0:
            result.changes.append(f"Stripped {count} invisible characters")
            # A handful may be legitimate (e.g. ZWJ in emoji); many is a tell.
            if count > 5:
                result.risk_indicators.append(
                    f"High invisible character count: {count}"
                )
        return "".join(stripped)

    def _strip_bidi(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Remove bidirectional control characters and flag their presence."""
        bidi_chars = {
            "\u200e", "\u200f",  # LRM, RLM
            "\u202a", "\u202b", "\u202c",  # LRE, RLE, PDF
            "\u202d", "\u202e",  # LRO, RLO
            "\u2066", "\u2067", "\u2068", "\u2069",  # Isolates
        }
        cleaned = []
        bidi_count = 0
        for char in text:
            if char in bidi_chars:
                bidi_count += 1
                continue
            cleaned.append(char)
        if bidi_count > 0:
            result.changes.append(
                f"Stripped {bidi_count} bidirectional control characters"
            )
            # Any bidi override in prompt text is suspicious on its own.
            result.risk_indicators.append(
                "Bidirectional overrides detected"
            )
        return "".join(cleaned)

    def _limit_combining(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Keep at most max_combining_marks consecutive combining marks."""
        output = []
        combining_count = 0
        for char in text:
            if unicodedata.category(char).startswith("M"):
                combining_count += 1
                # Marks beyond the cap are silently dropped.
                if combining_count <= self.max_combining_marks:
                    output.append(char)
            else:
                # A base character resets the run counter.
                combining_count = 0
                output.append(char)
        cleaned = "".join(output)
        if len(cleaned) != len(text):
            result.changes.append("Excessive combining marks removed")
        return cleaned

    def _normalize_whitespace(
        self, text: str, result: NormalizationResult
    ) -> str:
        """Tame runs of spaces, line endings, and blank lines.

        Intentionally not recorded in result.changes: whitespace tweaks
        are cosmetic and carry no security signal.
        """
        # Collapse runs of 3+ spaces to a single space
        cleaned = re.sub(r" {3,}", " ", text)
        # Normalize line endings to \n
        cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
        # Collapse runs of 4+ newlines to at most three
        cleaned = re.sub(r"\n{4,}", "\n\n\n", cleaned)
        return cleaned

# Step 3: Build a Homoglyph Detector
Detect and replace characters from other scripts that look like ASCII.
# unicode_defense/homoglyph.py
"""
Homoglyph detection and normalization.
Maps confusable characters to their ASCII equivalents.
"""
import unicodedata
from typing import Optional
class HomoglyphDetector:
    """Find characters from non-Latin scripts that render like ASCII and
    map them back to their ASCII equivalents."""

    # Common confusable mappings (script-specific -> ASCII)
    CONFUSABLES = {
        # Cyrillic confusables
        "\u0410": "A", "\u0412": "B", "\u0421": "C",
        "\u0415": "E", "\u041d": "H", "\u041a": "K",
        "\u041c": "M", "\u041e": "O", "\u0420": "P",
        "\u0422": "T", "\u0425": "X",
        "\u0430": "a", "\u0435": "e", "\u043e": "o",
        "\u0440": "p", "\u0441": "c", "\u0443": "y",
        "\u0445": "x", "\u0456": "i",
        # Greek confusables
        "\u0391": "A", "\u0392": "B", "\u0395": "E",
        "\u0397": "H", "\u0399": "I", "\u039a": "K",
        "\u039c": "M", "\u039d": "N", "\u039f": "O",
        "\u03a1": "P", "\u03a4": "T", "\u03a7": "X",
        "\u03b1": "a", "\u03bf": "o",
        # Mathematical symbols
        "\u2133": "M", "\u2134": "o",
        # Fullwidth Latin (handled by NFKC but listed for completeness)
        "\uff21": "A", "\uff22": "B", "\uff23": "C",
    }

    def __init__(self, strict: bool = False):
        # NOTE(review): `strict` is stored but not yet consulted anywhere in
        # this class — presumably reserved for a tighter mapping policy.
        self.strict = strict
        self._build_lookup()

    def _build_lookup(self) -> None:
        # Copy so per-instance additions never mutate the class constant.
        self._lookup = dict(self.CONFUSABLES)

    def detect_and_replace(self, text: str) -> tuple[str, list[dict]]:
        """Replace confusable characters with ASCII equivalents."""
        replacements = []
        out_chars = []
        for idx, ch in enumerate(text):
            ascii_equiv = self._lookup.get(ch)
            if ascii_equiv is None:
                out_chars.append(ch)
                continue
            # Record an audit entry for every substitution made.
            replacements.append({
                "position": idx,
                "original": ch,
                "original_name": unicodedata.name(ch, "UNKNOWN"),
                "replacement": ascii_equiv,
            })
            out_chars.append(ascii_equiv)
        return "".join(out_chars), replacements

    def has_mixed_scripts(self, text: str) -> bool:
        """Detect if text mixes multiple Unicode scripts suspiciously."""
        seen_scripts = set()
        for ch in text:
            if not ch.isalpha():
                continue
            try:
                # First word of the character name approximates its script,
                # e.g. "LATIN SMALL LETTER A" -> "LATIN".
                seen_scripts.add(unicodedata.name(ch).split()[0])
            except ValueError:
                # Unnamed code points carry no script information.
                pass
        # Mixing Latin with Cyrillic or Greek is the classic homoglyph tell.
        suspicious_mixes = (
            {"LATIN", "CYRILLIC"},
            {"LATIN", "GREEK"},
        )
        return any(mix.issubset(seen_scripts) for mix in suspicious_mixes)

# Step 4: Create the Unified Defense Layer
Combine normalization and homoglyph detection into a single preprocessing step.
# unicode_defense/defense.py
"""
Unified Unicode defense layer for LLM input pipelines.
"""
from dataclasses import dataclass, field
from unicode_defense.normalizer import UnicodeNormalizer
from unicode_defense.homoglyph import HomoglyphDetector
@dataclass
class DefenseResult:
    """Summary of one pass through the Unicode defense layer."""

    original: str    # raw input text as received
    cleaned: str     # text after normalization and homoglyph replacement
    was_modified: bool   # True when cleaned differs from original
    risk_score: float    # 0.0 (benign) .. 1.0 (highly suspicious)
    # Reasons the score is non-zero (mixed scripts, bidi overrides, ...).
    indicators: list[str] = field(default_factory=list)
    homoglyph_replacements: int = 0   # number of confusables replaced
    invisible_chars_removed: int = 0  # number of invisible characters stripped
class UnicodeDefenseLayer:
    """Combine Unicode normalization and homoglyph replacement into one
    preprocessing step that also produces a risk score in [0.0, 1.0]."""

    def __init__(self):
        self.normalizer = UnicodeNormalizer()
        self.homoglyph = HomoglyphDetector()

    def process(self, text: str) -> DefenseResult:
        """Normalize *text* and score how suspicious its encoding was."""
        indicators = []
        risk_score = 0.0
        # Step 1: mixed Latin/Cyrillic or Latin/Greek text is a homoglyph tell
        if self.homoglyph.has_mixed_scripts(text):
            indicators.append("Mixed Unicode scripts detected")
            risk_score += 0.3
        # Step 2: Normalize Unicode (NFKC, bidi, invisibles, combining marks)
        norm_result = self.normalizer.normalize(text)
        current = norm_result.normalized
        indicators.extend(norm_result.risk_indicators)
        if norm_result.risk_indicators:
            risk_score += 0.2 * len(norm_result.risk_indicators)
        # BUG FIX: invisible_chars_removed was declared on DefenseResult but
        # never populated (always 0). Recover the count from the normalizer's
        # audit log entry "Stripped N invisible characters".
        invisible_removed = 0
        for change in norm_result.changes:
            if change.startswith("Stripped ") and change.endswith(" invisible characters"):
                invisible_removed = int(change.split()[1])
        # Step 3: Replace confusable characters with ASCII equivalents
        cleaned, replacements = self.homoglyph.detect_and_replace(current)
        if replacements:
            indicators.append(
                f"{len(replacements)} homoglyph replacements"
            )
            # Cap the homoglyph contribution so it cannot dominate alone.
            risk_score += min(0.1 * len(replacements), 0.5)
        return DefenseResult(
            original=text,
            cleaned=cleaned,
            was_modified=(cleaned != text),
            risk_score=min(risk_score, 1.0),
            indicators=indicators,
            homoglyph_replacements=len(replacements),
            invisible_chars_removed=invisible_removed,
        )

# Step 5: Build Encoding Attack Tests
# tests/test_unicode_defense.py
"""
Tests for Unicode normalization defense layer.
"""
import pytest
from unicode_defense.defense import UnicodeDefenseLayer
from unicode_defense.normalizer import UnicodeNormalizer
@pytest.fixture
def defense():
    """Fresh defense layer for each test."""
    return UnicodeDefenseLayer()


class TestHomoglyphAttacks:
    def test_cyrillic_homoglyph(self, defense):
        # Cyrillic "А" (U+0410) looks like Latin "A"
        payload = "\u0410ll previous instructions"
        outcome = defense.process(payload)
        assert "All previous instructions" in outcome.cleaned

    def test_mixed_script_detection(self, defense):
        payload = "\u0406gnore previous"  # Cyrillic І + Latin
        outcome = defense.process(payload)
        assert outcome.risk_score > 0
        assert any("Mixed" in i for i in outcome.indicators)

    def test_fullwidth_normalization(self, defense):
        # Fullwidth IGNORE -> ASCII IGNORE via NFKC
        payload = "\uff29\uff27\uff2e\uff2f\uff32\uff25"
        outcome = defense.process(payload)
        assert outcome.cleaned == "IGNORE"


class TestInvisibleCharacters:
    def test_zero_width_space(self, defense):
        payload = "ign\u200bore all"
        outcome = defense.process(payload)
        assert "\u200b" not in outcome.cleaned
        assert "ignore all" in outcome.cleaned

    def test_zero_width_joiner(self, defense):
        payload = "sys\u200dtem prompt"
        outcome = defense.process(payload)
        assert "\u200d" not in outcome.cleaned

    def test_bom_stripping(self, defense):
        payload = "\ufeffignore instructions"
        outcome = defense.process(payload)
        assert "\ufeff" not in outcome.cleaned


class TestBidirectional:
    def test_rtl_override_stripped(self, defense):
        payload = "\u202eignore\u202c instructions"
        outcome = defense.process(payload)
        assert "\u202e" not in outcome.cleaned
        assert "Bidirectional" in str(outcome.indicators)


class TestBenignInput:
    def test_ascii_unchanged(self, defense):
        message = "What is the weather today?"
        outcome = defense.process(message)
        assert outcome.cleaned == message
        assert not outcome.was_modified

    def test_legitimate_unicode_preserved(self, defense):
        message = "Qu'est-ce que c'est?"
        outcome = defense.process(message)
        assert "'" in outcome.cleaned

    def test_emoji_handled(self, defense):
        message = "Great job! \U0001f44d"
        outcome = defense.process(message)
        assert "\U0001f44d" in outcome.cleaned

# Run with: pytest tests/test_unicode_defense.py -v
# Step 6: Deploy as a Pipeline Preprocessor
# unicode_defense/middleware.py
"""
ASGI middleware for Unicode normalization.
Runs before all other security filters.
"""
import json
import logging
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from unicode_defense.defense import UnicodeDefenseLayer
logger = logging.getLogger("unicode_defense")
class UnicodeDefenseMiddleware(BaseHTTPMiddleware):
    """Normalize Unicode in inbound JSON prompt fields before any other
    security filter runs; reject requests whose encoding tricks push the
    risk score past `risk_threshold` with HTTP 422.
    """

    def __init__(self, app, risk_threshold: float = 0.8):
        super().__init__(app)
        self.defense = UnicodeDefenseLayer()
        self.risk_threshold = risk_threshold

    async def dispatch(self, request: Request, call_next):
        # Only POST bodies carry user prompts in this API.
        if request.method != "POST":
            return await call_next(request)
        try:
            body = await request.body()
            data = json.loads(body)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Not JSON; let downstream handlers reject or process it.
            return await call_next(request)
        # BUG FIX: json.loads may return a list, string, or number. With a
        # list body, `key in data` can succeed and `data[key]` then raises
        # TypeError. Only dict payloads have named fields to normalize.
        if not isinstance(data, dict):
            return await call_next(request)
        # Normalize all string fields that may carry prompt text
        modified = False
        for key in ["prompt", "message", "input", "query"]:
            if key in data and isinstance(data[key], str):
                result = self.defense.process(data[key])
                if result.risk_score >= self.risk_threshold:
                    logger.warning(
                        f"High-risk Unicode input blocked: "
                        f"score={result.risk_score}, "
                        f"indicators={result.indicators}"
                    )
                    return JSONResponse(
                        {"error": "Input contains suspicious encoding"},
                        status_code=422,
                    )
                if result.was_modified:
                    data[key] = result.cleaned
                    modified = True
                    logger.info(
                        f"Unicode normalized: {result.indicators}"
                    )
        if modified:
            # Replace request body with normalized content.
            # NOTE(review): this relies on Starlette's private `_body` cache
            # populated by request.body(); confirm it still holds on Starlette
            # upgrades, or rebuild the Request with the new body instead.
            request._body = json.dumps(data).encode()
        return await call_next(request)

# Step 7: Maintain the Confusables Database
# unicode_defense/update_confusables.py
"""
Update the homoglyph database from Unicode Consortium data.
"""
from pathlib import Path
import urllib.request
def download_confusables(output_path: Path) -> None:
    """Download the official Unicode confusables list to *output_path*.

    Uses urlopen with an explicit timeout instead of the legacy
    urllib.request.urlretrieve helper, so a hung connection cannot stall
    the update job indefinitely.
    """
    url = "https://www.unicode.org/Public/security/latest/confusables.txt"
    with urllib.request.urlopen(url, timeout=30) as response:
        output_path.write_bytes(response.read())
def parse_confusables(confusables_path: Path) -> dict[str, str]:
    """Parse the Unicode confusables file into a source -> ASCII lookup.

    Data lines have the form ``<source hex> ; <target hex sequence> ; <type>``.

    BUG FIX: the target field is a *sequence* of code points (e.g. U+2033
    DOUBLE PRIME maps to two apostrophes); the previous version kept only
    the first code point, producing wrong replacements. The full target
    sequence is now joined, and a mapping is kept only when every target
    character is ASCII. The file is also read as utf-8-sig because the
    published confusables.txt starts with a BOM.
    """
    mappings: dict[str, str] = {}
    for line in confusables_path.read_text(encoding="utf-8-sig").splitlines():
        line = line.strip()
        # Skip blank lines and comments.
        if not line or line.startswith("#"):
            continue
        parts = line.split(";")
        if len(parts) < 2:
            continue
        source_hex = parts[0].strip()
        target_hexes = parts[1].strip().split()
        try:
            # Multi-code-point *sources* raise ValueError here and are
            # skipped, matching the original behavior.
            source = chr(int(source_hex, 16))
            target = "".join(chr(int(h, 16)) for h in target_hexes)
        except (ValueError, OverflowError):
            continue
        # Only map to ASCII targets for security use
        if target and target.isascii():
            mappings[source] = target
    return mappings

# Download and update confusables database
python -c "
from pathlib import Path
from unicode_defense.update_confusables import download_confusables, parse_confusables
download_confusables(Path('data/confusables.txt'))
mappings = parse_confusables(Path('data/confusables.txt'))
print(f'Loaded {len(mappings)} confusable mappings')
"Related Topics
- Building a Production Input Sanitizer -- Full sanitization pipeline
- Regex-Based Prompt Filter -- Filters that depend on normalization
- Multi-Layer Input Validation -- Where normalization fits in defense-in-depth
- Prompt Classifier Training -- Classifiers that benefit from normalized input
An attacker submits 'ign\u200bore all previous instructions' where \u200b is a zero-width space. Why does this bypass a regex filter checking for 'ignore'?