#!/usr/bin/env python3
"""
check_shortcuts.py
──────────────────
Recursively scans a directory (by default the current working directory) for
files that contain the four shortcut lines

    <ctrl>T
    <ctrl>minus
    <ctrl>plus
    <ctrl>B

( each line must start with an actual TAB character, *not* spaces ) and checks
whether

 • the translations for <ctrl>T  and <ctrl>minus   are *identical*
 • the translations for <ctrl>plus and <ctrl>B      are *identical*

If either pair is identical, the script prints a short report naming the file
and the offending pair.

Usage
-----

    $ python3 check_shortcuts.py             # scan cwd
    $ python3 check_shortcuts.py path/to/dir # scan a specific directory
"""
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

# ──────────────────────────────────────────────────────────────────────────────
# 1. Command–line arguments
# ──────────────────────────────────────────────────────────────────────────────
# Build the CLI: a single optional positional argument naming the directory to
# scan; it defaults to "." (the current working directory).
parser = argparse.ArgumentParser(description="Detect identical translations in CTRL shortcut lines.")
parser.add_argument(
    "root",
    nargs="?",
    default=".",
    help="Directory to scan (default: current working directory)",
)
args = parser.parse_args()

# Absolute, normalised form of the directory to scan.
root: Path = Path(args.root).resolve()

# os.walk() silently yields nothing for a missing path or a regular file, which
# would end in a misleading "no duplicates found" summary.  Reject such input
# up front with a normal argparse usage error (exit status 2).
if not root.is_dir():
    parser.error(f"not a directory: {root}")

# ──────────────────────────────────────────────────────────────────────────────
# 2. Pre-compile the regular expressions
#    “translation” is whatever follows the first ‘|’ (or just whitespace) until EOL
# ──────────────────────────────────────────────────────────────────────────────
# One compiled pattern per shortcut token, keyed by the token itself.
# Each pattern requires, in order:
#   • a literal TAB at the start of the line,
#   • the token, followed by whitespace, a '|' or the end of the line
#     (the lookahead keeps "<ctrl>T" from matching "<ctrl>Tab" and "<ctrl>B"
#     from matching "<ctrl>BackSpace"),
#   • an optional '|' separator surrounded by optional whitespace,
#   • the translation, captured as group 1 (may be empty).
TOKEN_PATTERNS: dict[str, re.Pattern[str]] = {
    token: re.compile(rf"^\t{re.escape(token)}(?=[\s|]|$)\s*(?:\|\s*)?(.*)$")
    for token in ("<ctrl>T", "<ctrl>minus", "<ctrl>plus", "<ctrl>B")
}

# ──────────────────────────────────────────────────────────────────────────────
# 3. Helpers
# ──────────────────────────────────────────────────────────────────────────────
def collect_translations(path: Path) -> dict[str, str]:
    """
    Return a dict {token -> translation} for every shortcut line that occurs
    in *path*.

    The file is read line by line as UTF-8 (undecodable bytes are dropped) and
    each line is tried against every pattern in ``TOKEN_PATTERNS`` whose token
    has not been recorded yet, so if a line appears multiple times the *first*
    occurrence wins.  The translation is the pattern's first capture group
    with surrounding whitespace stripped.  Reading stops as soon as every
    token has been found.

    A file that cannot be opened or read does not raise: whatever was
    collected before the ``OSError`` (possibly nothing) is returned.
    """
    found: dict[str, str] = {}
    # Derived from the table rather than hard-coded, so the early exit stays
    # correct if tokens are added to or removed from TOKEN_PATTERNS.
    wanted = len(TOKEN_PATTERNS)
    try:
        # errors="ignore" drops undecodable bytes, so binary files never raise
        # a decoding error here; only OS-level failures need handling.
        with path.open(encoding="utf-8", errors="ignore") as fh:
            for line in fh:
                for token, pat in TOKEN_PATTERNS.items():
                    if token in found:  # first occurrence wins
                        continue
                    m = pat.match(line)
                    if m:
                        found[token] = m.group(1).strip()
                if len(found) == wanted:
                    break  # nothing left to look for in this file
    except OSError:
        # skip unreadable files (permissions, vanished entries, special files)
        pass
    return found


def report(path: Path, dup_T_minus: bool, dup_plus_B: bool, trans: dict[str, str]) -> None:
    """
    Print the report for one file: an empty line, the file path, then one
    bullet per pair whose flag is set, quoting the shared translation.

    *trans* must contain ``<ctrl>T`` when *dup_T_minus* is true and
    ``<ctrl>plus`` when *dup_plus_B* is true.
    """

    def _lines():
        # Produced lazily so every line is printed before the next one is built.
        yield f"\n{path}"
        if dup_T_minus:
            yield f"  • identical translations for <ctrl>T & <ctrl>minus : “{trans['<ctrl>T']}”"
        if dup_plus_B:
            yield f"  • identical translations for <ctrl>plus & <ctrl>B   : “{trans['<ctrl>plus']}”"

    for text in _lines():
        print(text)


# ──────────────────────────────────────────────────────────────────────────────
# 4. Walk the tree
# ──────────────────────────────────────────────────────────────────────────────
def _pair_is_identical(found: dict[str, str], first: str, second: str) -> bool:
    """Return True only if *both* tokens were found and their translations are equal."""
    return first in found and second in found and found[first] == found[second]


# Number of files in which at least one pair has identical translations.
duplicate_count = 0

for dirpath, _dirnames, filenames in os.walk(root):
    for name in filenames:
        file_path = Path(dirpath, name)
        translations = collect_translations(file_path)

        # Each pair is judged on its own and only when both of its lines were
        # found.  Comparing two *missing* entries (None == None) must not count
        # as "identical": it would flag files that contain only the other pair
        # and make report() fail on the absent key.
        dup_pair_1 = _pair_is_identical(translations, "<ctrl>T", "<ctrl>minus")
        dup_pair_2 = _pair_is_identical(translations, "<ctrl>plus", "<ctrl>B")

        if dup_pair_1 or dup_pair_2:
            duplicate_count += 1
            report(file_path, dup_pair_1, dup_pair_2, translations)

# ──────────────────────────────────────────────────────────────────────────────
# 5. Final summary
# ──────────────────────────────────────────────────────────────────────────────
# One closing line: either the number of offending files or an all-clear.
if duplicate_count:
    print(f"\nFound {duplicate_count} file(s) with identical shortcut translations.")
else:
    print("√ No identical shortcut translations found.")
