refactor(tree-sitter): centralize grammar config & hacks

Easier to track and maintain.
This commit is contained in:
Henrik Lissner
2025-08-27 00:35:39 +02:00
parent 6009c2b838
commit 3b58741522
18 changed files with 85 additions and 142 deletions

View File

@@ -30,10 +30,8 @@ This is ignored by ccls.")
:hook (c-mode-common . rainbow-delimiters-mode)
:init
(when (modulep! +tree-sitter)
(set-tree-sitter! 'c-mode 'c-ts-mode
'((c :url "https://github.com/tree-sitter/tree-sitter-c")))
(set-tree-sitter! 'c++-mode 'c++-ts-mode
'((cpp :url "https://github.com/tree-sitter/tree-sitter-cpp"))))
(set-tree-sitter! 'c-mode 'c-ts-mode 'c)
(set-tree-sitter! 'c++-mode 'c++-ts-mode 'cpp))
:config
(set-docsets! '(c-mode c-ts-mode) "C")
@@ -116,8 +114,7 @@ This is ignored by ccls.")
:init
(when (and (modulep! +tree-sitter)
(boundp 'cmake-ts-mode)) ; 29+ only
(set-tree-sitter! 'cmake-mode 'cmake-ts-mode
'((cmake :url "https://github.com/uyha/tree-sitter-cmake"))))
(set-tree-sitter! 'cmake-mode 'cmake-ts-mode 'cmake))
:config
(set-docsets! '(cmake-mode cmake-ts-mode) "CMake")
(set-popup-rule! "^\\*CMake Help\\*" :size 0.4 :ttl t)
@@ -160,12 +157,7 @@ This is ignored by ccls.")
'((cuda :url "https://github.com/tree-sitter-grammars/tree-sitter-cuda")))
:config
(when (modulep! +lsp)
(add-hook 'cuda-ts-mode-local-vars-hook #'lsp! 'append))
;; HACK: Remove redundant entries so we can rely solely on
;; `major-mode-remap-defaults' et co.
(rassq-delete-all 'cuda-ts-mode auto-mode-alist)
(cl-callf2 delete '(cuda "https://github.com/tree-sitter-grammars/tree-sitter-cuda" nil nil nil nil)
treesit-language-source-alist))
(add-hook 'cuda-ts-mode-local-vars-hook #'lsp! 'append)))
(use-package! demangle-mode

View File

@@ -62,7 +62,6 @@
;; HACK: Rely on `major-mode-remap-defaults' instead (upstream also doesn't
;; check if the grammars are ready before adding these entries, which will
;; bork clojure buffers).
(cl-callf2 rassq-delete-all 'clojure-ts-mode auto-mode-alist)
(cl-callf2 rassq-delete-all 'clojure-ts-clojurescript-mode auto-mode-alist)
(cl-callf2 rassq-delete-all 'clojure-ts-clojurec-mode auto-mode-alist)
(cl-callf2 rassq-delete-all 'clojure-ts-clojuredart-mode auto-mode-alist)

View File

@@ -2,6 +2,9 @@
(use-package! csharp-mode
:hook (csharp-mode . rainbow-delimiters-mode)
:init
(when (modulep! +tree-sitter)
(set-tree-sitter! 'csharp-mode 'csharp-ts-mode 'c-sharp))
:config
(set-formatter! 'csharpier '("csharpier" "format" "--write-stdout")
:modes '(csharp-mode csharp-ts-mode))
@@ -38,26 +41,12 @@
(add-hook 'csharp-mode-local-vars-hook #'lsp! 'append)
(add-hook 'csharp-ts-mode-local-vars-hook #'lsp! 'append))
(when (and (modulep! +tree-sitter)
(fboundp 'csharp-ts-mode)) ; 29.1+ only
(set-tree-sitter! 'csharp-mode 'csharp-ts-mode
'((c-sharp :url "https://github.com/tree-sitter/tree-sitter-c-sharp"
:rev "v0.23.1"))))
(defadvice! +csharp-disable-clear-string-fences-a (fn &rest args)
"This turns off `c-clear-string-fences' for `csharp-mode'. When
on for `csharp-mode' font lock breaks after an interpolated string
or terminating simple string."
:around #'csharp-disable-clear-string-fences
(unless (eq major-mode 'csharp-mode)
(apply fn args)))
;; HACK: `csharp-ts-mode' changes `auto-mode-alist' every time the mode is
;; activated, which runs the risk of overwriting user (or Doom) entries.
;; REVIEW: Should be addressed upstream.
(defadvice! +csharp--undo-ts-side-effects-a (fn &rest args)
:around #'csharp-ts-mode
(let (auto-mode-alist)
(apply fn args))))

View File

@@ -59,16 +59,8 @@
:when (fboundp 'elixir-ts-mode) ; 30.1+ only
:defer t
:init
(set-tree-sitter! 'elixir-mode 'elixir-ts-mode
'((elixir :url "https://github.com/elixir-lang/tree-sitter-elixir"
:rev "v0.3.3")
(heex :url "https://github.com/phoenixframework/tree-sitter-heex"
:rev "v0.7.0")))
(set-tree-sitter! 'elixir-mode 'elixir-ts-mode '(elixir heex))
:config
;; HACK: Rely on `major-mode-remap-defaults' (and elixir-mode's autoloaded
;; auto-mode-alist entries).
(cl-callf2 rassq-delete-all 'elixir-ts-mode auto-mode-alist)
(+elixir-common-config 'elixir-ts-mode))

View File

@@ -53,10 +53,7 @@
:when (fboundp 'go-ts-mode) ; 31.1+ only
:defer t
:init
(set-tree-sitter! 'go-mode 'go-ts-mode
'((go :url "https://github.com/tree-sitter/tree-sitter-go" :ref "v0.23.4")
(gomod :url "https://github.com/camdencheek/tree-sitter-go-mod" :ref "v1.1.0")
(gowork :url "https://github.com/omertuc/tree-sitter-go-work")))
(set-tree-sitter! 'go-mode 'go-ts-mode '(go gomod gowork))
:config
(+go-common-config 'go-ts-mode))

View File

@@ -30,9 +30,4 @@
:init
(set-tree-sitter! 'janet-mode 'janet-ts-mode
`(janet-simple :url "https://github.com/sogaiu/tree-sitter-janet-simple"
:cc ,(if (featurep :system 'windows) "gcc.exe")))
:config
;; HACK: These entries are inserted twice by this package, so remove them so
;; this module can be the single source of truth.
(cl-callf2 rassq-delete-all 'janet-ts-mode auto-mode-alist)
(cl-callf2 rassq-delete-all 'janet-ts-mode interpreter-mode-alist))
:cc ,(if (featurep :system 'windows) "gcc.exe"))))

View File

@@ -25,9 +25,7 @@
:when (fboundp 'json-ts-mode) ; 29.1+ only
:defer t
:init
(set-tree-sitter! 'json-mode 'json-ts-mode
'((json :url "https://github.com/tree-sitter/tree-sitter-json"
:rev "v0.24.8")))
(set-tree-sitter! 'json-mode 'json-ts-mode 'json)
:config
;; HACK: Rely on `major-mode-remap-defaults'.
(cl-callf2 assq-delete-all 'json-ts-mode auto-mode-alist)

View File

@@ -48,9 +48,7 @@
:when (modulep! +tree-sitter)
:defer t
:init
(set-tree-sitter! 'julia-mode 'julia-ts-mode
'((julia :url "https://github.com/tree-sitter/tree-sitter-julia"
:rev "v0.23.1")))
(set-tree-sitter! 'julia-mode 'julia-ts-mode 'julia)
:config
(when (modulep! +lsp)
(add-hook 'julia-ts-mode-local-vars-hook #'lsp! 'append)))

View File

@@ -30,9 +30,7 @@
:when (fboundp 'lua-ts-mode) ; 30.1+ only
:defer t
:init
(set-tree-sitter! 'lua-mode 'lua-ts-mode
'((lua :url "https://github.com/tree-sitter-grammars/tree-sitter-lua"
:rev "v0.3.0")))
(set-tree-sitter! 'lua-mode 'lua-ts-mode 'lua)
:config
(set-lookup-handlers! 'lua-ts-mode :documentation 'lua-search-documentation)
(set-electric! 'lua-ts-mode :words '("else" "end"))

View File

@@ -131,14 +131,7 @@ capture, the end position, and the output buffer.")
:when (fboundp 'markdown-ts-mode)
:defer t
:init
(set-tree-sitter! 'markdown-mode 'markdown-ts-mode
'((markdown :url "https://github.com/tree-sitter-grammars/tree-sitter-markdown"
:rev "v0.4.1"
:source-dir "tree-sitter-markdown-inline/src")
(markdown-inline :url "https://github.com/tree-sitter-grammars/tree-sitter-markdown"
:rev "v0.4.1"
:source-dir "tree-sitter-markdown-inline/src")))
(set-tree-sitter! 'markdown-mode 'markdown-ts-mode '(markdown markdown-inline))
:config
(cl-callf2 delete '("\\.md\\'" . markdown-ts-mode) auto-mode-alist))

View File

@@ -83,16 +83,8 @@
:when (fboundp 'php-ts-mode) ; 30.1+ only
:defer t
:init
(set-tree-sitter! 'php-mode 'php-ts-mode
'((php :url "https://github.com/tree-sitter/tree-sitter-php"
:rev "v0.23.11"
:source-dir "php/src")
(phpdoc :url "https://github.com/claytonrcarter/tree-sitter-phpdoc")))
(set-tree-sitter! 'php-mode 'php-ts-mode '(php phpdoc))
:config
;; HACK: Rely on `major-mode-remap-defaults'.
(cl-callf2 rassq-delete-all 'php-ts-mode auto-mode-alist)
(cl-callf2 rassq-delete-all 'php-ts-mode interpreter-mode-alist)
(+php-common-config 'php-ts-mode))

View File

@@ -20,8 +20,7 @@
python-indent-guess-indent-offset-verbose nil)
(when (modulep! +tree-sitter)
(set-tree-sitter! 'python-mode 'python-ts-mode
'((python :url "https://github.com/tree-sitter/tree-sitter-python"))))
(set-tree-sitter! 'python-mode 'python-ts-mode 'python))
:config
;; HACK: `python-base-mode' (and `python-ts-mode') don't exist on pre-29
@@ -98,16 +97,7 @@
;; HACK: `python-mode' doesn't update `tab-width' to reflect
;; `python-indent-offset', causing issues anywhere `tab-width' is respected.
(setq-hook! '(python-mode-hook python-ts-mode-hook) tab-width python-indent-offset)
;; HACK: `python-ts-mode' changes `auto-mode-alist' and
;; `interpreter-mode-alist' every time the mode is activated, which runs the
;; risk of overwriting user (or Doom) entries.
;; REVIEW: Should be addressed upstream.
(defadvice! +python--undo-ts-side-effects-a (fn &rest args)
:around #'python-ts-mode
(let (auto-mode-alist interpreter-mode-alist)
(apply fn args))))
(setq-hook! '(python-mode-hook python-ts-mode-hook) tab-width python-indent-offset))
(use-package! pyimport

View File

@@ -41,8 +41,7 @@
:when (fboundp 'ruby-ts-mode) ; 29.1+ only
:defer t
:init
(set-tree-sitter! 'ruby-mode 'ruby-ts-mode
'((ruby :url "https://github.com/tree-sitter/tree-sitter-ruby")))
(set-tree-sitter! 'ruby-mode 'ruby-ts-mode 'ruby)
:config
(set-electric! 'ruby-ts-mode :words '("else" "end" "elsif"))
(set-repl-handler! 'ruby-ts-mode #'inf-ruby)

View File

@@ -12,12 +12,7 @@
:when (modulep! +tree-sitter)
:when (fboundp 'yaml-ts-mode) ; 29.1+ only
:init
(set-tree-sitter! 'yaml-mode 'yaml-ts-mode
'((yaml :url "https://github.com/tree-sitter-grammars/tree-sitter-yaml"
:rev "v0.7.0")))
(set-tree-sitter! 'yaml-mode 'yaml-ts-mode 'yaml)
:config
;; HACK: Rely on `major-mode-remap-defaults'.
(cl-callf2 rassq-delete-all 'yaml-ts-mode auto-mode-alist)
(when (modulep! +lsp)
(add-hook 'yaml-ts-mode-local-vars-hook #'lsp! 'append)))

View File

@@ -47,7 +47,4 @@
'((zig :url "https://github.com/tree-sitter/zig-tree-sitter"
:rev "v0.25.0")))
:config
;; HACK: Rely on `major-mode-remap-defaults'
(cl-callf2 rassq-delete-all 'zig-ts-mode auto-mode-alist)
(+zig-common-config 'zig-ts-mode))

View File

@@ -13,9 +13,4 @@
:when (fboundp 'dockerfile-ts-mode) ; 29.1+ only
:defer t
:init
(set-tree-sitter! 'dockerfile-mode 'dockerfile-ts-mode
'((dockerfile :url "https://github.com/camdencheek/tree-sitter-dockerfile"
:rev "v0.2.0")))
:config
;; HACK: Rely on `major-mode-remap-defaults' instead
(cl-callf2 rassq-delete-all 'dockerfile-ts-mode auto-mode-alist))
(set-tree-sitter! 'dockerfile-mode 'dockerfile-ts-mode 'dockerfile))

View File

@@ -15,10 +15,11 @@ MODE and TS-MODE are major mode symbols. If RECIPES is provided, fall back to
MODE if RECIPES don't pass `treesit-ready-p' when activating TS-MODE. Use this
for ts modes that error out instead of failing gracefully.
RECIPES are an alist of plists with the format (LANG &key URL REV SOURCE-DIR CC
CPP COMMIT), which will be transformed into entries for
`treesit-language-source-alist' (which describe what each of these keys mean).
Note that COMMIT is only available in Emacs >=31."
RECIPES is a symbol (a grammar language name), list thereof, or alist of plists
with the format (LANG &key URL REV SOURCE-DIR CC CPP COMMIT). If an alist of
plists, it will be transformed into entries for `treesit-language-source-alist'
(which describes what each of these keys means). Note that COMMIT is ignored
pre-Emacs 31."
(declare (indent 2))
(cl-check-type mode symbol)
(cl-check-type ts-mode symbol)
@@ -41,6 +42,11 @@ Note that COMMIT is only available in Emacs >=31."
(fboundp ts-mode)
(or (eq treesit-enabled-modes t)
(memq ts-mode treesit-enabled-modes))
;; Lazily load autoload so
;; `treesit-language-source-alist' is initialized.
(let ((fn (symbol-function ts-mode)))
(or (not (autoloadp fn))
(autoload-do-load fn)))
;; Only prompt once, and log other times.
(cl-every (if ensured?
(doom-rpartial #'treesit-ready-p 'message)
@@ -54,16 +60,16 @@ Note that COMMIT is only available in Emacs >=31."
m))))))))
(with-eval-after-load 'treesit
(dolist (recipe recipes)
(cl-destructuring-bind (name &key url rev source-dir cc cpp commit)
(ensure-list recipe)
(setf (alist-get name treesit-language-source-alist)
(append (list url rev source-dir cc cpp)
;; COMPAT: 31.1 introduced a COMMIT recipe argument. On
;; <=30.x, extra arguments will trigger an arity error
;; when installing grammars.
(if (eq (cdr (func-arity 'treesit--install-language-grammar-1))
'many)
(list commit))))))))
(when (cdr (setq recipe (ensure-list recipe)))
(cl-destructuring-bind (name &key url rev source-dir cc cpp commit) recipe
(setf (alist-get name treesit-language-source-alist)
(append (list url rev source-dir cc cpp)
;; COMPAT: 31.1 introduced a COMMIT recipe argument. On
;; <=30.x, extra arguments will trigger an arity error
;; when installing grammars.
(if (eq (cdr (func-arity 'treesit--install-language-grammar-1))
'many)
(list commit)))))))))
;; ;; HACK: Remove and refactor when `use-package' eager macro expansion is solved or `use-package!' is removed
;; ;;;###autoload
@@ -81,6 +87,11 @@ Note that COMMIT is only available in Emacs >=31."
;; (evil-textobj-tree-sitter-goto-textobj group previous end query)))
;; sym))
;;;###autoload
(defun +tree-sitter-ts-mode-inhibit-side-effects-a (fn &rest args)
  "Call FN with ARGS, discarding its changes to the mode-association alists.

`auto-mode-alist' and `interpreter-mode-alist' are dynamically rebound to nil
for the duration of the call, so anything FN adds to them is thrown away when
it returns and the outer values are left untouched. Returns whatever FN
returns. Intended for use as `:around' advice."
  ;; Both variables are special, so these `let' bindings are dynamic: FN (and
  ;; anything it calls) sees empty alists while it runs.
  (let ((auto-mode-alist nil)
        (interpreter-mode-alist nil))
    (apply fn args)))
;;; TODO: Backwards compatibility

View File

@@ -10,38 +10,42 @@
:preface
(setq treesit-enabled-modes t)
;; HACK: These *-ts-mode-maybe functions all treat `treesit-enabled-modes'
;; strangely in the event the language's grammar is unavailable. Plus, they
;; add yet-another-layer of complexity for users to be cognizant of. Get rid
;; of them.
;; REVIEW: Handle this during the 'doom sync' process instead.
(setq auto-mode-alist
(save-match-data
(cl-loop for (src . fn) in auto-mode-alist
unless (and (functionp fn)
(string-match "-ts-mode-maybe$" (symbol-name fn)))
collect (cons src fn))))
;; HACK: The *-ts-mode major modes are inconsistent about how they treat
;; missing language grammars (some error out, some respect
;; `treesit-auto-install-grammar', some fall back to `fundamental-mode').
;; I'd like to address this poor UX using `major-mode-remap-alist' entries
;; created by `set-tree-sitter!' (which will fall back to the non-treesit
;; modes), but most *-ts-mode's clobber `auto-mode-alist' and/or
;; `interpreter-mode-alist' each time the major mode is activated, so those
;; must be undone too so they don't overwrite user config.
;; TODO: Handle this during the 'doom sync' process instead.
(save-match-data
(dolist (sym '(auto-mode-alist interpreter-mode-alist))
(set
sym (cl-loop for (src . fn) in (symbol-value sym)
unless (and (functionp fn)
(string-match "-ts-mode\\(?:-maybe\\)?$" (symbol-name fn)))
collect (cons src fn)))))
;; HACK: These *-ts-modes change `auto-mode-alist' and/or
;; `interpreter-mode-alist' every time they are activated, running the risk
;; of overwriting user (or Doom) config.
;; REVIEW: Should be addressed upstream.
(dolist (mode '(csharp-ts-mode
python-ts-mode))
(advice-add mode :around #'+tree-sitter-ts-mode-inhibit-side-effects-a))
:config
;; HACK: The implementation of `treesit-enabled-modes's setter and
;; `treesit-major-mode-remap-alist' are intrusively opinionated, so I
;; disable it altogether as to not unexpectedly modify
;; `major-mode-remap-alist' at runtime. What's more, there's no guarantee
;; this will be populated correctly unless the user is on a particular
;; commit of Emacs 31 or newer. Best we simply ignore it.
(dolist (m treesit-major-mode-remap-alist)
(setq major-mode-remap-alist (delete m major-mode-remap-alist)))
;; `treesit-major-mode-remap-alist' is intrusively opinionated, so disable
;; it to avoid untimely (and overriding) modifications of
;; `major-mode-remap-alist' at runtime. What's more, this was only
;; introduced in 31, so ignoring them is more consistent for pre-31 users.
(when major-mode-remap-alist
(dolist (m treesit-major-mode-remap-alist)
(setq major-mode-remap-alist (delete m major-mode-remap-alist))))
(setq treesit-major-mode-remap-alist nil)
;; HACK: treesit lacks any way to dictate where to install grammars.
(add-to-list 'treesit-extra-load-path (concat doom-profile-data-dir "tree-sitter"))
(defadvice! +tree-sitter--install-grammar-to-local-dir-a (fn &rest args)
"Write grammars to `doom-profile-data-dir'."
:around #'treesit-install-language-grammar
:around #'treesit--build-grammar
(let ((user-emacs-directory doom-profile-data-dir))
(apply fn args)))
;; HACK: Some *-ts-mode packages modify `major-mode-remap-defaults'
;; inconsistently. Playing whack-a-mole to undo those changes is more hassle
;; than simply ignoring them (by overriding `major-mode-remap-defaults' for
@@ -56,6 +60,15 @@
major-mode-remap-defaults)))
(funcall fn mode)))
;; HACK: Keep $EMACSDIR clean by installing grammars to the active profile.
(add-to-list 'treesit-extra-load-path (concat doom-profile-data-dir "tree-sitter"))
(defadvice! +tree-sitter--install-grammar-to-local-dir-a (fn &rest args)
"Write grammars to `doom-profile-data-dir'."
:around #'treesit-install-language-grammar
:around #'treesit--build-grammar
(let ((user-emacs-directory doom-profile-data-dir))
(apply fn args)))
;; TODO: Move most of these out to modules
(dolist (map '((awk "https://github.com/Beaglefoot/tree-sitter-awk" nil nil nil nil)
(bibtex "https://github.com/latex-lsp/tree-sitter-bibtex" nil nil nil nil)