From 092304d9e0ccc37cc0ddaa9b136457e56a1cac20 Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Sun, 12 Oct 2025 11:47:26 -0500 Subject: changing repositories --- assets/info/aspell-dev.info.gz | Bin 0 -> 23533 bytes assets/info/aspell.info.gz | Bin 0 -> 76304 bytes assets/info/assuan.info.gz | Bin 0 -> 46039 bytes assets/info/auth.info.gz | Bin 0 -> 16905 bytes assets/info/autoconf-archive.info-1.gz | Bin 0 -> 42760 bytes assets/info/autoconf-archive.info-2.gz | Bin 0 -> 45988 bytes assets/info/autoconf-archive.info-3.gz | Bin 0 -> 51293 bytes assets/info/autoconf-archive.info-4.gz | Bin 0 -> 43243 bytes assets/info/autoconf-archive.info.gz | Bin 0 -> 6957 bytes assets/info/autoconf.info.gz | Bin 0 -> 325017 bytes assets/info/automake.info-1.gz | Bin 0 -> 88407 bytes assets/info/automake.info-2.gz | Bin 0 -> 89582 bytes assets/info/automake.info.gz | Bin 0 -> 3436 bytes assets/info/autotype.info.gz | Bin 0 -> 17823 bytes assets/info/bash.info.gz | Bin 0 -> 167064 bytes assets/info/bc.info.gz | Bin 0 -> 12573 bytes assets/info/bovine.info.gz | Bin 0 -> 12579 bytes assets/info/calc.info.gz | Bin 0 -> 477808 bytes assets/info/ccmode.info.gz | Bin 0 -> 91193 bytes assets/info/cl.info.gz | Bin 0 -> 74268 bytes assets/info/coreutils.info.gz | Bin 0 -> 247917 bytes assets/info/dbus.info.gz | Bin 0 -> 29266 bytes assets/info/diffutils.info.gz | Bin 0 -> 62843 bytes assets/info/dir | 361 + assets/info/dired-x.info.gz | Bin 0 -> 17102 bytes assets/info/ebrowse.info.gz | Bin 0 -> 21189 bytes assets/info/ede.info.gz | Bin 0 -> 37497 bytes assets/info/ediff.info.gz | Bin 0 -> 43053 bytes assets/info/edt.info.gz | Bin 0 -> 20042 bytes assets/info/efaq.info.gz | Bin 0 -> 61770 bytes assets/info/eglot.info.gz | Bin 0 -> 29856 bytes assets/info/eieio.info.gz | Bin 0 -> 28392 bytes assets/info/eintr.info.gz | Bin 0 -> 209687 bytes assets/info/elisp.info.gz | Bin 0 -> 1337240 bytes assets/info/emacs-gnutls.info.gz | Bin 0 -> 11921 bytes assets/info/emacs-mime.info.gz | Bin 0 -> 29369 bytes assets/info/emacs.info.gz | Bin 0 -> 840022 bytes assets/info/epa.info.gz | Bin 0 -> 17320 bytes assets/info/erc.info.gz | Bin 0 -> 36517 bytes assets/info/ert.info.gz | Bin 0 -> 25038 bytes assets/info/eshell.info.gz | Bin 0 -> 46047 bytes assets/info/eudc.info.gz | Bin 0 -> 24727 bytes assets/info/eww.info.gz | Bin 0 -> 15752 bytes assets/info/find.info.gz | Bin 0 -> 98850 bytes assets/info/flymake.info.gz | Bin 0 -> 25388 bytes assets/info/forms.info.gz | Bin 0 -> 18418 bytes assets/info/gawk.info.gz | Bin 0 -> 500896 bytes assets/info/gdb.info-1.gz | Bin 0 -> 89435 bytes assets/info/gdb.info-2.gz | Bin 0 -> 85738 bytes assets/info/gdb.info-3.gz | Bin 0 -> 88797 bytes assets/info/gdb.info-4.gz | Bin 0 -> 85382 bytes assets/info/gdb.info-5.gz | Bin 0 -> 75808 bytes assets/info/gdb.info-6.gz | Bin 0 -> 71298 bytes assets/info/gdb.info-7.gz | Bin 0 -> 85787 bytes assets/info/gdb.info-8.gz | Bin 0 -> 103944 bytes assets/info/gdb.info-9.gz | Bin 0 -> 32199 bytes assets/info/gdb.info.gz | Bin 0 -> 10535 bytes assets/info/git.info | 5681 + assets/info/gnupg.info-1.gz | Bin 0 -> 90817 bytes assets/info/gnupg.info-2.gz | Bin 0 -> 92567 bytes assets/info/gnupg.info-3.gz | Bin 0 -> 1825 bytes assets/info/gnupg.info.gz | Bin 0 -> 2921 bytes assets/info/gnus.info.gz | Bin 0 -> 390797 bytes assets/info/grep.info.gz | Bin 0 -> 35634 bytes assets/info/grub.info-1.gz | Bin 0 -> 80613 bytes assets/info/grub.info-2.gz | Bin 0 -> 50870 bytes assets/info/grub.info.gz | Bin 0 -> 6180 bytes assets/info/guile.info-1.gz | Bin 0 -> 92238 
bytes assets/info/guile.info-10.gz | Bin 0 -> 5047 bytes assets/info/guile.info-11.gz | Bin 0 -> 4215 bytes assets/info/guile.info-2.gz | Bin 0 -> 74040 bytes assets/info/guile.info-3.gz | Bin 0 -> 85843 bytes assets/info/guile.info-4.gz | Bin 0 -> 86814 bytes assets/info/guile.info-5.gz | Bin 0 -> 88511 bytes assets/info/guile.info-6.gz | Bin 0 -> 77566 bytes assets/info/guile.info-7.gz | Bin 0 -> 81040 bytes assets/info/guile.info-8.gz | Bin 0 -> 86205 bytes assets/info/guile.info-9.gz | Bin 0 -> 85176 bytes assets/info/guile.info.gz | Bin 0 -> 10001 bytes assets/info/gzip.info.gz | Bin 0 -> 15748 bytes assets/info/history.info.gz | Bin 0 -> 18783 bytes assets/info/htmlfontify.info.gz | Bin 0 -> 21977 bytes assets/info/idlwave.info.gz | Bin 0 -> 65633 bytes assets/info/ido.info.gz | Bin 0 -> 16126 bytes assets/info/info.info.gz | Bin 0 -> 25993 bytes assets/info/latex2e.info | 21120 + assets/info/mairix-el.info.gz | Bin 0 -> 13008 bytes assets/info/make.info-1.gz | Bin 0 -> 87999 bytes assets/info/make.info-2.gz | Bin 0 -> 86318 bytes assets/info/make.info-3.gz | Bin 0 -> 3629 bytes assets/info/make.info.gz | Bin 0 -> 2772 bytes assets/info/message.info.gz | Bin 0 -> 40672 bytes assets/info/mh-e.info.gz | Bin 0 -> 115424 bytes assets/info/modus-themes.info.gz | Bin 0 -> 70114 bytes assets/info/mu4e.info.gz | Bin 0 -> 75024 bytes assets/info/newsticker.info.gz | Bin 0 -> 14564 bytes assets/info/nxml-mode.info.gz | Bin 0 -> 18181 bytes assets/info/octave-mode.info.gz | Bin 0 -> 12973 bytes assets/info/org.info.gz | Bin 0 -> 300292 bytes assets/info/parted.info.gz | Bin 0 -> 20423 bytes assets/info/pcl-cvs.info.gz | Bin 0 -> 24461 bytes assets/info/pcl.info | 29024 + assets/info/pgg.info.gz | Bin 0 -> 14238 bytes assets/info/pinentry.info.gz | Bin 0 -> 16304 bytes assets/info/python.info | 532361 ++++++++++++++++++ assets/info/pythonpackagingguide.info | 15961 + assets/info/rcirc.info.gz | Bin 0 -> 20575 bytes assets/info/reftex.info.gz | Bin 0 -> 73871 bytes assets/info/regenerate_dir.sh | 61 + assets/info/remember.info.gz | Bin 0 -> 13794 bytes assets/info/sasl.info.gz | Bin 0 -> 10795 bytes assets/info/sbcl.info-1.gz | Bin 0 -> 88604 bytes assets/info/sbcl.info-2.gz | Bin 0 -> 71229 bytes assets/info/sbcl.info-3.gz | Bin 0 -> 715 bytes assets/info/sbcl.info.gz | Bin 0 -> 8335 bytes assets/info/sc.info.gz | Bin 0 -> 33364 bytes assets/info/sed.info.gz | Bin 0 -> 56008 bytes assets/info/semantic.info.gz | Bin 0 -> 30474 bytes assets/info/ses.info.gz | Bin 0 -> 24606 bytes assets/info/setuptools.info | 26390 + assets/info/sicp.info | 33394 ++ assets/info/sieve.info.gz | Bin 0 -> 11676 bytes assets/info/smtpmail.info.gz | Bin 0 -> 14542 bytes assets/info/speedbar.info.gz | Bin 0 -> 24022 bytes assets/info/srecode.info.gz | Bin 0 -> 27074 bytes assets/info/standards.info | 6129 + assets/info/stow.info.gz | Bin 0 -> 30997 bytes assets/info/tar.info-1.gz | Bin 0 -> 88101 bytes assets/info/tar.info-2.gz | Bin 0 -> 93693 bytes assets/info/tar.info-3.gz | Bin 0 -> 7667 bytes assets/info/tar.info.gz | Bin 0 -> 5236 bytes assets/info/texi2any_api.info.gz | Bin 0 -> 59734 bytes assets/info/texi2any_internals.info.gz | Bin 0 -> 65589 bytes assets/info/texinfo.info-1.gz | Bin 0 -> 86221 bytes assets/info/texinfo.info-2.gz | Bin 0 -> 89529 bytes assets/info/texinfo.info-3.gz | Bin 0 -> 80150 bytes assets/info/texinfo.info-4.gz | Bin 0 -> 23998 bytes assets/info/texinfo.info.gz | Bin 0 -> 6843 bytes assets/info/todo-mode.info.gz | Bin 0 -> 32712 bytes assets/info/tramp.info.gz | Bin 0 -> 75569 bytes 
assets/info/transient.info.gz | Bin 0 -> 42747 bytes assets/info/url.info.gz | Bin 0 -> 24054 bytes assets/info/use-package.info | 2386 + assets/info/use-package.info.gz | Bin 0 -> 31367 bytes assets/info/use-package.texi.files/doclicense.texi | 505 + assets/info/use-package.texi.files/docstyle.texi | 19 + .../info/use-package.texi.files/use-package.texi | 1903 + assets/info/vhdl-mode.info.gz | Bin 0 -> 19172 bytes assets/info/vip.info.gz | Bin 0 -> 30306 bytes assets/info/viper.info.gz | Bin 0 -> 66771 bytes assets/info/vtable.info.gz | Bin 0 -> 15110 bytes assets/info/wget.info.gz | Bin 0 -> 71884 bytes assets/info/widget.info.gz | Bin 0 -> 38053 bytes assets/info/wisent.info.gz | Bin 0 -> 26285 bytes assets/info/woman.info.gz | Bin 0 -> 26706 bytes assets/info/zsh.info | 191 + assets/info/zsh.info-1 | 7487 + assets/info/zsh.info-2 | 6945 + assets/info/zsh.info-3 | 5810 + assets/info/zsh.info-4 | 7058 + assets/info/zsh.info-5 | 7230 + assets/info/zsh.info-6 | Bin 0 -> 289103 bytes assets/info/zsh.info-7 | Bin 0 -> 38011 bytes 163 files changed, 710016 insertions(+) create mode 100644 assets/info/aspell-dev.info.gz create mode 100644 assets/info/aspell.info.gz create mode 100644 assets/info/assuan.info.gz create mode 100644 assets/info/auth.info.gz create mode 100644 assets/info/autoconf-archive.info-1.gz create mode 100644 assets/info/autoconf-archive.info-2.gz create mode 100644 assets/info/autoconf-archive.info-3.gz create mode 100644 assets/info/autoconf-archive.info-4.gz create mode 100644 assets/info/autoconf-archive.info.gz create mode 100644 assets/info/autoconf.info.gz create mode 100644 assets/info/automake.info-1.gz create mode 100644 assets/info/automake.info-2.gz create mode 100644 assets/info/automake.info.gz create mode 100644 assets/info/autotype.info.gz create mode 100644 assets/info/bash.info.gz create mode 100644 assets/info/bc.info.gz create mode 100644 assets/info/bovine.info.gz create mode 100644 assets/info/calc.info.gz create mode 100644 assets/info/ccmode.info.gz create mode 100644 assets/info/cl.info.gz create mode 100644 assets/info/coreutils.info.gz create mode 100644 assets/info/dbus.info.gz create mode 100644 assets/info/diffutils.info.gz create mode 100644 assets/info/dir create mode 100644 assets/info/dired-x.info.gz create mode 100644 assets/info/ebrowse.info.gz create mode 100644 assets/info/ede.info.gz create mode 100644 assets/info/ediff.info.gz create mode 100644 assets/info/edt.info.gz create mode 100644 assets/info/efaq.info.gz create mode 100644 assets/info/eglot.info.gz create mode 100644 assets/info/eieio.info.gz create mode 100644 assets/info/eintr.info.gz create mode 100644 assets/info/elisp.info.gz create mode 100644 assets/info/emacs-gnutls.info.gz create mode 100644 assets/info/emacs-mime.info.gz create mode 100644 assets/info/emacs.info.gz create mode 100644 assets/info/epa.info.gz create mode 100644 assets/info/erc.info.gz create mode 100644 assets/info/ert.info.gz create mode 100644 assets/info/eshell.info.gz create mode 100644 assets/info/eudc.info.gz create mode 100644 assets/info/eww.info.gz create mode 100644 assets/info/find.info.gz create mode 100644 assets/info/flymake.info.gz create mode 100644 assets/info/forms.info.gz create mode 100644 assets/info/gawk.info.gz create mode 100644 assets/info/gdb.info-1.gz create mode 100644 assets/info/gdb.info-2.gz create mode 100644 assets/info/gdb.info-3.gz create mode 100644 assets/info/gdb.info-4.gz create mode 100644 assets/info/gdb.info-5.gz create mode 100644 assets/info/gdb.info-6.gz 
create mode 100644 assets/info/gdb.info-7.gz create mode 100644 assets/info/gdb.info-8.gz create mode 100644 assets/info/gdb.info-9.gz create mode 100644 assets/info/gdb.info.gz create mode 100644 assets/info/git.info create mode 100644 assets/info/gnupg.info-1.gz create mode 100644 assets/info/gnupg.info-2.gz create mode 100644 assets/info/gnupg.info-3.gz create mode 100644 assets/info/gnupg.info.gz create mode 100644 assets/info/gnus.info.gz create mode 100644 assets/info/grep.info.gz create mode 100644 assets/info/grub.info-1.gz create mode 100644 assets/info/grub.info-2.gz create mode 100644 assets/info/grub.info.gz create mode 100644 assets/info/guile.info-1.gz create mode 100644 assets/info/guile.info-10.gz create mode 100644 assets/info/guile.info-11.gz create mode 100644 assets/info/guile.info-2.gz create mode 100644 assets/info/guile.info-3.gz create mode 100644 assets/info/guile.info-4.gz create mode 100644 assets/info/guile.info-5.gz create mode 100644 assets/info/guile.info-6.gz create mode 100644 assets/info/guile.info-7.gz create mode 100644 assets/info/guile.info-8.gz create mode 100644 assets/info/guile.info-9.gz create mode 100644 assets/info/guile.info.gz create mode 100644 assets/info/gzip.info.gz create mode 100644 assets/info/history.info.gz create mode 100644 assets/info/htmlfontify.info.gz create mode 100644 assets/info/idlwave.info.gz create mode 100644 assets/info/ido.info.gz create mode 100644 assets/info/info.info.gz create mode 100644 assets/info/latex2e.info create mode 100644 assets/info/mairix-el.info.gz create mode 100644 assets/info/make.info-1.gz create mode 100644 assets/info/make.info-2.gz create mode 100644 assets/info/make.info-3.gz create mode 100644 assets/info/make.info.gz create mode 100644 assets/info/message.info.gz create mode 100644 assets/info/mh-e.info.gz create mode 100644 assets/info/modus-themes.info.gz create mode 100644 assets/info/mu4e.info.gz create mode 100644 assets/info/newsticker.info.gz create mode 100644 assets/info/nxml-mode.info.gz create mode 100644 assets/info/octave-mode.info.gz create mode 100644 assets/info/org.info.gz create mode 100644 assets/info/parted.info.gz create mode 100644 assets/info/pcl-cvs.info.gz create mode 100644 assets/info/pcl.info create mode 100644 assets/info/pgg.info.gz create mode 100644 assets/info/pinentry.info.gz create mode 100644 assets/info/python.info create mode 100644 assets/info/pythonpackagingguide.info create mode 100644 assets/info/rcirc.info.gz create mode 100644 assets/info/reftex.info.gz create mode 100755 assets/info/regenerate_dir.sh create mode 100644 assets/info/remember.info.gz create mode 100644 assets/info/sasl.info.gz create mode 100644 assets/info/sbcl.info-1.gz create mode 100644 assets/info/sbcl.info-2.gz create mode 100644 assets/info/sbcl.info-3.gz create mode 100644 assets/info/sbcl.info.gz create mode 100644 assets/info/sc.info.gz create mode 100644 assets/info/sed.info.gz create mode 100644 assets/info/semantic.info.gz create mode 100644 assets/info/ses.info.gz create mode 100644 assets/info/setuptools.info create mode 100644 assets/info/sicp.info create mode 100644 assets/info/sieve.info.gz create mode 100644 assets/info/smtpmail.info.gz create mode 100644 assets/info/speedbar.info.gz create mode 100644 assets/info/srecode.info.gz create mode 100644 assets/info/standards.info create mode 100644 assets/info/stow.info.gz create mode 100644 assets/info/tar.info-1.gz create mode 100644 assets/info/tar.info-2.gz create mode 100644 assets/info/tar.info-3.gz create mode 
100644 assets/info/tar.info.gz create mode 100644 assets/info/texi2any_api.info.gz create mode 100644 assets/info/texi2any_internals.info.gz create mode 100644 assets/info/texinfo.info-1.gz create mode 100644 assets/info/texinfo.info-2.gz create mode 100644 assets/info/texinfo.info-3.gz create mode 100644 assets/info/texinfo.info-4.gz create mode 100644 assets/info/texinfo.info.gz create mode 100644 assets/info/todo-mode.info.gz create mode 100644 assets/info/tramp.info.gz create mode 100644 assets/info/transient.info.gz create mode 100644 assets/info/url.info.gz create mode 100644 assets/info/use-package.info create mode 100644 assets/info/use-package.info.gz create mode 100644 assets/info/use-package.texi.files/doclicense.texi create mode 100644 assets/info/use-package.texi.files/docstyle.texi create mode 100644 assets/info/use-package.texi.files/use-package.texi create mode 100644 assets/info/vhdl-mode.info.gz create mode 100644 assets/info/vip.info.gz create mode 100644 assets/info/viper.info.gz create mode 100644 assets/info/vtable.info.gz create mode 100644 assets/info/wget.info.gz create mode 100644 assets/info/widget.info.gz create mode 100644 assets/info/wisent.info.gz create mode 100644 assets/info/woman.info.gz create mode 100644 assets/info/zsh.info create mode 100644 assets/info/zsh.info-1 create mode 100644 assets/info/zsh.info-2 create mode 100644 assets/info/zsh.info-3 create mode 100644 assets/info/zsh.info-4 create mode 100644 assets/info/zsh.info-5 create mode 100644 assets/info/zsh.info-6 create mode 100644 assets/info/zsh.info-7 (limited to 'assets/info') diff --git a/assets/info/aspell-dev.info.gz b/assets/info/aspell-dev.info.gz new file mode 100644 index 00000000..0406fd34 Binary files /dev/null and b/assets/info/aspell-dev.info.gz differ diff --git a/assets/info/aspell.info.gz b/assets/info/aspell.info.gz new file mode 100644 index 00000000..75323ab8 Binary files /dev/null and b/assets/info/aspell.info.gz differ diff --git a/assets/info/assuan.info.gz b/assets/info/assuan.info.gz new file mode 100644 index 00000000..860d1c5b Binary files /dev/null and b/assets/info/assuan.info.gz differ diff --git a/assets/info/auth.info.gz b/assets/info/auth.info.gz new file mode 100644 index 00000000..c1adcace Binary files /dev/null and b/assets/info/auth.info.gz differ diff --git a/assets/info/autoconf-archive.info-1.gz b/assets/info/autoconf-archive.info-1.gz new file mode 100644 index 00000000..38bb60c3 Binary files /dev/null and b/assets/info/autoconf-archive.info-1.gz differ diff --git a/assets/info/autoconf-archive.info-2.gz b/assets/info/autoconf-archive.info-2.gz new file mode 100644 index 00000000..33ff3c7b Binary files /dev/null and b/assets/info/autoconf-archive.info-2.gz differ diff --git a/assets/info/autoconf-archive.info-3.gz b/assets/info/autoconf-archive.info-3.gz new file mode 100644 index 00000000..4b4189ce Binary files /dev/null and b/assets/info/autoconf-archive.info-3.gz differ diff --git a/assets/info/autoconf-archive.info-4.gz b/assets/info/autoconf-archive.info-4.gz new file mode 100644 index 00000000..8db729c5 Binary files /dev/null and b/assets/info/autoconf-archive.info-4.gz differ diff --git a/assets/info/autoconf-archive.info.gz b/assets/info/autoconf-archive.info.gz new file mode 100644 index 00000000..20b7b4a7 Binary files /dev/null and b/assets/info/autoconf-archive.info.gz differ diff --git a/assets/info/autoconf.info.gz b/assets/info/autoconf.info.gz new file mode 100644 index 00000000..5477da01 Binary files /dev/null and 
b/assets/info/autoconf.info.gz differ diff --git a/assets/info/automake.info-1.gz b/assets/info/automake.info-1.gz new file mode 100644 index 00000000..7a176c8e Binary files /dev/null and b/assets/info/automake.info-1.gz differ diff --git a/assets/info/automake.info-2.gz b/assets/info/automake.info-2.gz new file mode 100644 index 00000000..d4fc33ec Binary files /dev/null and b/assets/info/automake.info-2.gz differ diff --git a/assets/info/automake.info.gz b/assets/info/automake.info.gz new file mode 100644 index 00000000..d55bd64e Binary files /dev/null and b/assets/info/automake.info.gz differ diff --git a/assets/info/autotype.info.gz b/assets/info/autotype.info.gz new file mode 100644 index 00000000..56106110 Binary files /dev/null and b/assets/info/autotype.info.gz differ diff --git a/assets/info/bash.info.gz b/assets/info/bash.info.gz new file mode 100644 index 00000000..c3072fcd Binary files /dev/null and b/assets/info/bash.info.gz differ diff --git a/assets/info/bc.info.gz b/assets/info/bc.info.gz new file mode 100644 index 00000000..3e94686a Binary files /dev/null and b/assets/info/bc.info.gz differ diff --git a/assets/info/bovine.info.gz b/assets/info/bovine.info.gz new file mode 100644 index 00000000..81f908c8 Binary files /dev/null and b/assets/info/bovine.info.gz differ diff --git a/assets/info/calc.info.gz b/assets/info/calc.info.gz new file mode 100644 index 00000000..5566b621 Binary files /dev/null and b/assets/info/calc.info.gz differ diff --git a/assets/info/ccmode.info.gz b/assets/info/ccmode.info.gz new file mode 100644 index 00000000..704a4a3c Binary files /dev/null and b/assets/info/ccmode.info.gz differ diff --git a/assets/info/cl.info.gz b/assets/info/cl.info.gz new file mode 100644 index 00000000..fc2a56f1 Binary files /dev/null and b/assets/info/cl.info.gz differ diff --git a/assets/info/coreutils.info.gz b/assets/info/coreutils.info.gz new file mode 100644 index 00000000..c4db87c4 Binary files /dev/null and b/assets/info/coreutils.info.gz differ diff --git a/assets/info/dbus.info.gz b/assets/info/dbus.info.gz new file mode 100644 index 00000000..da301c50 Binary files /dev/null and b/assets/info/dbus.info.gz differ diff --git a/assets/info/diffutils.info.gz b/assets/info/diffutils.info.gz new file mode 100644 index 00000000..5f858ef7 Binary files /dev/null and b/assets/info/diffutils.info.gz differ diff --git a/assets/info/dir b/assets/info/dir new file mode 100644 index 00000000..0431252c --- /dev/null +++ b/assets/info/dir @@ -0,0 +1,361 @@ +This is the file .../info/dir, which contains the +topmost node of the Info hierarchy, called (dir)Top. +The first time you invoke Info you start off looking at this node. + +File: dir, Node: Top This is the top of the INFO tree + + This (the Directory node) gives a menu of major topics. + Typing "q" exits, "H" lists all Info commands, "d" returns here, + "h" gives a primer for first-timers, + "mEmacs" visits the Emacs manual, etc. + + In Emacs, you can click mouse button 2 on a menu item or cross reference + to select it. + +* Menu: + +Emacs +* Emacs: (emacs). The extensible self-documenting text editor. +* Emacs FAQ: (efaq). Frequently Asked Questions about Emacs. + +Emacs editing modes +* CC Mode: (ccmode). Emacs mode for editing C, C++, Objective-C, + Java, Pike, AWK, and CORBA IDL code. +* IDLWAVE: (idlwave). Major mode and shell for IDL files. +* Octave mode: (octave-mode). Emacs mode for editing GNU Octave files. +* Org Mode: (org). Outline-based notes management and organizer. +* VHDL Mode: (vhdl-mode). 
Emacs mode for editing VHDL code. +* nXML Mode: (nxml-mode). XML editing mode with RELAX NG support. + +Emacs Lisp +* Elisp: (elisp). The Emacs Lisp Reference Manual. +* Emacs Lisp Intro: (eintr). A simple introduction to Emacs Lisp + +Emacs Lisp Libraries +* Auth-source: (auth). The Emacs auth-source library. +* CL-Lib: (cl). Partial Common Lisp support for Emacs Lisp. +* D-Bus: (dbus). Using D-Bus in Emacs. +* Emacs MIME: (emacs-mime). Emacs MIME de/composition library. +* SMTP: (smtpmail). Emacs library for sending mail via SMTP. +* URL: (url). URL loading package. +* Widget: (widget). The "widget" package. + +Emacs misc features +* Autotype: (autotype). Convenient features for text. +* Bovine: (bovine). Semantic bovine parser development. +* Calc: (calc). Advanced desk calculator and mathematical tool. +* Dired-X: (dired-x). Dired Extra Features. +* EDE: (ede). The Emacs Development Environment. +* EDT: (edt). An Emacs emulation of the EDT editor. +* EIEIO: (eieio). An objects system for Emacs Lisp. +* ERC: (erc). Powerful and extensible IRC client for Emacs. +* ERT: (ert). Emacs Lisp regression testing tool. +* EUDC: (eudc). Client for directory servers (LDAP, BBDB). +* EWW: (eww). Emacs Web Wowser +* EasyPG Assistant: (epa). An Emacs user interface to GNU Privacy Guard. +* Ebrowse: (ebrowse). A C++ class browser for Emacs. +* Ediff: (ediff). Compare and merge text and code. +* Eglot: (eglot). Language Server Protocol client for Emacs. +* Emacs GnuTLS: (emacs-gnutls). The Emacs GnuTLS integration. +* Eshell: (eshell). A command shell implemented in Emacs Lisp. +* Flymake: (flymake). A universal on-the-fly syntax checker. +* Forms: (forms). Editing data bases by filling in forms. +* Gnus: (gnus). The newsreader Gnus. +* Htmlfontify: (htmlfontify). Convert source code to html. +* Ido: (ido). Interactively do things with buffers and files. +* MH-E: (mh-e). Emacs interface to the MH mail system. +* Mairix: (mairix-el). Emacs interface to the Mairix mail indexer. +* Message: (message). Mail & news composition mode goes with Gnus. +* Modus Themes: (modus-themes). Elegant, legible and customizable themes. +* Mu4e: (Mu4e). An email client for GNU Emacs. +* Newsticker: (newsticker). A feed reader for Emacs. +* PCL-CVS: (pcl-cvs). Emacs front-end to CVS. +* Rcirc: (rcirc). Internet Relay Chat (IRC) client. +* RefTeX: (reftex). LaTeX cross-references and citations. +* Remember: (remember). Simple information manager for Emacs. +* SASL: (sasl). The Emacs SASL library. +* SC: (sc). Cite parts of messages you're replying to. +* SES: (ses). Simple Emacs Spreadsheet. +* SRecode: (srecode). Semantic template code generator. +* Semantic: (semantic). Source code parser library and utilities. +* Sieve: (sieve). Managing Sieve scripts in Emacs. +* Speedbar: (speedbar). File/Tag summarizing utility. +* Todo Mode: (todo-mode). Make and maintain todo lists. +* Tramp: (tramp). Emacs remote file access via ssh and scp. +* Transient: (transient). Transient Commands. +* VIP: (vip). An obsolete VI-emulation for Emacs. +* VIPER: (viper). A VI-emulation mode for Emacs. +* Wisent: (wisent). Semantic Wisent parser development. +* WoMan: (woman). Browse UN*X Manual Pages "W.O. (without) Man". +* use-package: (use-package). Declarative package configuration for Emacs. +* vtable: (vtable). Variable Pitch Tables. + +SICP +* SICP: (sicp). Structure and Interpretation of Computer + Programs +Shells +* Bash: (bash). The GNU Bourne-Again SHell. +* ZSH: (zsh). The Z Shell Manual. 
+ +Programming Languages +* Guile Reference: (guile). The Guile reference manual. +* python: (python.info). Python 3.13.7 documentation +* pythonpackagingguide: (pythonpackagingguide.info). + Python packaging guide. +* setuptools: (setuptools.info). + Python Setup tools. + +Basics +* Common options: (coreutils)Common options. +* Coreutils: (coreutils). Core GNU (file, text, shell) utilities. +* Date input formats: (coreutils)Date input formats. +* File permissions: (coreutils)File permissions. + Access modes. +* Finding files: (find). Operating on files matching certain criteria. + +Compression and Archiving +* Gzip: (gzip). General (de)compression of files (lzw). +* Tar: (tar). Making tape (or disk) archives. + +Development +* Git: (git). A fast distributed revision control system + +GNU Libraries +* Assuan: (assuan). An IPC library for non-persistent servers. + +GNU organization +* Standards: (standards). GNU coding standards. + +GNU Packages +* Aspell: (aspell). GNU Aspell spelling checker +* Aspell-dev: (aspell-dev). For Aspell developers + +GNU Utilities +* dirmngr: (gnupg). X.509 CRL and OCSP server. +* dirmngr-client: (gnupg). X.509 CRL and OCSP client. +* gpg-agent: (gnupg). The secret key daemon. +* gpg2: (gnupg). OpenPGP encryption and signing tool. +* gpgsm: (gnupg). S/MIME encryption and signing tool. +* pinentry: (pinentry). Securely ask for a passphrase or PIN. + +Individual utilities +* aclocal-invocation: (automake)aclocal Invocation. + Generating aclocal.m4. +* arch: (coreutils)arch invocation. Print machine hardware name. +* autoconf-invocation: (autoconf)autoconf Invocation. + How to create configuration + scripts +* autoheader: (autoconf)autoheader Invocation. How to create configuration + templates +* autom4te: (autoconf)autom4te Invocation. The Autoconf executables + backbone +* automake-invocation: (automake)automake Invocation. + Generating Makefile.in. +* autoreconf: (autoconf)autoreconf Invocation. Remaking multiple + ‘configure’ scripts +* autoscan: (autoconf)autoscan Invocation. Semi-automatic + ‘configure.ac’ writing +* autoupdate: (autoconf)autoupdate Invocation. Automatic update of + ‘configure.ac’ +* awk: (gawk)Invoking Gawk. Text scanning and processing. +* b2sum: (coreutils)b2sum invocation. Print or check BLAKE2 digests. +* base32: (coreutils)base32 invocation. Base32 encode/decode data. +* base64: (coreutils)base64 invocation. Base64 encode/decode data. +* basename: (coreutils)basename invocation. Strip directory and suffix. +* basenc: (coreutils)basenc invocation. Encoding/decoding of data. +* cat: (coreutils)cat invocation. Concatenate and write files. +* chcon: (coreutils)chcon invocation. Change SELinux CTX of files. +* chgrp: (coreutils)chgrp invocation. Change file groups. +* chmod: (coreutils)chmod invocation. Change access permissions. +* chown: (coreutils)chown invocation. Change file owners and groups. +* chroot: (coreutils)chroot invocation. Specify the root directory. +* cksum: (coreutils)cksum invocation. Print POSIX CRC checksum. +* cmp: (diffutils)Invoking cmp. Compare 2 files byte by byte. +* comm: (coreutils)comm invocation. Compare sorted files by line. +* config.status: (autoconf)config.status Invocation. + Recreating configurations. +* configure: (autoconf)configure Invocation. Configuring a package. +* cp: (coreutils)cp invocation. Copy files. +* csplit: (coreutils)csplit invocation. Split by context. +* cut: (coreutils)cut invocation. Print selected parts of lines. +* date: (coreutils)date invocation. Print/set system date and time. 
+* dd: (coreutils)dd invocation. Copy and convert a file. +* df: (coreutils)df invocation. Report file system usage. +* diff: (diffutils)Invoking diff. Compare 2 files line by line. +* diff3: (diffutils)Invoking diff3. Compare 3 files line by line. +* dir: (coreutils)dir invocation. List directories briefly. +* dircolors: (coreutils)dircolors invocation. Color setup for ls. +* dirname: (coreutils)dirname invocation. Strip last file name component. +* du: (coreutils)du invocation. Report file usage. +* echo: (coreutils)echo invocation. Print a line of text. +* env: (coreutils)env invocation. Modify the environment. +* expand: (coreutils)expand invocation. Convert tabs to spaces. +* expr: (coreutils)expr invocation. Evaluate expressions. +* factor: (coreutils)factor invocation. Print prime factors +* false: (coreutils)false invocation. Do nothing, unsuccessfully. +* find: (find)Finding Files. Finding and acting on files. +* fmt: (coreutils)fmt invocation. Reformat paragraph text. +* fold: (coreutils)fold invocation. Wrap long input lines. +* groups: (coreutils)groups invocation. Print group names a user is in. +* gunzip: (gzip)Overview. Decompression. +* gzexe: (gzip)Overview. Compress executables. +* head: (coreutils)head invocation. Output the first part of files. +* hostid: (coreutils)hostid invocation. Print numeric host identifier. +* hostname: (coreutils)hostname invocation. Print or set system name. +* id: (coreutils)id invocation. Print user identity. +* ifnames: (autoconf)ifnames Invocation. Listing conditionals in source. +* install: (coreutils)install invocation. Copy files and set attributes. +* join: (coreutils)join invocation. Join lines on a common field. +* kill: (coreutils)kill invocation. Send a signal to processes. +* link: (coreutils)link invocation. Make hard links between files. +* ln: (coreutils)ln invocation. Make links between files. +* locate: (find)Invoking locate. Finding files in a database. +* logname: (coreutils)logname invocation. Print current login name. +* ls: (coreutils)ls invocation. List directory contents. +* md5sum: (coreutils)md5sum invocation. Print or check MD5 digests. +* mkdir: (coreutils)mkdir invocation. Create directories. +* mkfifo: (coreutils)mkfifo invocation. Create FIFOs (named pipes). +* mknod: (coreutils)mknod invocation. Create special files. +* mktemp: (coreutils)mktemp invocation. Create temporary files. +* mv: (coreutils)mv invocation. Rename files. +* nice: (coreutils)nice invocation. Modify niceness. +* nl: (coreutils)nl invocation. Number lines and write files. +* nohup: (coreutils)nohup invocation. Immunize to hangups. +* nproc: (coreutils)nproc invocation. Print the number of processors. +* numfmt: (coreutils)numfmt invocation. Reformat numbers. +* od: (coreutils)od invocation. Dump files in octal, etc. +* paste: (coreutils)paste invocation. Merge lines of files. +* patch: (diffutils)Invoking patch. Apply a patch to a file. +* pathchk: (coreutils)pathchk invocation. Check file name portability. +* pinky: (coreutils)pinky invocation. Print information about users. +* pr: (coreutils)pr invocation. Paginate or columnate files. +* printenv: (coreutils)printenv invocation. Print environment variables. +* printf: (coreutils)printf invocation. Format and print data. +* ptx: (coreutils)ptx invocation. Produce permuted indexes. +* pwd: (coreutils)pwd invocation. Print working directory. +* readlink: (coreutils)readlink invocation. Print referent of a symlink. +* realpath: (coreutils)realpath invocation. Print resolved file names. 
+* rm: (coreutils)rm invocation. Remove files. +* rmdir: (coreutils)rmdir invocation. Remove empty directories. +* runcon: (coreutils)runcon invocation. Run in specified SELinux CTX. +* sdiff: (diffutils)Invoking sdiff. Merge 2 files side-by-side. +* seq: (coreutils)seq invocation. Print numeric sequences +* sha1sum: (coreutils)sha1sum invocation. Print or check SHA-1 digests. +* sha2: (coreutils)sha2 utilities. Print or check SHA-2 digests. +* shred: (coreutils)shred invocation. Remove files more securely. +* shuf: (coreutils)shuf invocation. Shuffling text files. +* sleep: (coreutils)sleep invocation. Delay for a specified time. +* sort: (coreutils)sort invocation. Sort text files. +* split: (coreutils)split invocation. Split into pieces. +* stat: (coreutils)stat invocation. Report file(system) status. +* stdbuf: (coreutils)stdbuf invocation. Modify stdio buffering. +* stty: (coreutils)stty invocation. Print/change terminal settings. +* sum: (coreutils)sum invocation. Print traditional checksum. +* sync: (coreutils)sync invocation. Sync files to stable storage. +* tac: (coreutils)tac invocation. Reverse files. +* tail: (coreutils)tail invocation. Output the last part of files. +* tar: (tar)tar invocation. Invoking GNU ‘tar’. +* tee: (coreutils)tee invocation. Redirect to multiple files. +* test: (coreutils)test invocation. File/string tests. +* testsuite: (autoconf)testsuite Invocation. Running an Autotest test suite. +* timeout: (coreutils)timeout invocation. Run with time limit. +* touch: (coreutils)touch invocation. Change file timestamps. +* tr: (coreutils)tr invocation. Translate characters. +* true: (coreutils)true invocation. Do nothing, successfully. +* truncate: (coreutils)truncate invocation. Shrink/extend size of a file. +* tsort: (coreutils)tsort invocation. Topological sort. +* tty: (coreutils)tty invocation. Print terminal name. +* uname: (coreutils)uname invocation. Print system information. +* unexpand: (coreutils)unexpand invocation. Convert spaces to tabs. +* uniq: (coreutils)uniq invocation. Uniquify files. +* unlink: (coreutils)unlink invocation. Removal via unlink(2). +* updatedb: (find)Invoking updatedb. Building the locate database. +* uptime: (coreutils)uptime invocation. Print uptime and load. +* users: (coreutils)users invocation. Print current user names. +* vdir: (coreutils)vdir invocation. List directories verbosely. +* wc: (coreutils)wc invocation. Line, word, and byte counts. +* who: (coreutils)who invocation. Print who is logged in. +* whoami: (coreutils)whoami invocation. Print effective user ID. +* xargs: (find)Invoking xargs. Operating on many files. +* yes: (coreutils)yes invocation. Print a string indefinitely. +* zcat: (gzip)Overview. Decompression to stdout. +* zdiff: (gzip)Overview. Compare compressed files. +* zforce: (gzip)Overview. Force .gz extension on files. +* zgrep: (gzip)Overview. Search compressed files. +* zmore: (gzip)Overview. Decompression output by pages. + +Kernel +* GRUB: (grub). The GRand Unified Bootloader +* grub-install: (grub)Invoking grub-install. + Install GRUB on your drive +* grub-mkconfig: (grub)Invoking grub-mkconfig. + Generate GRUB configuration +* grub-mkpasswd-pbkdf2: (grub)Invoking grub-mkpasswd-pbkdf2. +* grub-mkrelpath: (grub)Invoking grub-mkrelpath. +* grub-mkrescue: (grub)Invoking grub-mkrescue. + Make a GRUB rescue image +* grub-mount: (grub)Invoking grub-mount. + Mount a file system using GRUB +* grub-probe: (grub)Invoking grub-probe. 
+ Probe device information +* grub-script-check: (grub)Invoking grub-script-check. + +Libraries +* History: (history). The GNU history library API. + +Math +* bc: (bc). An arbitrary precision calculator language. + +Miscellaneous + +Network applications +* Wget: (wget). Non-interactive network downloader. + +Software development +* Autoconf: (autoconf). Create source code configuration scripts. +* Autoconf Archive: (autoconf-archive). + A collection of reusable Autoconf macros. +* Automake: (automake). Making GNU standards-compliant Makefiles. +* Gdb: (gdb). The GNU debugger. +* Make: (make). Remake files automatically. +* gdbserver: (gdb) Server. The GNU debugging server. +* sbcl: (sbcl). The Steel Bank Common Lisp compiler + +System administration +* Stow: (stow). GNU Stow. +* parted: (parted). GNU partitioning software + +TeX +* LaTeX2e: (latex2e). Unofficial LaTeX reference manual. + +Texinfo documentation system +* Info: (info). How to use the documentation browsing system. +* Texinfo: (texinfo). The GNU documentation format. +* install-info: (texinfo)Invoking install-info. + Update info/dir entries. +* makeinfo: (texinfo)Invoking texi2any. + Translate Texinfo source. +* pdftexi2dvi: (texinfo)PDF Output. + PDF output for Texinfo. +* pod2texi: (pod2texi)Invoking pod2texi. + Translate Perl Pod to Texinfo. +* texi2any: (texinfo)Invoking texi2any. + Translate Texinfo source. +* texi2dvi: (texinfo)Format with texi2dvi. + Print Texinfo documents. +* texi2pdf: (texinfo)PDF Output. + PDF output for Texinfo. +* texi2any customization: (texi2any_api). + Customizing texi2any output +* texi2any modules: (texi2any_internals). + Texinfo Perl modules used in texi2any +* texindex: (texinfo)Format with tex/texindex. + Sort Texinfo index files. + +Text creation and manipulation +* Diffutils: (diffutils). Comparing and merging files. +* Gawk: (gawk). A text scanning and processing language. +* grep: (grep). Print lines that match patterns. +* sed: (sed). Stream EDitor. 
+ + diff --git a/assets/info/dired-x.info.gz b/assets/info/dired-x.info.gz new file mode 100644 index 00000000..43a79a4f Binary files /dev/null and b/assets/info/dired-x.info.gz differ diff --git a/assets/info/ebrowse.info.gz b/assets/info/ebrowse.info.gz new file mode 100644 index 00000000..fa8896f1 Binary files /dev/null and b/assets/info/ebrowse.info.gz differ diff --git a/assets/info/ede.info.gz b/assets/info/ede.info.gz new file mode 100644 index 00000000..99e73fa7 Binary files /dev/null and b/assets/info/ede.info.gz differ diff --git a/assets/info/ediff.info.gz b/assets/info/ediff.info.gz new file mode 100644 index 00000000..8cade04b Binary files /dev/null and b/assets/info/ediff.info.gz differ diff --git a/assets/info/edt.info.gz b/assets/info/edt.info.gz new file mode 100644 index 00000000..a3bc3a2b Binary files /dev/null and b/assets/info/edt.info.gz differ diff --git a/assets/info/efaq.info.gz b/assets/info/efaq.info.gz new file mode 100644 index 00000000..f77e7ba2 Binary files /dev/null and b/assets/info/efaq.info.gz differ diff --git a/assets/info/eglot.info.gz b/assets/info/eglot.info.gz new file mode 100644 index 00000000..22e0f342 Binary files /dev/null and b/assets/info/eglot.info.gz differ diff --git a/assets/info/eieio.info.gz b/assets/info/eieio.info.gz new file mode 100644 index 00000000..f7f5b365 Binary files /dev/null and b/assets/info/eieio.info.gz differ diff --git a/assets/info/eintr.info.gz b/assets/info/eintr.info.gz new file mode 100644 index 00000000..6c85e99f Binary files /dev/null and b/assets/info/eintr.info.gz differ diff --git a/assets/info/elisp.info.gz b/assets/info/elisp.info.gz new file mode 100644 index 00000000..6eaa732c Binary files /dev/null and b/assets/info/elisp.info.gz differ diff --git a/assets/info/emacs-gnutls.info.gz b/assets/info/emacs-gnutls.info.gz new file mode 100644 index 00000000..71cfbcd6 Binary files /dev/null and b/assets/info/emacs-gnutls.info.gz differ diff --git a/assets/info/emacs-mime.info.gz b/assets/info/emacs-mime.info.gz new file mode 100644 index 00000000..c254357c Binary files /dev/null and b/assets/info/emacs-mime.info.gz differ diff --git a/assets/info/emacs.info.gz b/assets/info/emacs.info.gz new file mode 100644 index 00000000..e264ed44 Binary files /dev/null and b/assets/info/emacs.info.gz differ diff --git a/assets/info/epa.info.gz b/assets/info/epa.info.gz new file mode 100644 index 00000000..ad0c6fc1 Binary files /dev/null and b/assets/info/epa.info.gz differ diff --git a/assets/info/erc.info.gz b/assets/info/erc.info.gz new file mode 100644 index 00000000..2d9912c8 Binary files /dev/null and b/assets/info/erc.info.gz differ diff --git a/assets/info/ert.info.gz b/assets/info/ert.info.gz new file mode 100644 index 00000000..bc0a40e9 Binary files /dev/null and b/assets/info/ert.info.gz differ diff --git a/assets/info/eshell.info.gz b/assets/info/eshell.info.gz new file mode 100644 index 00000000..cd156a67 Binary files /dev/null and b/assets/info/eshell.info.gz differ diff --git a/assets/info/eudc.info.gz b/assets/info/eudc.info.gz new file mode 100644 index 00000000..b1ddef52 Binary files /dev/null and b/assets/info/eudc.info.gz differ diff --git a/assets/info/eww.info.gz b/assets/info/eww.info.gz new file mode 100644 index 00000000..1deb780c Binary files /dev/null and b/assets/info/eww.info.gz differ diff --git a/assets/info/find.info.gz b/assets/info/find.info.gz new file mode 100644 index 00000000..1135affb Binary files /dev/null and b/assets/info/find.info.gz differ diff --git a/assets/info/flymake.info.gz 
b/assets/info/flymake.info.gz new file mode 100644 index 00000000..2e777ab3 Binary files /dev/null and b/assets/info/flymake.info.gz differ diff --git a/assets/info/forms.info.gz b/assets/info/forms.info.gz new file mode 100644 index 00000000..16a3270e Binary files /dev/null and b/assets/info/forms.info.gz differ diff --git a/assets/info/gawk.info.gz b/assets/info/gawk.info.gz new file mode 100644 index 00000000..98e8bc83 Binary files /dev/null and b/assets/info/gawk.info.gz differ diff --git a/assets/info/gdb.info-1.gz b/assets/info/gdb.info-1.gz new file mode 100644 index 00000000..49d01b26 Binary files /dev/null and b/assets/info/gdb.info-1.gz differ diff --git a/assets/info/gdb.info-2.gz b/assets/info/gdb.info-2.gz new file mode 100644 index 00000000..088a4df3 Binary files /dev/null and b/assets/info/gdb.info-2.gz differ diff --git a/assets/info/gdb.info-3.gz b/assets/info/gdb.info-3.gz new file mode 100644 index 00000000..f4c00869 Binary files /dev/null and b/assets/info/gdb.info-3.gz differ diff --git a/assets/info/gdb.info-4.gz b/assets/info/gdb.info-4.gz new file mode 100644 index 00000000..57899f98 Binary files /dev/null and b/assets/info/gdb.info-4.gz differ diff --git a/assets/info/gdb.info-5.gz b/assets/info/gdb.info-5.gz new file mode 100644 index 00000000..cfb55ad7 Binary files /dev/null and b/assets/info/gdb.info-5.gz differ diff --git a/assets/info/gdb.info-6.gz b/assets/info/gdb.info-6.gz new file mode 100644 index 00000000..e1b30b3d Binary files /dev/null and b/assets/info/gdb.info-6.gz differ diff --git a/assets/info/gdb.info-7.gz b/assets/info/gdb.info-7.gz new file mode 100644 index 00000000..698402f3 Binary files /dev/null and b/assets/info/gdb.info-7.gz differ diff --git a/assets/info/gdb.info-8.gz b/assets/info/gdb.info-8.gz new file mode 100644 index 00000000..5cfa4d53 Binary files /dev/null and b/assets/info/gdb.info-8.gz differ diff --git a/assets/info/gdb.info-9.gz b/assets/info/gdb.info-9.gz new file mode 100644 index 00000000..af4a4ab7 Binary files /dev/null and b/assets/info/gdb.info-9.gz differ diff --git a/assets/info/gdb.info.gz b/assets/info/gdb.info.gz new file mode 100644 index 00000000..4354dabd Binary files /dev/null and b/assets/info/gdb.info.gz differ diff --git a/assets/info/git.info b/assets/info/git.info new file mode 100644 index 00000000..6c91b18a --- /dev/null +++ b/assets/info/git.info @@ -0,0 +1,5681 @@ +This is git.info, produced by makeinfo version 6.5 from +user-manual.texi. + +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* Git: (git). A fast distributed revision control system +END-INFO-DIR-ENTRY + + +File: git.info, Node: Top, Next: idm4, Up: (dir) + +Git User Manual +*************** + +* Menu: + +* : idm4. +* Repositories and Branches:: +* Exploring Git history:: +* Developing with Git:: +* Sharing development with others:: +* Rewriting history and maintaining patch series:: +* Advanced branch management:: +* Git concepts:: +* Submodules:: +* Low-level Git operations:: +* Hacking Git:: +* Git Glossary:: +* Git Quick Reference:: +* Notes and todo list for this manual:: + +— The Detailed Node Listing — + +Repositories and Branches + +* How to get a Git repository:: +* How to check out a different version of a project:: +* Understanding History; Commits:: +* Manipulating branches:: +* Examining an old version without creating a new branch:: +* Examining branches from a remote repository:: +* Naming branches, tags, and other references: Naming branches; tags; and other references. 
+* Updating a repository with git fetch:: +* Fetching branches from other repositories:: + +Exploring Git history + +* How to use bisect to find a regression:: +* Naming commits:: +* Creating tags:: +* Browsing revisions:: +* Generating diffs:: +* Viewing old file versions:: +* Examples:: + +Developing with Git + +* Telling Git your name:: +* Creating a new repository:: +* How to make a commit:: +* Creating good commit messages:: +* Ignoring files:: +* How to merge:: +* Resolving a merge:: +* Undoing a merge:: +* Fast-forward merges:: +* Fixing mistakes:: +* Ensuring good performance:: +* Ensuring reliability:: + +Sharing development with others + +* Getting updates with git pull:: +* Submitting patches to a project:: +* Importing patches to a project:: +* Public Git repositories:: +* How to get a Git repository with minimal history:: +* Examples: Examples <1>. + +Rewriting history and maintaining patch series + +* Creating the perfect patch series:: +* Keeping a patch series up to date using git rebase:: +* Rewriting a single commit:: +* Reordering or selecting from a patch series:: +* Using interactive rebases:: +* Other tools:: +* Problems with rewriting history:: +* Why bisecting merge commits can be harder than bisecting linear history:: + +Advanced branch management + +* Fetching individual branches:: +* git fetch and fast-forwards:: +* Forcing git fetch to do non-fast-forward updates:: +* Configuring remote-tracking branches:: + +Git concepts + +* The Object Database:: +* The index:: + +Submodules + +* Pitfalls with submodules:: + +Low-level Git operations + +* Object access and manipulation:: +* The Workflow:: +* Examining the data:: +* Merging multiple trees:: +* Merging multiple trees, continued: Merging multiple trees; continued. + +Hacking Git + +* Object storage format:: +* A birds-eye view of Git’s source code:: + +Git Glossary + +* Git explained:: + +Git Quick Reference + +* Creating a new repository: Creating a new repository <1>. +* Managing branches:: +* Exploring history:: +* Making changes:: +* Merging:: +* Sharing your changes:: +* Repository maintenance:: + +Notes and todo list for this manual + +* Todo list:: + + + +File: git.info, Node: idm4, Next: Repositories and Branches, Prev: Top, Up: Top + +Git is a fast distributed revision control system. + + This manual is designed to be readable by someone with basic UNIX +command-line skills, but no previous knowledge of Git. + + *note Repositories and Branches:: and *note Exploring Git history:: +explain how to fetch and study a project using git—read these chapters +to learn how to build and test a particular version of a software +project, search for regressions, and so on. + + People needing to do actual development will also want to read *note +Developing with Git:: and *note Sharing development with others::. + + Further chapters cover more specialized topics. + + Comprehensive reference documentation is available through the man +pages, or git-help(1) (git-help.html) command. For example, for the +command ‘git clone <repo>’, you can either use: + + $ man git-clone + + or: + + $ git help clone + + With the latter, you can use the manual viewer of your choice; see +git-help(1) (git-help.html) for more information. + + See also *note Git Quick Reference:: for a brief overview of Git +commands, without any explanation. + + Finally, see *note Notes and todo list for this manual:: for ways +that you can help make this manual more complete.
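An aside on viewing this manual once the Info pages are installed: ‘git help’ can usually be pointed at other documentation formats as well. The flags below exist in current Git releases, but the default viewer depends on how Git was built and on the ‘help.format’ setting:

     $ git help clone          # default format (man pages on most setups)
     $ git help --web clone    # HTML documentation in a browser
     $ git help --info clone   # Info format, if the info pages are installed

Setting ‘help.format’ to ‘info’ makes the Info form the default.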
+ + +File: git.info, Node: Repositories and Branches, Next: Exploring Git history, Prev: idm4, Up: Top + +1 Repositories and Branches +*************************** + +* Menu: + +* How to get a Git repository:: +* How to check out a different version of a project:: +* Understanding History; Commits:: +* Manipulating branches:: +* Examining an old version without creating a new branch:: +* Examining branches from a remote repository:: +* Naming branches, tags, and other references: Naming branches; tags; and other references. +* Updating a repository with git fetch:: +* Fetching branches from other repositories:: + + +File: git.info, Node: How to get a Git repository, Next: How to check out a different version of a project, Up: Repositories and Branches + +1.1 How to get a Git repository +=============================== + +It will be useful to have a Git repository to experiment with as you +read this manual. + + The best way to get one is by using the git-clone(1) (git-clone.html) +command to download a copy of an existing repository. If you don’t +already have a project in mind, here are some interesting examples: + + # Git itself (approx. 40MB download): + $ git clone git://git.kernel.org/pub/scm/git/git.git + # the Linux kernel (approx. 640MB download): + $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + + The initial clone may be time-consuming for a large project, but you +will only need to clone once. + + The clone command creates a new directory named after the project +(‘git’ or ‘linux’ in the examples above). After you cd into this +directory, you will see that it contains a copy of the project files, +called the working tree (*note [def_working_tree]::), together with a +special top-level directory named ‘.git’, which contains all the +information about the history of the project. + + +File: git.info, Node: How to check out a different version of a project, Next: Understanding History; Commits, Prev: How to get a Git repository, Up: Repositories and Branches + +1.2 How to check out a different version of a project +===================================================== + +Git is best thought of as a tool for storing the history of a collection +of files. It stores the history as a compressed collection of +interrelated snapshots of the project’s contents. In Git each such +version is called a commit (*note [def_commit]::). + + Those snapshots aren’t necessarily all arranged in a single line from +oldest to newest; instead, work may simultaneously proceed along +parallel lines of development, called branches (*note [def_branch]::), +which may merge and diverge. + + A single Git repository can track development on multiple branches. +It does this by keeping a list of heads (*note [def_head]::) which +reference the latest commit on each branch; the git-branch(1) +(git-branch.html) command shows you the list of branch heads: + + $ git branch + * master + + A freshly cloned repository contains a single branch head, by default +named "master", with the working directory initialized to the state of +the project referred to by that branch head. + + Most projects also use tags (*note [def_tag]::). Tags, like heads, +are references into the project’s history, and can be listed using the +git-tag(1) (git-tag.html) command: + + $ git tag -l + v2.6.11 + v2.6.11-tree + v2.6.12 + v2.6.12-rc2 + v2.6.12-rc3 + v2.6.12-rc4 + v2.6.12-rc5 + v2.6.12-rc6 + v2.6.13 + ... 
+ + Tags are expected to always point at the same version of a project, +while heads are expected to advance as development progresses. + + Create a new branch head pointing to one of these versions and check +it out using git-switch(1) (git-switch.html): + + $ git switch -c new v2.6.13 + + The working directory then reflects the contents that the project had +when it was tagged v2.6.13, and git-branch(1) (git-branch.html) shows +two branches, with an asterisk marking the currently checked-out branch: + + $ git branch + master + * new + + If you decide that you’d rather see version 2.6.17, you can modify +the current branch to point at v2.6.17 instead, with + + $ git reset --hard v2.6.17 + + Note that if the current branch head was your only reference to a +particular point in history, then resetting that branch may leave you +with no way to find the history it used to point to; so use this command +carefully. + + +File: git.info, Node: Understanding History; Commits, Next: Manipulating branches, Prev: How to check out a different version of a project, Up: Repositories and Branches + +1.3 Understanding History: Commits +================================== + +Every change in the history of a project is represented by a commit. +The git-show(1) (git-show.html) command shows the most recent commit on +the current branch: + + $ git show + commit 17cf781661e6d38f737f15f53ab552f1e95960d7 + Author: Linus Torvalds + Date: Tue Apr 19 14:11:06 2005 -0700 + + Remove duplicate getenv(DB_ENVIRONMENT) call + + Noted by Tony Luck. + + diff --git a/init-db.c b/init-db.c + index 65898fa..b002dc6 100644 + --- a/init-db.c + +++ b/init-db.c + @@ -7,7 +7,7 @@ + + int main(int argc, char **argv) + { + - char *sha1_dir = getenv(DB_ENVIRONMENT), *path; + + char *sha1_dir, *path; + int len, i; + + if (mkdir(".git", 0755) < 0) { + + As you can see, a commit shows who made the latest change, what they +did, and why. + + Every commit has a 40-hexdigit id, sometimes called the "object name" +or the "SHA-1 id", shown on the first line of the ‘git show’ output. +You can usually refer to a commit by a shorter name, such as a tag or a +branch name, but this longer name can also be useful. Most importantly, +it is a globally unique name for this commit: so if you tell somebody +else the object name (for example in email), then you are guaranteed +that name will refer to the same commit in their repository that it does +in yours (assuming their repository has that commit at all). Since the +object name is computed as a hash over the contents of the commit, you +are guaranteed that the commit can never change without its name also +changing. + + In fact, in *note Git concepts:: we shall see that everything stored +in Git history, including file data and directory contents, is stored in +an object with a name that is a hash of its contents. + +* Menu: + +* Understanding history; commits, parents, and reachability: Understanding history; commits; parents; and reachability. +* Understanding history; History diagrams:: +* Understanding history; What is a branch?:: + + +File: git.info, Node: Understanding history; commits; parents; and reachability, Next: Understanding history; History diagrams, Up: Understanding History; Commits + +1.3.1 Understanding history: commits, parents, and reachability +--------------------------------------------------------------- + +Every commit (except the very first commit in a project) also has a +parent commit which shows what happened before this commit. 
Following +the chain of parents will eventually take you back to the beginning of +the project. + + However, the commits do not form a simple list; Git allows lines of +development to diverge and then reconverge, and the point where two +lines of development reconverge is called a "merge". The commit +representing a merge can therefore have more than one parent, with each +parent representing the most recent commit on one of the lines of +development leading to that point. + + The best way to see how this works is using the gitk(1) (gitk.html) +command; running gitk now on a Git repository and looking for merge +commits will help understand how Git organizes history. + + In the following, we say that commit X is "reachable" from commit Y +if commit X is an ancestor of commit Y. Equivalently, you could say that +Y is a descendant of X, or that there is a chain of parents leading from +commit Y to commit X. + + +File: git.info, Node: Understanding history; History diagrams, Next: Understanding history; What is a branch?, Prev: Understanding history; commits; parents; and reachability, Up: Understanding History; Commits + +1.3.2 Understanding history: History diagrams +--------------------------------------------- + +We will sometimes represent Git history using diagrams like the one +below. Commits are shown as "o", and the links between them with lines +drawn with - / and \. Time goes left to right: + + o--o--o <-- Branch A + / + o--o--o <-- master + \ + o--o--o <-- Branch B + + If we need to talk about a particular commit, the character "o" may +be replaced with another letter or number. + + +File: git.info, Node: Understanding history; What is a branch?, Prev: Understanding history; History diagrams, Up: Understanding History; Commits + +1.3.3 Understanding history: What is a branch? +---------------------------------------------- + +When we need to be precise, we will use the word "branch" to mean a line +of development, and "branch head" (or just "head") to mean a reference +to the most recent commit on a branch. In the example above, the branch +head named "A" is a pointer to one particular commit, but we refer to +the line of three commits leading up to that point as all being part of +"branch A". + + However, when no confusion will result, we often just use the term +"branch" both for branches and for branch heads. + + +File: git.info, Node: Manipulating branches, Next: Examining an old version without creating a new branch, Prev: Understanding History; Commits, Up: Repositories and Branches + +1.4 Manipulating branches +========================= + +Creating, deleting, and modifying branches is quick and easy; here’s a +summary of the commands: + +‘git branch’ + list all branches. + +‘git branch <branch>’ + create a new branch named ‘<branch>’, referencing the same point in + history as the current branch. + +‘git branch <branch> <start-point>’ + create a new branch named ‘<branch>’, referencing ‘<start-point>’, + which may be specified any way you like, including using a branch + name or a tag name. + +‘git branch -d <branch>’ + delete the branch ‘<branch>’; if the branch is not fully merged in + its upstream branch or contained in the current branch, this + command will fail with a warning. + +‘git branch -D <branch>’ + delete the branch ‘<branch>’ irrespective of its merged status. + +‘git switch <branch>’ + make the current branch ‘<branch>’, updating the working directory + to reflect the version referenced by ‘<branch>’. + +‘git switch -c <new> <start-point>’ + create a new branch ‘<new>’ referencing ‘<start-point>’, and check + it out. + + The special symbol "HEAD" can always be used to refer to the current +branch.
In fact, Git uses a file named ‘HEAD’ in the ‘.git’ directory +to remember which branch is current: + + $ cat .git/HEAD + ref: refs/heads/master + + +File: git.info, Node: Examining an old version without creating a new branch, Next: Examining branches from a remote repository, Prev: Manipulating branches, Up: Repositories and Branches + +1.5 Examining an old version without creating a new branch +========================================================== + +The ‘git switch’ command normally expects a branch head, but will also +accept an arbitrary commit when invoked with –detach; for example, you +can check out the commit referenced by a tag: + + $ git switch --detach v2.6.17 + Note: checking out 'v2.6.17'. + + You are in 'detached HEAD' state. You can look around, make experimental + changes and commit them, and you can discard any commits you make in this + state without impacting any branches by performing another switch. + + If you want to create a new branch to retain commits you create, you may + do so (now or later) by using -c with the switch command again. Example: + + git switch -c new_branch_name + + HEAD is now at 427abfa Linux v2.6.17 + + The HEAD then refers to the SHA-1 of the commit instead of to a +branch, and git branch shows that you are no longer on a branch: + + $ cat .git/HEAD + 427abfa28afedffadfca9dd8b067eb6d36bac53f + $ git branch + * (detached from v2.6.17) + master + + In this case we say that the HEAD is "detached". + + This is an easy way to check out a particular version without having +to make up a name for the new branch. You can still create a new branch +(or tag) for this version later if you decide to. + + +File: git.info, Node: Examining branches from a remote repository, Next: Naming branches; tags; and other references, Prev: Examining an old version without creating a new branch, Up: Repositories and Branches + +1.6 Examining branches from a remote repository +=============================================== + +The "master" branch that was created at the time you cloned is a copy of +the HEAD in the repository that you cloned from. That repository may +also have had other branches, though, and your local repository keeps +branches which track each of those remote branches, called +remote-tracking branches, which you can view using the ‘-r’ option to +git-branch(1) (git-branch.html): + + $ git branch -r + origin/HEAD + origin/html + origin/maint + origin/man + origin/master + origin/next + origin/seen + origin/todo + + In this example, "origin" is called a remote repository, or "remote" +for short. The branches of this repository are called "remote branches" +from our point of view. The remote-tracking branches listed above were +created based on the remote branches at clone time and will be updated +by ‘git fetch’ (hence ‘git pull’) and ‘git push’. See *note Updating a +repository with git fetch:: for details. + + You might want to build on one of these remote-tracking branches on a +branch of your own, just as you would for a tag: + + $ git switch -c my-todo-copy origin/todo + + You can also check out ‘origin/todo’ directly to examine it or write +a one-off patch. See detached head (*note Examining an old version +without creating a new branch::). + + Note that the name "origin" is just the name that Git uses by default +to refer to the repository that you cloned from. 
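+
+   For example, a quick one-off look at a remote-tracking branch
+(reusing the ‘origin/todo’ branch listed above) could be as simple as:
+
+     $ git switch --detach origin/todo
+     $ git log -3
+
+which leaves HEAD detached, exactly as described in the previous
+section.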
+ + +File: git.info, Node: Naming branches; tags; and other references, Next: Updating a repository with git fetch, Prev: Examining branches from a remote repository, Up: Repositories and Branches + +1.7 Naming branches, tags, and other references +=============================================== + +Branches, remote-tracking branches, and tags are all references to +commits. All references are named with a slash-separated path name +starting with ‘refs’; the names we’ve been using so far are actually +shorthand: + + • The branch ‘test’ is short for ‘refs/heads/test’. + + • The tag ‘v2.6.18’ is short for ‘refs/tags/v2.6.18’. + + • ‘origin/master’ is short for ‘refs/remotes/origin/master’. + + The full name is occasionally useful if, for example, there ever +exists a tag and a branch with the same name. + + (Newly created refs are actually stored in the ‘.git/refs’ directory, +under the path given by their name. However, for efficiency reasons +they may also be packed together in a single file; see git-pack-refs(1) +(git-pack-refs.html)). + + As another useful shortcut, the "HEAD" of a repository can be +referred to just using the name of that repository. So, for example, +"origin" is usually a shortcut for the HEAD branch in the repository +"origin". + + For the complete list of paths which Git checks for references, and +the order it uses to decide which to choose when there are multiple +references with the same shorthand name, see the "SPECIFYING REVISIONS" +section of gitrevisions(7) (gitrevisions.html). + + +File: git.info, Node: Updating a repository with git fetch, Next: Fetching branches from other repositories, Prev: Naming branches; tags; and other references, Up: Repositories and Branches + +1.8 Updating a repository with git fetch +======================================== + +After you clone a repository and commit a few changes of your own, you +may wish to check the original repository for updates. + + The ‘git fetch’ command, with no arguments, will update all of the +remote-tracking branches to the latest version found in the original +repository. It will not touch any of your own branches—not even the +"master" branch that was created for you on clone. + + +File: git.info, Node: Fetching branches from other repositories, Prev: Updating a repository with git fetch, Up: Repositories and Branches + +1.9 Fetching branches from other repositories +============================================= + +You can also track branches from repositories other than the one you +cloned from, using git-remote(1) (git-remote.html): + + $ git remote add staging git://git.kernel.org/.../gregkh/staging.git + $ git fetch staging + ... + From git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging + * [new branch] master -> staging/master + * [new branch] staging-linus -> staging/staging-linus + * [new branch] staging-next -> staging/staging-next + + New remote-tracking branches will be stored under the shorthand name +that you gave ‘git remote add’, in this case ‘staging’: + + $ git branch -r + origin/HEAD -> origin/master + origin/master + staging/master + staging/staging-linus + staging/staging-next + + If you run ‘git fetch <remote>’ later, the remote-tracking branches +for the named ‘<remote>’ will be updated. + + If you examine the file ‘.git/config’, you will see that Git has +added a new stanza: + + $ cat .git/config + ... + [remote "staging"] + url = git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git + fetch = +refs/heads/*:refs/remotes/staging/* + ...
+ + This is what causes Git to track the remote’s branches; you may +modify or delete these configuration options by editing ‘.git/config’ +with a text editor. (See the "CONFIGURATION FILE" section of +git-config(1) (git-config.html) for details.) + + +File: git.info, Node: Exploring Git history, Next: Developing with Git, Prev: Repositories and Branches, Up: Top + +2 Exploring Git history +*********************** + +Git is best thought of as a tool for storing the history of a collection +of files. It does this by storing compressed snapshots of the contents +of a file hierarchy, together with "commits" which show the +relationships between these snapshots. + + Git provides extremely flexible and fast tools for exploring the +history of a project. + + We start with one specialized tool that is useful for finding the +commit that introduced a bug into a project. + +* Menu: + +* How to use bisect to find a regression:: +* Naming commits:: +* Creating tags:: +* Browsing revisions:: +* Generating diffs:: +* Viewing old file versions:: +* Examples:: + + +File: git.info, Node: How to use bisect to find a regression, Next: Naming commits, Up: Exploring Git history + +2.1 How to use bisect to find a regression +========================================== + +Suppose version 2.6.18 of your project worked, but the version at +"master" crashes. Sometimes the best way to find the cause of such a +regression is to perform a brute-force search through the project’s +history to find the particular commit that caused the problem. The +git-bisect(1) (git-bisect.html) command can help you do this: + + $ git bisect start + $ git bisect good v2.6.18 + $ git bisect bad master + Bisecting: 3537 revisions left to test after this + [65934a9a028b88e83e2b0f8b36618fe503349f8e] BLOCK: Make USB storage depend on SCSI rather than selecting it [try #6] + + If you run ‘git branch’ at this point, you’ll see that Git has +temporarily moved you in "(no branch)". HEAD is now detached from any +branch and points directly to a commit (with commit id 65934) that is +reachable from "master" but not from v2.6.18. Compile and test it, and +see whether it crashes. Assume it does crash. Then: + + $ git bisect bad + Bisecting: 1769 revisions left to test after this + [7eff82c8b1511017ae605f0c99ac275a7e21b867] i2c-core: Drop useless bitmaskings + + checks out an older version. Continue like this, telling Git at each +stage whether the version it gives you is good or bad, and notice that +the number of revisions left to test is cut approximately in half each +time. + + After about 13 tests (in this case), it will output the commit id of +the guilty commit. You can then examine the commit with git-show(1) +(git-show.html), find out who wrote it, and mail them your bug report +with the commit id. Finally, run + + $ git bisect reset + + to return you to the branch you were on before. + + Note that the version which ‘git bisect’ checks out for you at each +point is just a suggestion, and you’re free to try a different version +if you think it would be a good idea. For example, occasionally you may +land on a commit that broke something unrelated; run + + $ git bisect visualize + + which will run gitk and label the commit it chose with a marker that +says "bisect". Choose a safe-looking commit nearby, note its commit id, +and check it out with: + + $ git reset --hard fb47ddb2db + + then test, run ‘bisect good’ or ‘bisect bad’ as appropriate, and +continue. 
+ + Instead of ‘git bisect visualize’ and then ‘git reset --hard +fb47ddb2db’, you might just want to tell Git that you want to skip the +current commit: + + $ git bisect skip + + In this case, though, Git may not eventually be able to tell the +first bad one between some first skipped commits and a later bad commit. + + There are also ways to automate the bisecting process if you have a +test script that can tell a good from a bad commit. See git-bisect(1) +(git-bisect.html) for more information about this and other ‘git bisect’ +features. + + +File: git.info, Node: Naming commits, Next: Creating tags, Prev: How to use bisect to find a regression, Up: Exploring Git history + +2.2 Naming commits +================== + +We have seen several ways of naming commits already: + + • 40-hexdigit object name + + • branch name: refers to the commit at the head of the given branch + + • tag name: refers to the commit pointed to by the given tag (we’ve + seen branches and tags are special cases of references (*note + Naming branches; tags; and other references::)). + + • HEAD: refers to the head of the current branch + + There are many more; see the "SPECIFYING REVISIONS" section of the +gitrevisions(7) (gitrevisions.html) man page for the complete list of +ways to name revisions. Some examples: + + $ git show fb47ddb2 # the first few characters of the object name + # are usually enough to specify it uniquely + $ git show HEAD^ # the parent of the HEAD commit + $ git show HEAD^^ # the grandparent + $ git show HEAD~4 # the great-great-grandparent + + Recall that merge commits may have more than one parent; by default, +‘^’ and ‘~’ follow the first parent listed in the commit, but you can +also choose: + + $ git show HEAD^1 # show the first parent of HEAD + $ git show HEAD^2 # show the second parent of HEAD + + In addition to HEAD, there are several other special names for +commits: + + Merges (to be discussed later), as well as operations such as ‘git +reset’, which change the currently checked-out commit, generally set +ORIG_HEAD to the value HEAD had before the current operation. + + The ‘git fetch’ operation always stores the head of the last fetched +branch in FETCH_HEAD. For example, if you run ‘git fetch’ without +specifying a local branch as the target of the operation + + $ git fetch git://example.com/proj.git theirbranch + + the fetched commits will still be available from FETCH_HEAD. + + When we discuss merges we’ll also see the special name MERGE_HEAD, +which refers to the other branch that we’re merging in to the current +branch. + + The git-rev-parse(1) (git-rev-parse.html) command is a low-level +command that is occasionally useful for translating some name for a +commit to the object name for that commit: + + $ git rev-parse origin + e05db0fd4f31dde7005f075a84f96b360d05984b + + +File: git.info, Node: Creating tags, Next: Browsing revisions, Prev: Naming commits, Up: Exploring Git history + +2.3 Creating tags +================= + +We can also create a tag to refer to a particular commit; after running + + $ git tag stable-1 1b2e1d63ff + + You can use ‘stable-1’ to refer to the commit 1b2e1d63ff. + + This creates a "lightweight" tag. If you would also like to include +a comment with the tag, and possibly sign it cryptographically, then you +should create a tag object instead; see the git-tag(1) (git-tag.html) +man page for details. 
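+
+   For example, an annotated tag carrying a message could be created
+with something like the following (the message text is only
+illustrative; add ‘-s’ if you also want a GnuPG signature):
+
+     $ git tag -a -m "First stable release" stable-1 1b2e1d63ff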
+ + +File: git.info, Node: Browsing revisions, Next: Generating diffs, Prev: Creating tags, Up: Exploring Git history + +2.4 Browsing revisions +====================== + +The git-log(1) (git-log.html) command can show lists of commits. On its +own, it shows all commits reachable from the parent commit; but you can +also make more specific requests: + + $ git log v2.5.. # commits since (not reachable from) v2.5 + $ git log test..master # commits reachable from master but not test + $ git log master..test # ...reachable from test but not master + $ git log master...test # ...reachable from either test or master, + # but not both + $ git log --since="2 weeks ago" # commits from the last 2 weeks + $ git log Makefile # commits which modify Makefile + $ git log fs/ # ... which modify any file under fs/ + $ git log -S'foo()' # commits which add or remove any file data + # matching the string 'foo()' + + And of course you can combine all of these; the following finds +commits since v2.5 which touch the ‘Makefile’ or any file under ‘fs’: + + $ git log v2.5.. Makefile fs/ + + You can also ask git log to show patches: + + $ git log -p + + See the ‘--pretty’ option in the git-log(1) (git-log.html) man page +for more display options. + + Note that git log starts with the most recent commit and works +backwards through the parents; however, since Git history can contain +multiple independent lines of development, the particular order that +commits are listed in may be somewhat arbitrary. + + +File: git.info, Node: Generating diffs, Next: Viewing old file versions, Prev: Browsing revisions, Up: Exploring Git history + +2.5 Generating diffs +==================== + +You can generate diffs between any two versions using git-diff(1) +(git-diff.html): + + $ git diff master..test + + That will produce the diff between the tips of the two branches. If +you’d prefer to find the diff from their common ancestor to test, you +can use three dots instead of two: + + $ git diff master...test + + Sometimes what you want instead is a set of patches; for this you can +use git-format-patch(1) (git-format-patch.html): + + $ git format-patch master..test + + will generate a file with a patch for each commit reachable from test +but not from master. + + +File: git.info, Node: Viewing old file versions, Next: Examples, Prev: Generating diffs, Up: Exploring Git history + +2.6 Viewing old file versions +============================= + +You can always view an old version of a file by just checking out the +correct revision first. But sometimes it is more convenient to be able +to view an old version of a single file without checking anything out; +this command does that: + + $ git show v2.5:fs/locks.c + + Before the colon may be anything that names a commit, and after it +may be any path to a file tracked by Git. 
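+
+   For instance, any of the commit names described earlier (*note
+Naming commits::) may appear on the left of the colon; the paths here
+are only illustrative:
+
+     $ git show HEAD^:Makefile            # the file as of the parent of HEAD
+     $ git show origin/master:fs/locks.c  # the copy on a remote-tracking branch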
+ + +File: git.info, Node: Examples, Prev: Viewing old file versions, Up: Exploring Git history + +2.7 Examples +============ + +* Menu: + +* Counting the number of commits on a branch:: +* Check whether two branches point at the same history:: +* Find first tagged version including a given fix:: +* Showing commits unique to a given branch:: +* Creating a changelog and tarball for a software release:: +* Finding commits referencing a file with given content:: + + +File: git.info, Node: Counting the number of commits on a branch, Next: Check whether two branches point at the same history, Up: Examples + +2.7.1 Counting the number of commits on a branch +------------------------------------------------ + +Suppose you want to know how many commits you’ve made on ‘mybranch’ +since it diverged from ‘origin’: + + $ git log --pretty=oneline origin..mybranch | wc -l + + Alternatively, you may often see this sort of thing done with the +lower-level command git-rev-list(1) (git-rev-list.html), which just +lists the SHA-1’s of all the given commits: + + $ git rev-list origin..mybranch | wc -l + + +File: git.info, Node: Check whether two branches point at the same history, Next: Find first tagged version including a given fix, Prev: Counting the number of commits on a branch, Up: Examples + +2.7.2 Check whether two branches point at the same history +---------------------------------------------------------- + +Suppose you want to check whether two branches point at the same point +in history. + + $ git diff origin..master + + will tell you whether the contents of the project are the same at the +two branches; in theory, however, it’s possible that the same project +contents could have been arrived at by two different historical routes. +You could compare the object names: + + $ git rev-list origin + e05db0fd4f31dde7005f075a84f96b360d05984b + $ git rev-list master + e05db0fd4f31dde7005f075a84f96b360d05984b + + Or you could recall that the ‘...’ operator selects all commits +reachable from either one reference or the other but not both; so + + $ git log origin...master + + will return no commits when the two branches are equal. + + +File: git.info, Node: Find first tagged version including a given fix, Next: Showing commits unique to a given branch, Prev: Check whether two branches point at the same history, Up: Examples + +2.7.3 Find first tagged version including a given fix +----------------------------------------------------- + +Suppose you know that the commit e05db0fd fixed a certain problem. +You’d like to find the earliest tagged release that contains that fix. + + Of course, there may be more than one answer—if the history branched +after commit e05db0fd, then there could be multiple "earliest" tagged +releases. + + You could just visually inspect the commits since e05db0fd: + + $ gitk e05db0fd.. + + or you can use git-name-rev(1) (git-name-rev.html), which will give +the commit a name based on any tag it finds pointing to one of the +commit’s descendants: + + $ git name-rev --tags e05db0fd + e05db0fd tags/v1.5.0-rc1^0~23 + + The git-describe(1) (git-describe.html) command does the opposite, +naming the revision using a tag on which the given commit is based: + + $ git describe e05db0fd + v1.5.0-rc0-260-ge05db0f + + but that may sometimes help you guess which tags might come after the +given commit. 
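+
+   If your version of Git supports the ‘--contains’ option, you can
+also ask the question directly, listing every tag whose history
+includes the given commit:
+
+     $ git tag --contains e05db0fd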
+ + If you just want to verify whether a given tagged version contains a +given commit, you could use git-merge-base(1) (git-merge-base.html): + + $ git merge-base e05db0fd v1.5.0-rc1 + e05db0fd4f31dde7005f075a84f96b360d05984b + + The merge-base command finds a common ancestor of the given commits, +and always returns one or the other in the case where one is a +descendant of the other; so the above output shows that e05db0fd +actually is an ancestor of v1.5.0-rc1. + + Alternatively, note that + + $ git log v1.5.0-rc1..e05db0fd + + will produce empty output if and only if v1.5.0-rc1 includes +e05db0fd, because it outputs only commits that are not reachable from +v1.5.0-rc1. + + As yet another alternative, the git-show-branch(1) +(git-show-branch.html) command lists the commits reachable from its +arguments with a display on the left-hand side that indicates which +arguments that commit is reachable from. So, if you run something like + + $ git show-branch e05db0fd v1.5.0-rc0 v1.5.0-rc1 v1.5.0-rc2 + ! [e05db0fd] Fix warnings in sha1_file.c - use C99 printf format if + available + ! [v1.5.0-rc0] GIT v1.5.0 preview + ! [v1.5.0-rc1] GIT v1.5.0-rc1 + ! [v1.5.0-rc2] GIT v1.5.0-rc2 + ... + + then a line like + + + ++ [e05db0fd] Fix warnings in sha1_file.c - use C99 printf format if + available + + shows that e05db0fd is reachable from itself, from v1.5.0-rc1, and +from v1.5.0-rc2, and not from v1.5.0-rc0. + + +File: git.info, Node: Showing commits unique to a given branch, Next: Creating a changelog and tarball for a software release, Prev: Find first tagged version including a given fix, Up: Examples + +2.7.4 Showing commits unique to a given branch +---------------------------------------------- + +Suppose you would like to see all the commits reachable from the branch +head named ‘master’ but not from any other head in your repository. + + We can list all the heads in this repository with git-show-ref(1) +(git-show-ref.html): + + $ git show-ref --heads + bf62196b5e363d73353a9dcf094c59595f3153b7 refs/heads/core-tutorial + db768d5504c1bb46f63ee9d6e1772bd047e05bf9 refs/heads/maint + a07157ac624b2524a059a3414e99f6f44bebc1e7 refs/heads/master + 24dbc180ea14dc1aebe09f14c8ecf32010690627 refs/heads/tutorial-2 + 1e87486ae06626c2f31eaa63d26fc0fd646c8af2 refs/heads/tutorial-fixes + + We can get just the branch-head names, and remove ‘master’, with the +help of the standard utilities cut and grep: + + $ git show-ref --heads | cut -d' ' -f2 | grep -v '^refs/heads/master' + refs/heads/core-tutorial + refs/heads/maint + refs/heads/tutorial-2 + refs/heads/tutorial-fixes + + And then we can ask to see all the commits reachable from master but +not from these other heads: + + $ gitk master --not $( git show-ref --heads | cut -d' ' -f2 | + grep -v '^refs/heads/master' ) + + Obviously, endless variations are possible; for example, to see all +commits reachable from some head but not from any tag in the repository: + + $ gitk $( git show-ref --heads ) --not $( git show-ref --tags ) + + (See gitrevisions(7) (gitrevisions.html) for explanations of +commit-selecting syntax such as ‘--not’.) 
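+
+   The same selection also works with ‘git log’, if you prefer a plain
+list of commits to the gitk display:
+
+     $ git log --pretty=oneline master --not $( git show-ref --heads |
+       cut -d' ' -f2 | grep -v '^refs/heads/master' )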
+ + +File: git.info, Node: Creating a changelog and tarball for a software release, Next: Finding commits referencing a file with given content, Prev: Showing commits unique to a given branch, Up: Examples + +2.7.5 Creating a changelog and tarball for a software release +------------------------------------------------------------- + +The git-archive(1) (git-archive.html) command can create a tar or zip +archive from any version of a project; for example: + + $ git archive -o latest.tar.gz --prefix=project/ HEAD + + will use HEAD to produce a gzipped tar archive in which each filename +is preceded by ‘project/’. The output file format is inferred from the +output file extension if possible, see git-archive(1) (git-archive.html) +for details. + + Versions of Git older than 1.7.7 don’t know about the ‘tar.gz’ +format, you’ll need to use gzip explicitly: + + $ git archive --format=tar --prefix=project/ HEAD | gzip >latest.tar.gz + + If you’re releasing a new version of a software project, you may want +to simultaneously make a changelog to include in the release +announcement. + + Linus Torvalds, for example, makes new kernel releases by tagging +them, then running: + + $ release-script 2.6.12 2.6.13-rc6 2.6.13-rc7 + + where release-script is a shell script that looks like: + + #!/bin/sh + stable="$1" + last="$2" + new="$3" + echo "# git tag v$new" + echo "git archive --prefix=linux-$new/ v$new | gzip -9 > ../linux-$new.tar.gz" + echo "git diff v$stable v$new | gzip -9 > ../patch-$new.gz" + echo "git log --no-merges v$new ^v$last > ../ChangeLog-$new" + echo "git shortlog --no-merges v$new ^v$last > ../ShortLog" + echo "git diff --stat --summary -M v$last v$new > ../diffstat-$new" + + and then he just cut-and-pastes the output commands after verifying +that they look OK. + + +File: git.info, Node: Finding commits referencing a file with given content, Prev: Creating a changelog and tarball for a software release, Up: Examples + +2.7.6 Finding commits referencing a file with given content +----------------------------------------------------------- + +Somebody hands you a copy of a file, and asks which commits modified a +file such that it contained the given content either before or after the +commit. You can find out with this: + + $ git log --raw --abbrev=40 --pretty=oneline | + grep -B 1 `git hash-object filename` + + Figuring out why this works is left as an exercise to the (advanced) +student. The git-log(1) (git-log.html), git-diff-tree(1) +(git-diff-tree.html), and git-hash-object(1) (git-hash-object.html) man +pages may prove helpful. + + +File: git.info, Node: Developing with Git, Next: Sharing development with others, Prev: Exploring Git history, Up: Top + +3 Developing with Git +********************* + +* Menu: + +* Telling Git your name:: +* Creating a new repository:: +* How to make a commit:: +* Creating good commit messages:: +* Ignoring files:: +* How to merge:: +* Resolving a merge:: +* Undoing a merge:: +* Fast-forward merges:: +* Fixing mistakes:: +* Ensuring good performance:: +* Ensuring reliability:: + + +File: git.info, Node: Telling Git your name, Next: Creating a new repository, Up: Developing with Git + +3.1 Telling Git your name +========================= + +Before creating any commits, you should introduce yourself to Git. 
The +easiest way to do so is to use git-config(1) (git-config.html): + + $ git config --global user.name 'Your Name Comes Here' + $ git config --global user.email 'you@yourdomain.example.com' + + Which will add the following to a file named ‘.gitconfig’ in your +home directory: + + [user] + name = Your Name Comes Here + email = you@yourdomain.example.com + + See the "CONFIGURATION FILE" section of git-config(1) +(git-config.html) for details on the configuration file. The file is +plain text, so you can also edit it with your favorite editor. + + +File: git.info, Node: Creating a new repository, Next: How to make a commit, Prev: Telling Git your name, Up: Developing with Git + +3.2 Creating a new repository +============================= + +Creating a new repository from scratch is very easy: + + $ mkdir project + $ cd project + $ git init + + If you have some initial content (say, a tarball): + + $ tar xzvf project.tar.gz + $ cd project + $ git init + $ git add . # include everything below ./ in the first commit: + $ git commit + + +File: git.info, Node: How to make a commit, Next: Creating good commit messages, Prev: Creating a new repository, Up: Developing with Git + +3.3 How to make a commit +======================== + +Creating a new commit takes three steps: + + 1. Making some changes to the working directory using your favorite + editor. + + 2. Telling Git about your changes. + + 3. Creating the commit using the content you told Git about in step 2. + + In practice, you can interleave and repeat steps 1 and 2 as many +times as you want: in order to keep track of what you want committed at +step 3, Git maintains a snapshot of the tree’s contents in a special +staging area called "the index." + + At the beginning, the content of the index will be identical to that +of the HEAD. The command ‘git diff --cached’, which shows the difference +between the HEAD and the index, should therefore produce no output at +that point. + + Modifying the index is easy: + + To update the index with the contents of a new or modified file, use + + $ git add path/to/file + + To remove a file from the index and from the working tree, use + + $ git rm path/to/file + + After each step you can verify that + + $ git diff --cached + + always shows the difference between the HEAD and the index file—this +is what you’d commit if you created the commit now—and that + + $ git diff + + shows the difference between the working tree and the index file. + + Note that ‘git add’ always adds just the current contents of a file +to the index; further changes to the same file will be ignored unless +you run ‘git add’ on the file again. + + When you’re ready, just run + + $ git commit + + and Git will prompt you for a commit message and then create the new +commit. Check to make sure it looks like what you expected with + + $ git show + + As a special shortcut, + + $ git commit -a + + will update the index with any files that you’ve modified or removed +and create a commit, all in one step. + + A number of commands are useful for keeping track of what you’re +about to commit: + + $ git diff --cached # difference between HEAD and the index; what + # would be committed if you ran "commit" now. + $ git diff # difference between the index file and your + # working directory; changes that would not + # be included if you ran "commit" now. + $ git diff HEAD # difference between HEAD and working tree; what + # would be committed if you ran "commit -a" now. + $ git status # a brief per-file summary of the above. 
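+
+   To see the snapshot behaviour of ‘git add’ described above, try a
+small experiment (the file name is only an example):
+
+     $ echo first >file.txt
+     $ git add file.txt
+     $ echo second >file.txt
+     $ git diff --cached    # the index still holds "first"
+     $ git diff             # the unstaged edit, "first" -> "second"
+
+Running ‘git add file.txt’ again would stage the second edit as well.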
+ + You can also use git-gui(1) (git-gui.html) to create commits, view +changes in the index and the working tree files, and individually select +diff hunks for inclusion in the index (by right-clicking on the diff +hunk and choosing "Stage Hunk For Commit"). + + +File: git.info, Node: Creating good commit messages, Next: Ignoring files, Prev: How to make a commit, Up: Developing with Git + +3.4 Creating good commit messages +================================= + +Though not required, it’s a good idea to begin the commit message with a +single short (less than 50 character) line summarizing the change, +followed by a blank line and then a more thorough description. The text +up to the first blank line in a commit message is treated as the commit +title, and that title is used throughout Git. For example, +git-format-patch(1) (git-format-patch.html) turns a commit into email, +and it uses the title on the Subject line and the rest of the commit in +the body. + + +File: git.info, Node: Ignoring files, Next: How to merge, Prev: Creating good commit messages, Up: Developing with Git + +3.5 Ignoring files +================== + +A project will often generate files that you do _not_ want to track with +Git. This typically includes files generated by a build process or +temporary backup files made by your editor. Of course, _not_ tracking +files with Git is just a matter of _not_ calling ‘git add’ on them. But +it quickly becomes annoying to have these untracked files lying around; +e.g. they make ‘git add .’ practically useless, and they keep showing +up in the output of ‘git status’. + + You can tell Git to ignore certain files by creating a file called +‘.gitignore’ in the top level of your working directory, with contents +such as: + + # Lines starting with '#' are considered comments. + # Ignore any file named foo.txt. + foo.txt + # Ignore (generated) html files, + *.html + # except foo.html which is maintained by hand. + !foo.html + # Ignore objects and archives. + *.[oa] + + See gitignore(5) (gitignore.html) for a detailed explanation of the +syntax. You can also place .gitignore files in other directories in +your working tree, and they will apply to those directories and their +subdirectories. The ‘.gitignore’ files can be added to your repository +like any other files (just run ‘git add .gitignore’ and ‘git commit’, as +usual), which is convenient when the exclude patterns (such as patterns +matching build output files) would also make sense for other users who +clone your repository. + + If you wish the exclude patterns to affect only certain repositories +(instead of every repository for a given project), you may instead put +them in a file in your repository named ‘.git/info/exclude’, or in any +file specified by the ‘core.excludesFile’ configuration variable. Some +Git commands can also take exclude patterns directly on the command +line. See gitignore(5) (gitignore.html) for the details. + + +File: git.info, Node: How to merge, Next: Resolving a merge, Prev: Ignoring files, Up: Developing with Git + +3.6 How to merge +================ + +You can rejoin two diverging branches of development using git-merge(1) +(git-merge.html): + + $ git merge branchname + + merges the development in the branch ‘branchname’ into the current +branch. + + A merge is made by combining the changes made in ‘branchname’ and the +changes made up to the latest commit in your current branch since their +histories forked. 
The work tree is overwritten by the result of the +merge when this combining is done cleanly, or overwritten by a +half-merged result when this combining results in conflicts. +Therefore, if you have uncommitted changes touching the same files as +the ones impacted by the merge, Git will refuse to proceed. Most of the +time, you will want to commit your changes before merging, and if +you don’t, then git-stash(1) (git-stash.html) can take these changes +away while you’re doing the merge, and reapply them afterwards. + + If the changes are independent enough, Git will automatically +complete the merge and commit the result (or reuse an existing commit in +case of fast-forward (*note Fast-forward merges::), see below). On the +other hand, if there are conflicts—for example, if the same file is +modified in two different ways in the remote branch and the local +branch—then you are warned; the output may look something like this: + + $ git merge next + 100% (4/4) done + Auto-merged file.txt + CONFLICT (content): Merge conflict in file.txt + Automatic merge failed; fix conflicts and then commit the result. + + Conflict markers are left in the problematic files, and after you +resolve the conflicts manually, you can update the index with the +contents and run ‘git commit’, as you normally would when creating a new +file. + + If you examine the resulting commit using gitk, you will see that it +has two parents, one pointing to the top of the current branch, and one +to the top of the other branch. + + +File: git.info, Node: Resolving a merge, Next: Undoing a merge, Prev: How to merge, Up: Developing with Git + +3.7 Resolving a merge +===================== + +When a merge isn’t resolved automatically, Git leaves the index and the +working tree in a special state that gives you all the information you +need to help resolve the merge. + + Files with conflicts are marked specially in the index, so until you +resolve the problem and update the index, git-commit(1) +(git-commit.html) will fail: + + $ git commit + file.txt: needs merge + + Also, git-status(1) (git-status.html) will list those files as +"unmerged", and the files with conflicts will have conflict markers +added, like this: + + <<<<<<< HEAD:file.txt + Hello world + ======= + Goodbye + >>>>>>> 77976da35a11db4580b80ae27e8d65caf5208086:file.txt + + All you need to do is edit the files to resolve the conflicts, and +then + + $ git add file.txt + $ git commit + + Note that the commit message will already be filled in for you with +some information about the merge. Normally you can just use this +default message unchanged, but you may add additional commentary of your +own if desired. + + The above is all you need to know to resolve a simple merge. But Git +also provides more information to help resolve conflicts: + +* Menu: + +* Getting conflict-resolution help during a merge:: + + +File: git.info, Node: Getting conflict-resolution help during a merge, Up: Resolving a merge + +3.7.1 Getting conflict-resolution help during a merge +----------------------------------------------------- + +All of the changes that Git was able to merge automatically are already +added to the index file, so git-diff(1) (git-diff.html) shows only the +conflicts.
It uses an unusual syntax: + + $ git diff + diff --cc file.txt + index 802992c,2b60207..0000000 + --- a/file.txt + +++ b/file.txt + @@@ -1,1 -1,1 +1,5 @@@ + ++<<<<<<< HEAD:file.txt + +Hello world + ++======= + + Goodbye + ++>>>>>>> 77976da35a11db4580b80ae27e8d65caf5208086:file.txt + + Recall that the commit which will be committed after we resolve this +conflict will have two parents instead of the usual one: one parent will +be HEAD, the tip of the current branch; the other will be the tip of the +other branch, which is stored temporarily in MERGE_HEAD. + + During the merge, the index holds three versions of each file. Each +of these three "file stages" represents a different version of the file: + + $ git show :1:file.txt # the file in a common ancestor of both branches + $ git show :2:file.txt # the version from HEAD. + $ git show :3:file.txt # the version from MERGE_HEAD. + + When you ask git-diff(1) (git-diff.html) to show the conflicts, it +runs a three-way diff between the conflicted merge results in the work +tree with stages 2 and 3 to show only hunks whose contents come from +both sides, mixed (in other words, when a hunk’s merge results come only +from stage 2, that part is not conflicting and is not shown. Same for +stage 3). + + The diff above shows the differences between the working-tree version +of file.txt and the stage 2 and stage 3 versions. So instead of +preceding each line by a single ‘+’ or ‘-’, it now uses two columns: the +first column is used for differences between the first parent and the +working directory copy, and the second for differences between the +second parent and the working directory copy. (See the "COMBINED DIFF +FORMAT" section of git-diff-files(1) (git-diff-files.html) for a details +of the format.) + + After resolving the conflict in the obvious way (but before updating +the index), the diff will look like: + + $ git diff + diff --cc file.txt + index 802992c,2b60207..0000000 + --- a/file.txt + +++ b/file.txt + @@@ -1,1 -1,1 +1,1 @@@ + - Hello world + -Goodbye + ++Goodbye world + + This shows that our resolved version deleted "Hello world" from the +first parent, deleted "Goodbye" from the second parent, and added +"Goodbye world", which was previously absent from both. + + Some special diff options allow diffing the working directory against +any of these stages: + + $ git diff -1 file.txt # diff against stage 1 + $ git diff --base file.txt # same as the above + $ git diff -2 file.txt # diff against stage 2 + $ git diff --ours file.txt # same as the above + $ git diff -3 file.txt # diff against stage 3 + $ git diff --theirs file.txt # same as the above. + + The git-log(1) (git-log.html) and gitk(1) (gitk.html) commands also +provide special help for merges: + + $ git log --merge + $ gitk --merge + + These will display all commits which exist only on HEAD or on +MERGE_HEAD, and which touch an unmerged file. + + You may also use git-mergetool(1) (git-mergetool.html), which lets +you merge the unmerged files using external tools such as Emacs or +kdiff3. + + Each time you resolve the conflicts in a file and update the index: + + $ git add file.txt + + the different stages of that file will be "collapsed", after which +‘git diff’ will (by default) no longer show diffs for that file. 
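+
+   If you decide you simply want one side of a conflicted file, you can
+also check out a single stage directly and then mark the file resolved
+(‘file.txt’ as in the examples above):
+
+     $ git checkout --ours file.txt    # keep the version from HEAD (stage 2)
+     $ git checkout --theirs file.txt  # or the one from MERGE_HEAD (stage 3)
+     $ git add file.txt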
+ + +File: git.info, Node: Undoing a merge, Next: Fast-forward merges, Prev: Resolving a merge, Up: Developing with Git + +3.8 Undoing a merge +=================== + +If you get stuck and decide to just give up and throw the whole mess +away, you can always return to the pre-merge state with + + $ git merge --abort + + Or, if you’ve already committed the merge that you want to throw +away, + + $ git reset --hard ORIG_HEAD + + However, this last command can be dangerous in some cases—never throw +away a commit you have already committed if that commit may itself have +been merged into another branch, as doing so may confuse further merges. + + +File: git.info, Node: Fast-forward merges, Next: Fixing mistakes, Prev: Undoing a merge, Up: Developing with Git + +3.9 Fast-forward merges +======================= + +There is one special case not mentioned above, which is treated +differently. Normally, a merge results in a merge commit, with two +parents, one pointing at each of the two lines of development that were +merged. + + However, if the current branch is an ancestor of the other—so every +commit present in the current branch is already contained in the other +branch—then Git just performs a "fast-forward"; the head of the current +branch is moved forward to point at the head of the merged-in branch, +without any new commits being created. + + +File: git.info, Node: Fixing mistakes, Next: Ensuring good performance, Prev: Fast-forward merges, Up: Developing with Git + +3.10 Fixing mistakes +==================== + +If you’ve messed up the working tree, but haven’t yet committed your +mistake, you can return the entire working tree to the last committed +state with + + $ git restore --staged --worktree :/ + + If you make a commit that you later wish you hadn’t, there are two +fundamentally different ways to fix the problem: + + 1. You can create a new commit that undoes whatever was done by the + old commit. This is the correct thing if your mistake has already + been made public. + + 2. You can go back and modify the old commit. You should never do + this if you have already made the history public; Git does not + normally expect the "history" of a project to change, and cannot + correctly perform repeated merges from a branch that has had its + history changed. + +* Menu: + +* Fixing a mistake with a new commit:: +* Fixing a mistake by rewriting history:: +* Checking out an old version of a file:: +* Temporarily setting aside work in progress:: + + +File: git.info, Node: Fixing a mistake with a new commit, Next: Fixing a mistake by rewriting history, Up: Fixing mistakes + +3.10.1 Fixing a mistake with a new commit +----------------------------------------- + +Creating a new commit that reverts an earlier change is very easy; just +pass the git-revert(1) (git-revert.html) command a reference to the bad +commit; for example, to revert the most recent commit: + + $ git revert HEAD + + This will create a new commit which undoes the change in HEAD. You +will be given a chance to edit the commit message for the new commit. + + You can also revert an earlier change, for example, the next-to-last: + + $ git revert HEAD^ + + In this case Git will attempt to undo the old change while leaving +intact any changes made since then. If more recent changes overlap with +the changes to be reverted, then you will be asked to fix conflicts +manually, just as in the case of resolving a merge (*note Resolving a +merge::). 
+ + +File: git.info, Node: Fixing a mistake by rewriting history, Next: Checking out an old version of a file, Prev: Fixing a mistake with a new commit, Up: Fixing mistakes + +3.10.2 Fixing a mistake by rewriting history +-------------------------------------------- + +If the problematic commit is the most recent commit, and you have not +yet made that commit public, then you may just destroy it using ‘git +reset’ (*note Undoing a merge::). + + Alternatively, you can edit the working directory and update the +index to fix your mistake, just as if you were going to create a new +commit (*note How to make a commit::), then run + + $ git commit --amend + + which will replace the old commit by a new commit incorporating your +changes, giving you a chance to edit the old commit message first. + + Again, you should never do this to a commit that may already have +been merged into another branch; use git-revert(1) (git-revert.html) +instead in that case. + + It is also possible to replace commits further back in the history, +but this is an advanced topic to be left for another chapter (*note +Rewriting history and maintaining patch series::). + + +File: git.info, Node: Checking out an old version of a file, Next: Temporarily setting aside work in progress, Prev: Fixing a mistake by rewriting history, Up: Fixing mistakes + +3.10.3 Checking out an old version of a file +-------------------------------------------- + +In the process of undoing a previous bad change, you may find it useful +to check out an older version of a particular file using git-restore(1) +(git-restore.html). The command + + $ git restore --source=HEAD^ path/to/file + + replaces path/to/file by the contents it had in the commit HEAD^, and +also updates the index to match. It does not change branches. + + If you just want to look at an old version of the file, without +modifying the working directory, you can do that with git-show(1) +(git-show.html): + + $ git show HEAD^:path/to/file + + which will display the given version of the file. + + +File: git.info, Node: Temporarily setting aside work in progress, Prev: Checking out an old version of a file, Up: Fixing mistakes + +3.10.4 Temporarily setting aside work in progress +------------------------------------------------- + +While you are in the middle of working on something complicated, you +find an unrelated but obvious and trivial bug. You would like to fix it +before continuing. You can use git-stash(1) (git-stash.html) to save +the current state of your work, and after fixing the bug (or, optionally +after doing so on a different branch and then coming back), unstash the +work-in-progress changes. + + $ git stash push -m "work in progress for foo feature" + + This command will save your changes away to the ‘stash’, and reset +your working tree and the index to match the tip of your current branch. +Then you can make your fix as usual. + + ... edit and test ... + $ git commit -a -m "blorpl: typofix" + + After that, you can go back to what you were working on with ‘git +stash pop’: + + $ git stash pop + + +File: git.info, Node: Ensuring good performance, Next: Ensuring reliability, Prev: Fixing mistakes, Up: Developing with Git + +3.11 Ensuring good performance +============================== + +On large repositories, Git depends on compression to keep the history +information from taking up too much space on disk or in memory. Some +Git commands may automatically run git-gc(1) (git-gc.html), so you don’t +have to worry about running it manually. 
However, compressing a large +repository may take a while, so you may want to call ‘gc’ explicitly to +avoid automatic compression kicking in when it is not convenient. + + +File: git.info, Node: Ensuring reliability, Prev: Ensuring good performance, Up: Developing with Git + +3.12 Ensuring reliability +========================= + +* Menu: + +* Checking the repository for corruption:: +* Recovering lost changes:: + + +File: git.info, Node: Checking the repository for corruption, Next: Recovering lost changes, Up: Ensuring reliability + +3.12.1 Checking the repository for corruption +--------------------------------------------- + +The git-fsck(1) (git-fsck.html) command runs a number of +self-consistency checks on the repository, and reports on any problems. +This may take some time. + + $ git fsck + dangling commit 7281251ddd2a61e38657c827739c57015671a6b3 + dangling commit 2706a059f258c6b245f298dc4ff2ccd30ec21a63 + dangling commit 13472b7c4b80851a1bc551779171dcb03655e9b5 + dangling blob 218761f9d90712d37a9c5e36f406f92202db07eb + dangling commit bf093535a34a4d35731aa2bd90fe6b176302f14f + dangling commit 8e4bec7f2ddaa268bef999853c25755452100f8e + dangling tree d50bb86186bf27b681d25af89d3b5b68382e4085 + dangling tree b24c2473f1fd3d91352a624795be026d64c8841f + ... + + You will see informational messages on dangling objects. They are +objects that still exist in the repository but are no longer referenced +by any of your branches, and can (and will) be removed after a while +with ‘gc’. You can run ‘git fsck --no-dangling’ to suppress these +messages, and still view real errors. + + +File: git.info, Node: Recovering lost changes, Prev: Checking the repository for corruption, Up: Ensuring reliability + +3.12.2 Recovering lost changes +------------------------------ + +* Menu: + +* Reflogs:: +* Examining dangling objects:: + + +File: git.info, Node: Reflogs, Next: Examining dangling objects, Up: Recovering lost changes + +3.12.2.1 Reflogs +................ + +Say you modify a branch with ‘git reset --hard’ (*note Fixing +mistakes::), and then realize that the branch was the only reference you +had to that point in history. + + Fortunately, Git also keeps a log, called a "reflog", of all the +previous values of each branch. So in this case you can still find the +old history using, for example, + + $ git log master@{1} + + This lists the commits reachable from the previous version of the +‘master’ branch head. This syntax can be used with any Git command that +accepts a commit, not just with ‘git log’. Some other examples: + + $ git show master@{2} # See where the branch pointed 2, + $ git show master@{3} # 3, ... changes ago. + $ gitk master@{yesterday} # See where it pointed yesterday, + $ gitk master@{"1 week ago"} # ... or last week + $ git log --walk-reflogs master # show reflog entries for master + + A separate reflog is kept for the HEAD, so + + $ git show HEAD@{"1 week ago"} + + will show what HEAD pointed to one week ago, not what the current +branch pointed to one week ago. This allows you to see the history of +what you’ve checked out. + + The reflogs are kept by default for 30 days, after which they may be +pruned. See git-reflog(1) (git-reflog.html) and git-gc(1) (git-gc.html) +to learn how to control this pruning, and see the "SPECIFYING REVISIONS" +section of gitrevisions(7) (gitrevisions.html) for details. + + Note that the reflog history is very different from normal Git +history. 
While normal history is shared by every repository that works +on the same project, the reflog history is not shared: it tells you only +about how the branches in your local repository have changed over time. + + +File: git.info, Node: Examining dangling objects, Prev: Reflogs, Up: Recovering lost changes + +3.12.2.2 Examining dangling objects +................................... + +In some situations the reflog may not be able to save you. For example, +suppose you delete a branch, then realize you need the history it +contained. The reflog is also deleted; however, if you have not yet +pruned the repository, then you may still be able to find the lost +commits in the dangling objects that ‘git fsck’ reports. See *note +Dangling objects:: for the details. + + $ git fsck + dangling commit 7281251ddd2a61e38657c827739c57015671a6b3 + dangling commit 2706a059f258c6b245f298dc4ff2ccd30ec21a63 + dangling commit 13472b7c4b80851a1bc551779171dcb03655e9b5 + ... + + You can examine one of those dangling commits with, for example, + + $ gitk 7281251ddd --not --all + + which does what it sounds like: it says that you want to see the +commit history that is described by the dangling commit(s), but not the +history that is described by all your existing branches and tags. Thus +you get exactly the history reachable from that commit that is lost. +(And notice that it might not be just one commit: we only report the +"tip of the line" as being dangling, but there might be a whole deep and +complex commit history that was dropped.) + + If you decide you want the history back, you can always create a new +reference pointing to it, for example, a new branch: + + $ git branch recovered-branch 7281251ddd + + Other types of dangling objects (blobs and trees) are also possible, +and dangling objects can arise in other situations. + + +File: git.info, Node: Sharing development with others, Next: Rewriting history and maintaining patch series, Prev: Developing with Git, Up: Top + +4 Sharing development with others +********************************* + +* Menu: + +* Getting updates with git pull:: +* Submitting patches to a project:: +* Importing patches to a project:: +* Public Git repositories:: +* How to get a Git repository with minimal history:: +* Examples: Examples <1>. + + +File: git.info, Node: Getting updates with git pull, Next: Submitting patches to a project, Up: Sharing development with others + +4.1 Getting updates with git pull +================================= + +After you clone a repository and commit a few changes of your own, you +may wish to check the original repository for updates and merge them +into your own work. + + We have already seen how to keep remote-tracking branches up to date +(*note Updating a repository with git fetch::) with git-fetch(1) +(git-fetch.html), and how to merge two branches. So you can merge in +changes from the original repository’s master branch with: + + $ git fetch + $ git merge origin/master + + However, the git-pull(1) (git-pull.html) command provides a way to do +this in one step: + + $ git pull origin master + + In fact, if you have ‘master’ checked out, then this branch has been +configured by ‘git clone’ to get changes from the HEAD branch of the +origin repository. So often you can accomplish the above with just a +simple + + $ git pull + + This command will fetch changes from the remote branches to your +remote-tracking branches ‘origin/*’, and merge the default branch into +the current branch. 
+ + More generally, a branch that is created from a remote-tracking +branch will pull by default from that branch. See the descriptions of +the ‘branch.<name>.remote’ and ‘branch.<name>.merge’ options in +git-config(1) (git-config.html), and the discussion of the ‘--track’ +option in git-checkout(1) (git-checkout.html), to learn how to control +these defaults. + + In addition to saving you keystrokes, ‘git pull’ also helps you by +producing a default commit message documenting the branch and repository +that you pulled from. + + (But note that no such commit will be created in the case of a +fast-forward (*note Fast-forward merges::); instead, your branch will +just be updated to point to the latest commit from the upstream branch.) + + The ‘git pull’ command can also be given ‘.’ as the "remote" +repository, in which case it just merges in a branch from the current +repository; so the commands + + $ git pull . branch + $ git merge branch + + are roughly equivalent. + + +File: git.info, Node: Submitting patches to a project, Next: Importing patches to a project, Prev: Getting updates with git pull, Up: Sharing development with others + +4.2 Submitting patches to a project +=================================== + +If you just have a few changes, the simplest way to submit them may be +to send them as patches in email: + + First, use git-format-patch(1) (git-format-patch.html); for example: + + $ git format-patch origin + + will produce a numbered series of files in the current directory, one +for each patch in the current branch but not in ‘origin/HEAD’. + + ‘git format-patch’ can include an initial "cover letter". You can +insert commentary on individual patches after the three dash line which +‘format-patch’ places after the commit message but before the patch +itself. If you use ‘git notes’ to track your cover letter material, +‘git format-patch --notes’ will include the commit’s notes in a similar +manner. + + You can then import these into your mail client and send them by +hand. However, if you have a lot to send at once, you may prefer to use +the git-send-email(1) (git-send-email.html) script to automate the +process. Consult the mailing list for your project first to determine +their requirements for submitting patches. + + +File: git.info, Node: Importing patches to a project, Next: Public Git repositories, Prev: Submitting patches to a project, Up: Sharing development with others + +4.3 Importing patches to a project +================================== + +Git also provides a tool called git-am(1) (git-am.html) (am stands for +"apply mailbox"), for importing such an emailed series of patches. Just +save all of the patch-containing messages, in order, into a single +mailbox file, say ‘patches.mbox’, then run + + $ git am -3 patches.mbox + + Git will apply each patch in order; if any conflicts are found, it +will stop, and you can fix the conflicts as described in "Resolving a +merge (*note Resolving a merge::)". (The ‘-3’ option tells Git to +perform a merge; if you would prefer it just to abort and leave your +tree and index untouched, you may omit that option.) + + Once the index is updated with the results of the conflict +resolution, instead of creating a new commit, just run + + $ git am --continue + + and Git will create the commit for you and continue applying the +remaining patches from the mailbox. + + The final result will be a series of commits, one for each patch in +the original mailbox, with authorship and commit log message each taken +from the message containing each patch.
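+
+   Two related options are worth knowing: if a particular patch in the
+series should not be applied after all, you can drop it and move on, or
+abandon the operation entirely and restore the original branch:
+
+     $ git am --skip     # skip the current patch and continue
+     $ git am --abort    # give up and restore the original branch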
+ + +File: git.info, Node: Public Git repositories, Next: How to get a Git repository with minimal history, Prev: Importing patches to a project, Up: Sharing development with others + +4.4 Public Git repositories +=========================== + +Another way to submit changes to a project is to tell the maintainer of +that project to pull the changes from your repository using git-pull(1) +(git-pull.html). In the section "Getting updates with ‘git pull’ (*note +Getting updates with git pull::)" we described this as a way to get +updates from the "main" repository, but it works just as well in the +other direction. + + If you and the maintainer both have accounts on the same machine, +then you can just pull changes from each other’s repositories directly; +commands that accept repository URLs as arguments will also accept a +local directory name: + + $ git clone /path/to/repository + $ git pull /path/to/other/repository + + or an ssh URL: + + $ git clone ssh://yourhost/~you/repository + + For projects with few developers, or for synchronizing a few private +repositories, this may be all you need. + + However, the more common way to do this is to maintain a separate +public repository (usually on a different host) for others to pull +changes from. This is usually more convenient, and allows you to +cleanly separate private work in progress from publicly visible work. + + You will continue to do your day-to-day work in your personal +repository, but periodically "push" changes from your personal +repository into your public repository, allowing other developers to +pull from that repository. So the flow of changes, in a situation where +there is one other developer with a public repository, looks like this: + + you push + your personal repo ------------------> your public repo + ^ | + | | + | you pull | they pull + | | + | | + | they push V + their public repo <------------------- their repo + + We explain how to do this in the following sections. + +* Menu: + +* Setting up a public repository:: +* Exporting a Git repository via the Git protocol:: +* Exporting a git repository via HTTP:: +* Pushing changes to a public repository:: +* What to do when a push fails:: +* Setting up a shared repository:: +* Allowing web browsing of a repository:: + + +File: git.info, Node: Setting up a public repository, Next: Exporting a Git repository via the Git protocol, Up: Public Git repositories + +4.4.1 Setting up a public repository +------------------------------------ + +Assume your personal repository is in the directory ‘~/proj’. We first +create a new clone of the repository and tell ‘git daemon’ that it is +meant to be public: + + $ git clone --bare ~/proj proj.git + $ touch proj.git/git-daemon-export-ok + + The resulting directory proj.git contains a "bare" git repository—it +is just the contents of the ‘.git’ directory, without any files checked +out around it. + + Next, copy ‘proj.git’ to the server where you plan to host the public +repository. You can use scp, rsync, or whatever is most convenient. + + +File: git.info, Node: Exporting a Git repository via the Git protocol, Next: Exporting a git repository via HTTP, Prev: Setting up a public repository, Up: Public Git repositories + +4.4.2 Exporting a Git repository via the Git protocol +----------------------------------------------------- + +This is the preferred method. + + If someone else administers the server, they should tell you what +directory to put the repository in, and what ‘git://’ URL it will appear +at. 
You can then skip to the section "Pushing changes to a public +repository (*note Pushing changes to a public repository::)", below. + + Otherwise, all you need to do is start git-daemon(1) +(git-daemon.html); it will listen on port 9418. By default, it will +allow access to any directory that looks like a Git directory and +contains the magic file git-daemon-export-ok. Passing some directory +paths as ‘git daemon’ arguments will further restrict the exports to +those paths. + + You can also run ‘git daemon’ as an inetd service; see the +git-daemon(1) (git-daemon.html) man page for details. (See especially +the examples section.) + + +File: git.info, Node: Exporting a git repository via HTTP, Next: Pushing changes to a public repository, Prev: Exporting a Git repository via the Git protocol, Up: Public Git repositories + +4.4.3 Exporting a git repository via HTTP +----------------------------------------- + +The Git protocol gives better performance and reliability, but on a host +with a web server set up, HTTP exports may be simpler to set up. + + All you need to do is place the newly created bare Git repository in +a directory that is exported by the web server, and make some +adjustments to give web clients some extra information they need: + + $ mv proj.git /home/you/public_html/proj.git + $ cd proj.git + $ git --bare update-server-info + $ mv hooks/post-update.sample hooks/post-update + + (For an explanation of the last two lines, see +git-update-server-info(1) (git-update-server-info.html) and githooks(5) +(githooks.html).) + + Advertise the URL of ‘proj.git’. Anybody else should then be able to +clone or pull from that URL, for example with a command line like: + + $ git clone http://yourserver.com/~you/proj.git + + (See also setup-git-server-over-http +(howto/setup-git-server-over-http.html) for a slightly more +sophisticated setup using WebDAV which also allows pushing over HTTP.) + + +File: git.info, Node: Pushing changes to a public repository, Next: What to do when a push fails, Prev: Exporting a git repository via HTTP, Up: Public Git repositories + +4.4.4 Pushing changes to a public repository +-------------------------------------------- + +Note that the two techniques outlined above (exporting via http (*note +Exporting a git repository via HTTP::) or git (*note Exporting a Git +repository via the Git protocol::)) allow other maintainers to fetch +your latest changes, but they do not allow write access, which you will +need to update the public repository with the latest changes created in +your private repository. + + The simplest way to do this is using git-push(1) (git-push.html) and +ssh; to update the remote branch named ‘master’ with the latest state of +your branch named ‘master’, run + + $ git push ssh://yourserver.com/~you/proj.git master:master + + or just + + $ git push ssh://yourserver.com/~you/proj.git master + + As with ‘git fetch’, ‘git push’ will complain if this does not result +in a fast-forward (*note Fast-forward merges::); see the following +section for details on handling this case. + + Note that the target of a ‘push’ is normally a bare (*note +[def_bare_repository]::) repository. You can also push to a repository +that has a checked-out working tree, but a push to update the currently +checked-out branch is denied by default to prevent confusion. See the +description of the receive.denyCurrentBranch option in git-config(1) +(git-config.html) for details. 
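+
+   For example, if you really do want pushes to update a checked-out
+working tree (say, for a simple deployment setup), reasonably recent
+versions of Git accept the following setting in the receiving
+repository; this is just one possible value, so check git-config(1)
+(git-config.html) for the alternatives and their caveats:
+
+     $ git config receive.denyCurrentBranch updateInstead
+
+   With this setting a push to the checked-out branch is accepted as
+long as the working tree and index are clean, and the working tree is
+then updated to match the pushed branch.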
+ + As with ‘git fetch’, you may also set up configuration options to +save typing; so, for example: + + $ git remote add public-repo ssh://yourserver.com/~you/proj.git + + adds the following to ‘.git/config’: + + [remote "public-repo"] + url = ssh://yourserver.com/~you/proj.git + fetch = +refs/heads/*:refs/remotes/public-repo/* + + which lets you do the same push with just + + $ git push public-repo master + + See the explanations of the ‘remote.<name>.url’, +‘branch.<name>.remote’, and ‘remote.<name>.push’ options in +git-config(1) (git-config.html) for details. + + +File: git.info, Node: What to do when a push fails, Next: Setting up a shared repository, Prev: Pushing changes to a public repository, Up: Public Git repositories + +4.4.5 What to do when a push fails +---------------------------------- + +If a push would not result in a fast-forward (*note Fast-forward +merges::) of the remote branch, then it will fail with an error like: + + ! [rejected] master -> master (non-fast-forward) + error: failed to push some refs to '...' + hint: Updates were rejected because the tip of your current branch is behind + hint: its remote counterpart. Integrate the remote changes (e.g. + hint: 'git pull ...') before pushing again. + hint: See the 'Note about fast-forwards' in 'git push --help' for details. + + This can happen, for example, if you: + + • use ‘git reset --hard’ to remove already-published commits, or + + • use ‘git commit --amend’ to replace already-published commits (as + in *note Fixing a mistake by rewriting history::), or + + • use ‘git rebase’ to rebase any already-published commits (as in + *note Keeping a patch series up to date using git rebase::). + + You may force ‘git push’ to perform the update anyway by preceding +the branch name with a plus sign: + + $ git push ssh://yourserver.com/~you/proj.git +master + + Note the addition of the ‘+’ sign. Alternatively, you can use the +‘-f’ flag to force the remote update, as in: + + $ git push -f ssh://yourserver.com/~you/proj.git master + + Normally whenever a branch head in a public repository is modified, +it is modified to point to a descendant of the commit that it pointed to +before. By forcing a push in this situation, you break that convention. +(See *note Problems with rewriting history::.) + + Nevertheless, this is a common practice for people who need a simple +way to publish a work-in-progress patch series, and it is an acceptable +compromise as long as you warn other developers that this is how you +intend to manage the branch. + + It’s also possible for a push to fail in this way when other people +have the right to push to the same repository. In that case, the +correct solution is to retry the push after first updating your work: +either by a pull, or by a fetch followed by a rebase; see the next +section (*note Setting up a shared repository::) and gitcvs-migration(7) +(gitcvs-migration.html) for more. + + +File: git.info, Node: Setting up a shared repository, Next: Allowing web browsing of a repository, Prev: What to do when a push fails, Up: Public Git repositories + +4.4.6 Setting up a shared repository +------------------------------------ + +Another way to collaborate is by using a model similar to that commonly +used in CVS, where several developers with special rights all push to +and pull from a single shared repository. See gitcvs-migration(7) +(gitcvs-migration.html) for instructions on how to set this up.
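+
+   At its core, such a setup is usually just a bare repository that
+all the developers in a group may write to. As a rough sketch (the
+path and group setup here are only an example), the administrator
+might create it with
+
+     $ git init --bare --shared=group /srv/git/project.git
+
+and each developer would then add it as a remote and push to and pull
+from it over ssh, as described in the previous sections.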
+ + However, while there is nothing wrong with Git’s support for shared +repositories, this mode of operation is not generally recommended, +simply because the mode of collaboration that Git supports—by exchanging +patches and pulling from public repositories—has so many advantages over +the central shared repository: + + • Git’s ability to quickly import and merge patches allows a single + maintainer to process incoming changes even at very high rates. + And when that becomes too much, ‘git pull’ provides an easy way for + that maintainer to delegate this job to other maintainers while + still allowing optional review of incoming changes. + + • Since every developer’s repository has the same complete copy of + the project history, no repository is special, and it is trivial + for another developer to take over maintenance of a project, either + by mutual agreement, or because a maintainer becomes unresponsive + or difficult to work with. + + • The lack of a central group of "committers" means there is less + need for formal decisions about who is "in" and who is "out". + + +File: git.info, Node: Allowing web browsing of a repository, Prev: Setting up a shared repository, Up: Public Git repositories + +4.4.7 Allowing web browsing of a repository +------------------------------------------- + +The gitweb cgi script provides users an easy way to browse your +project’s revisions, file contents and logs without having to install +Git. Features like RSS/Atom feeds and blame/annotation details may +optionally be enabled. + + The git-instaweb(1) (git-instaweb.html) command provides a simple way +to start browsing the repository using gitweb. The default server when +using instaweb is lighttpd. + + See the file gitweb/INSTALL in the Git source tree and gitweb(1) +(gitweb.html) for instructions on details setting up a permanent +installation with a CGI or Perl capable server. + + +File: git.info, Node: How to get a Git repository with minimal history, Next: Examples <1>, Prev: Public Git repositories, Up: Sharing development with others + +4.5 How to get a Git repository with minimal history +==================================================== + +A shallow clone (*note [def_shallow_clone]::), with its truncated +history, is useful when one is interested only in recent history of a +project and getting full history from the upstream is expensive. + + A shallow clone (*note [def_shallow_clone]::) is created by +specifying the git-clone(1) (git-clone.html) ‘--depth’ switch. The +depth can later be changed with the git-fetch(1) (git-fetch.html) +‘--depth’ switch, or full history restored with ‘--unshallow’. + + Merging inside a shallow clone (*note [def_shallow_clone]::) will +work as long as a merge base is in the recent history. Otherwise, it +will be like merging unrelated histories and may have to result in huge +conflicts. This limitation may make such a repository unsuitable to be +used in merge based workflows. 
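+
+   As a concrete sketch (the URL is only an example), you might clone
+just the most recent commit, deepen the history later, and finally
+fetch everything:
+
+     $ git clone --depth 1 git://example.com/proj.git
+     $ cd proj
+     $ git fetch --depth 100
+     $ git fetch --unshallow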
+ + +File: git.info, Node: Examples <1>, Prev: How to get a Git repository with minimal history, Up: Sharing development with others + +4.6 Examples +============ + +* Menu: + +* Maintaining topic branches for a Linux subsystem maintainer:: + + +File: git.info, Node: Maintaining topic branches for a Linux subsystem maintainer, Up: Examples <1> + +4.6.1 Maintaining topic branches for a Linux subsystem maintainer +----------------------------------------------------------------- + +This describes how Tony Luck uses Git in his role as maintainer of the +IA64 architecture for the Linux kernel. + + He uses two public branches: + + • A "test" tree into which patches are initially placed so that they + can get some exposure when integrated with other ongoing + development. This tree is available to Andrew for pulling into -mm + whenever he wants. + + • A "release" tree into which tested patches are moved for final + sanity checking, and as a vehicle to send them upstream to Linus + (by sending him a "please pull" request.) + + He also uses a set of temporary branches ("topic branches"), each +containing a logical grouping of patches. + + To set this up, first create your work tree by cloning Linus’s public +tree: + + $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git work + $ cd work + + Linus’s tree will be stored in the remote-tracking branch named +origin/master, and can be updated using git-fetch(1) (git-fetch.html); +you can track other public trees using git-remote(1) (git-remote.html) +to set up a "remote" and git-fetch(1) (git-fetch.html) to keep them up +to date; see *note Repositories and Branches::. + + Now create the branches in which you are going to work; these start +out at the current tip of origin/master branch, and should be set up +(using the ‘--track’ option to git-branch(1) (git-branch.html)) to merge +changes in from Linus by default. + + $ git branch --track test origin/master + $ git branch --track release origin/master + + These can be easily kept up to date using git-pull(1) +(git-pull.html). + + $ git switch test && git pull + $ git switch release && git pull + + Important note! If you have any local changes in these branches, +then this merge will create a commit object in the history (with no +local changes Git will simply do a "fast-forward" merge). Many people +dislike the "noise" that this creates in the Linux history, so you +should avoid doing this capriciously in the ‘release’ branch, as these +noisy commits will become part of the permanent history when you ask +Linus to pull from the release branch. + + A few configuration variables (see git-config(1) (git-config.html)) +can make it easy to push both branches to your public tree. (See *note +Setting up a public repository::.) + + $ cat >> .git/config <<EOF + [remote "mytree"] + url = master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux.git + push = release + push = test + EOF + + Then you can push both the test and release trees using git-push(1) +(git-push.html): + + $ git push mytree + + or push just one of the test and release branches using: + + $ git push mytree test + + or + + $ git push mytree release + + Here are some of the scripts that simplify all this even further. + + ==== update script ==== + # Update a branch in my Git tree. If the branch to be updated + # is origin, then pull from kernel.org. Otherwise merge + # origin/master branch into test|release branch + + case "$1" in + test|release) + git checkout $1 && git pull . origin + ;; + origin) + before=$(git rev-parse refs/remotes/origin/master) + git fetch origin + after=$(git rev-parse refs/remotes/origin/master) + if [ $before != $after ] + then + git log $before..$after | git shortlog + fi + ;; + *) + echo "usage: $0 origin|test|release" 1>&2 + exit 1 + ;; + esac + + ==== merge script ==== + # Merge a branch into either the test or release branch + + pname=$0 + + usage() + { + echo "usage: $pname branch test|release" 1>&2 + exit 1 + } + + git show-ref -q --verify -- refs/heads/"$1" || { + echo "Can't see branch <$1>" 1>&2 + usage + } + + case "$2" in + test|release) + if [ $(git log $2..$1 | wc -c) -eq 0 ] + then + echo $1 already merged into $2 1>&2 + exit 1 + fi + git checkout $2 && git pull . 
$1 + ;; + *) + usage + ;; + esac + + ==== status script ==== + # report on status of my ia64 Git tree + + gb=$(tput setab 2) + rb=$(tput setab 1) + restore=$(tput setab 9) + + if [ `git rev-list test..release | wc -c` -gt 0 ] + then + echo $rb Warning: commits in release that are not in test $restore + git log test..release + fi + + for branch in `git show-ref --heads | sed 's|^.*/||'` + do + if [ $branch = test -o $branch = release ] + then + continue + fi + + echo -n $gb ======= $branch ====== $restore " " + status= + for ref in test release origin/master + do + if [ `git rev-list $ref..$branch | wc -c` -gt 0 ] + then + status=$status${ref:0:1} + fi + done + case $status in + trl) + echo $rb Need to pull into test $restore + ;; + rl) + echo "In test" + ;; + l) + echo "Waiting for linus" + ;; + "") + echo $rb All done $restore + ;; + *) + echo $rb "<$status>" $restore + ;; + esac + git log origin/master..$branch | git shortlog + done + + +File: git.info, Node: Rewriting history and maintaining patch series, Next: Advanced branch management, Prev: Sharing development with others, Up: Top + +5 Rewriting history and maintaining patch series +************************************************ + +Normally commits are only added to a project, never taken away or +replaced. Git is designed with this assumption, and violating it will +cause Git’s merge machinery (for example) to do the wrong thing. + + However, there is a situation in which it can be useful to violate +this assumption. + +* Menu: + +* Creating the perfect patch series:: +* Keeping a patch series up to date using git rebase:: +* Rewriting a single commit:: +* Reordering or selecting from a patch series:: +* Using interactive rebases:: +* Other tools:: +* Problems with rewriting history:: +* Why bisecting merge commits can be harder than bisecting linear history:: + + +File: git.info, Node: Creating the perfect patch series, Next: Keeping a patch series up to date using git rebase, Up: Rewriting history and maintaining patch series + +5.1 Creating the perfect patch series +===================================== + +Suppose you are a contributor to a large project, and you want to add a +complicated feature, and to present it to the other developers in a way +that makes it easy for them to read your changes, verify that they are +correct, and understand why you made each change. + + If you present all of your changes as a single patch (or commit), +they may find that it is too much to digest all at once. + + If you present them with the entire history of your work, complete +with mistakes, corrections, and dead ends, they may be overwhelmed. + + So the ideal is usually to produce a series of patches such that: + + 1. Each patch can be applied in order. + + 2. Each patch includes a single logical change, together with a + message explaining the change. + + 3. No patch introduces a regression: after applying any initial part + of the series, the resulting project still compiles and works, and + has no bugs that it didn’t have before. + + 4. The complete series produces the same end result as your own + (probably much messier!) development process did. + + We will introduce some tools that can help you do this, explain how +to use them, and then explain some of the problems that can arise +because you are rewriting history. 
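+
+   As a preview of those tools: property 3 can even be checked
+mechanically, because git-rebase(1) (git-rebase.html), introduced in
+the next section, can run a command after applying each commit of the
+series. A sketch, assuming your project’s tests run with ‘make test’
+and your series is based on ‘origin’:
+
+     $ git rebase -i --exec "make test" origin
+
+   The rebase stops at the first commit after which the command fails,
+so you can fix that patch before continuing.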
+ + +File: git.info, Node: Keeping a patch series up to date using git rebase, Next: Rewriting a single commit, Prev: Creating the perfect patch series, Up: Rewriting history and maintaining patch series + +5.2 Keeping a patch series up to date using git rebase +====================================================== + +Suppose that you create a branch ‘mywork’ on a remote-tracking branch +‘origin’, and create some commits on top of it: + + $ git switch -c mywork origin + $ vi file.txt + $ git commit + $ vi otherfile.txt + $ git commit + ... + + You have performed no merges into mywork, so it is just a simple +linear sequence of patches on top of ‘origin’: + + o--o--O <-- origin + \ + a--b--c <-- mywork + + Some more interesting work has been done in the upstream project, and +‘origin’ has advanced: + + o--o--O--o--o--o <-- origin + \ + a--b--c <-- mywork + + At this point, you could use ‘pull’ to merge your changes back in; +the result would create a new merge commit, like this: + + o--o--O--o--o--o <-- origin + \ \ + a--b--c--m <-- mywork + + However, if you prefer to keep the history in mywork a simple series +of commits without any merges, you may instead choose to use +git-rebase(1) (git-rebase.html): + + $ git switch mywork + $ git rebase origin + + This will remove each of your commits from mywork, temporarily saving +them as patches (in a directory named ‘.git/rebase-apply’), update +mywork to point at the latest version of origin, then apply each of the +saved patches to the new mywork. The result will look like: + + o--o--O--o--o--o <-- origin + \ + a'--b'--c' <-- mywork + + In the process, it may discover conflicts. In that case it will stop +and allow you to fix the conflicts; after fixing conflicts, use ‘git +add’ to update the index with those contents, and then, instead of +running ‘git commit’, just run + + $ git rebase --continue + + and Git will continue applying the rest of the patches. + + At any point you may use the ‘--abort’ option to abort this process +and return mywork to the state it had before you started the rebase: + + $ git rebase --abort + + If you need to reorder or edit a number of commits in a branch, it +may be easier to use ‘git rebase -i’, which allows you to reorder and +squash commits, as well as marking them for individual editing during +the rebase. See *note Using interactive rebases:: for details, and +*note Reordering or selecting from a patch series:: for alternatives. + + +File: git.info, Node: Rewriting a single commit, Next: Reordering or selecting from a patch series, Prev: Keeping a patch series up to date using git rebase, Up: Rewriting history and maintaining patch series + +5.3 Rewriting a single commit +============================= + +We saw in *note Fixing a mistake by rewriting history:: that you can +replace the most recent commit using + + $ git commit --amend + + which will replace the old commit by a new commit incorporating your +changes, giving you a chance to edit the old commit message first. This +is useful for fixing typos in your last commit, or for adjusting the +patch contents of a poorly staged commit. + + If you need to amend commits from deeper in your history, you can use +interactive rebase’s ‘edit’ instruction (*note Using interactive +rebases::). 
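+
+   For example, to amend a commit three commits back (a sketch; as
+discussed in *note Problems with rewriting history::, only do this to
+commits you have not yet published), start with
+
+     $ git rebase -i HEAD~3
+
+then change ‘pick’ to ‘edit’ on the line for the commit you want to
+fix, save and close the editor, and when the rebase stops at that
+commit run
+
+     $ git commit --amend
+     $ git rebase --continue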
+ + +File: git.info, Node: Reordering or selecting from a patch series, Next: Using interactive rebases, Prev: Rewriting a single commit, Up: Rewriting history and maintaining patch series + +5.4 Reordering or selecting from a patch series +=============================================== + +Sometimes you want to edit a commit deeper in your history. One +approach is to use ‘git format-patch’ to create a series of patches and +then reset the state to before the patches: + + $ git format-patch origin + $ git reset --hard origin + + Then modify, reorder, or eliminate patches as needed before applying +them again with git-am(1) (git-am.html): + + $ git am *.patch + + +File: git.info, Node: Using interactive rebases, Next: Other tools, Prev: Reordering or selecting from a patch series, Up: Rewriting history and maintaining patch series + +5.5 Using interactive rebases +============================= + +You can also edit a patch series with an interactive rebase. This is +the same as reordering a patch series using ‘format-patch’ (*note +Reordering or selecting from a patch series::), so use whichever +interface you like best. + + Rebase your current HEAD on the last commit you want to retain as-is. +For example, if you want to reorder the last 5 commits, use: + + $ git rebase -i HEAD~5 + + This will open your editor with a list of steps to be taken to +perform your rebase. + + pick deadbee The oneline of this commit + pick fa1afe1 The oneline of the next commit + ... + + # Rebase c0ffeee..deadbee onto c0ffeee + # + # Commands: + # p, pick = use commit + # r, reword = use commit, but edit the commit message + # e, edit = use commit, but stop for amending + # s, squash = use commit, but meld into previous commit + # f, fixup = like "squash", but discard this commit's log message + # x, exec = run command (the rest of the line) using shell + # + # These lines can be re-ordered; they are executed from top to bottom. + # + # If you remove a line here THAT COMMIT WILL BE LOST. + # + # However, if you remove everything, the rebase will be aborted. + # + # Note that empty commits are commented out + + As explained in the comments, you can reorder commits, squash them +together, edit commit messages, etc. by editing the list. Once you are +satisfied, save the list and close your editor, and the rebase will +begin. + + The rebase will stop where ‘pick’ has been replaced with ‘edit’ or +when a step in the list fails to mechanically resolve conflicts and +needs your help. When you are done editing and/or resolving conflicts +you can continue with ‘git rebase --continue’. If you decide that +things are getting too hairy, you can always bail out with ‘git rebase +--abort’. Even after the rebase is complete, you can still recover the +original branch by using the reflog (*note Reflogs::). + + For a more detailed discussion of the procedure and additional tips, +see the "INTERACTIVE MODE" section of git-rebase(1) (git-rebase.html). + + +File: git.info, Node: Other tools, Next: Problems with rewriting history, Prev: Using interactive rebases, Up: Rewriting history and maintaining patch series + +5.6 Other tools +=============== + +There are numerous other tools, such as StGit, which exist for the +purpose of maintaining a patch series. These are outside of the scope +of this manual. 
+ + +File: git.info, Node: Problems with rewriting history, Next: Why bisecting merge commits can be harder than bisecting linear history, Prev: Other tools, Up: Rewriting history and maintaining patch series + +5.7 Problems with rewriting history +=================================== + +The primary problem with rewriting the history of a branch has to do +with merging. Suppose somebody fetches your branch and merges it into +their branch, with a result something like this: + + o--o--O--o--o--o <-- origin + \ \ + t--t--t--m <-- their branch: + + Then suppose you modify the last three commits: + + o--o--o <-- new head of origin + / + o--o--O--o--o--o <-- old head of origin + + If we examined all this history together in one repository, it will +look like: + + o--o--o <-- new head of origin + / + o--o--O--o--o--o <-- old head of origin + \ \ + t--t--t--m <-- their branch: + + Git has no way of knowing that the new head is an updated version of +the old head; it treats this situation exactly the same as it would if +two developers had independently done the work on the old and new heads +in parallel. At this point, if someone attempts to merge the new head +in to their branch, Git will attempt to merge together the two (old and +new) lines of development, instead of trying to replace the old by the +new. The results are likely to be unexpected. + + You may still choose to publish branches whose history is rewritten, +and it may be useful for others to be able to fetch those branches in +order to examine or test them, but they should not attempt to pull such +branches into their own work. + + For true distributed development that supports proper merging, +published branches should never be rewritten. + + +File: git.info, Node: Why bisecting merge commits can be harder than bisecting linear history, Prev: Problems with rewriting history, Up: Rewriting history and maintaining patch series + +5.8 Why bisecting merge commits can be harder than bisecting linear history +=========================================================================== + +The git-bisect(1) (git-bisect.html) command correctly handles history +that includes merge commits. However, when the commit that it finds is +a merge commit, the user may need to work harder than usual to figure +out why that commit introduced a problem. + + Imagine this history: + + ---Z---o---X---...---o---A---C---D + \ / + o---o---Y---...---o---B + + Suppose that on the upper line of development, the meaning of one of +the functions that exists at Z is changed at commit X. The commits from +Z leading to A change both the function’s implementation and all calling +sites that exist at Z, as well as new calling sites they add, to be +consistent. There is no bug at A. + + Suppose that in the meantime on the lower line of development +somebody adds a new calling site for that function at commit Y. The +commits from Z leading to B all assume the old semantics of that +function and the callers and the callee are consistent with each other. +There is no bug at B, either. + + Suppose further that the two development lines merge cleanly at C, so +no conflict resolution is required. + + Nevertheless, the code at C is broken, because the callers added on +the lower line of development have not been converted to the new +semantics introduced on the upper line of development. 
So if all you +know is that D is bad, that Z is good, and that git-bisect(1) +(git-bisect.html) identifies C as the culprit, how will you figure out +that the problem is due to this change in semantics? + + When the result of a ‘git bisect’ is a non-merge commit, you should +normally be able to discover the problem by examining just that commit. +Developers can make this easy by breaking their changes into small +self-contained commits. That won’t help in the case above, however, +because the problem isn’t obvious from examination of any single commit; +instead, a global view of the development is required. To make matters +worse, the change in semantics in the problematic function may be just +one small part of the changes in the upper line of development. + + On the other hand, if instead of merging at C you had rebased the +history between Z to B on top of A, you would have gotten this linear +history: + + ---Z---o---X--...---o---A---o---o---Y*--...---o---B*--D* + + Bisecting between Z and D* would hit a single culprit commit Y*, and +understanding why Y* was broken would probably be easier. + + Partly for this reason, many experienced Git users, even when working +on an otherwise merge-heavy project, keep the history linear by rebasing +against the latest upstream version before publishing. + + +File: git.info, Node: Advanced branch management, Next: Git concepts, Prev: Rewriting history and maintaining patch series, Up: Top + +6 Advanced branch management +**************************** + +* Menu: + +* Fetching individual branches:: +* git fetch and fast-forwards:: +* Forcing git fetch to do non-fast-forward updates:: +* Configuring remote-tracking branches:: + + +File: git.info, Node: Fetching individual branches, Next: git fetch and fast-forwards, Up: Advanced branch management + +6.1 Fetching individual branches +================================ + +Instead of using git-remote(1) (git-remote.html), you can also choose +just to update one branch at a time, and to store it locally under an +arbitrary name: + + $ git fetch origin todo:my-todo-work + + The first argument, ‘origin’, just tells Git to fetch from the +repository you originally cloned from. The second argument tells Git to +fetch the branch named ‘todo’ from the remote repository, and to store +it locally under the name ‘refs/heads/my-todo-work’. + + You can also fetch branches from other repositories; so + + $ git fetch git://example.com/proj.git master:example-master + + will create a new branch named ‘example-master’ and store in it the +branch named ‘master’ from the repository at the given URL. If you +already have a branch named example-master, it will attempt to +fast-forward (*note Fast-forward merges::) to the commit given by +example.com’s master branch. In more detail: + + +File: git.info, Node: git fetch and fast-forwards, Next: Forcing git fetch to do non-fast-forward updates, Prev: Fetching individual branches, Up: Advanced branch management + +6.2 git fetch and fast-forwards +=============================== + +In the previous example, when updating an existing branch, ‘git fetch’ +checks to make sure that the most recent commit on the remote branch is +a descendant of the most recent commit on your copy of the branch before +updating your copy of the branch to point at the new commit. Git calls +this process a fast-forward (*note Fast-forward merges::). 
+ + A fast-forward looks something like this: + + o--o--o--o <-- old head of the branch + \ + o--o--o <-- new head of the branch + + In some cases it is possible that the new head will *not* actually be +a descendant of the old head. For example, the developer may have +realized she made a serious mistake, and decided to backtrack, resulting +in a situation like: + + o--o--o--o--a--b <-- old head of the branch + \ + o--o--o <-- new head of the branch + + In this case, ‘git fetch’ will fail, and print out a warning. + + In that case, you can still force Git to update to the new head, as +described in the following section. However, note that in the situation +above this may mean losing the commits labeled ‘a’ and ‘b’, unless +you’ve already created a reference of your own pointing to them. + + +File: git.info, Node: Forcing git fetch to do non-fast-forward updates, Next: Configuring remote-tracking branches, Prev: git fetch and fast-forwards, Up: Advanced branch management + +6.3 Forcing git fetch to do non-fast-forward updates +==================================================== + +If git fetch fails because the new head of a branch is not a descendant +of the old head, you may force the update with: + + $ git fetch git://example.com/proj.git +master:refs/remotes/example/master + + Note the addition of the ‘+’ sign. Alternatively, you can use the +‘-f’ flag to force updates of all the fetched branches, as in: + + $ git fetch -f origin + + Be aware that commits that the old version of example/master pointed +at may be lost, as we saw in the previous section. + + +File: git.info, Node: Configuring remote-tracking branches, Prev: Forcing git fetch to do non-fast-forward updates, Up: Advanced branch management + +6.4 Configuring remote-tracking branches +======================================== + +We saw above that ‘origin’ is just a shortcut to refer to the repository +that you originally cloned from. This information is stored in Git +configuration variables, which you can see using git-config(1) +(git-config.html): + + $ git config -l + core.repositoryformatversion=0 + core.filemode=true + core.logallrefupdates=true + remote.origin.url=git://git.kernel.org/pub/scm/git/git.git + remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* + branch.master.remote=origin + branch.master.merge=refs/heads/master + + If there are other repositories that you also use frequently, you can +create similar configuration options to save typing; for example, + + $ git remote add example git://example.com/proj.git + + adds the following to ‘.git/config’: + + [remote "example"] + url = git://example.com/proj.git + fetch = +refs/heads/*:refs/remotes/example/* + + Also note that the above configuration can be performed by directly +editing the file ‘.git/config’ instead of using git-remote(1) +(git-remote.html). + + After configuring the remote, the following three commands will do +the same thing: + + $ git fetch git://example.com/proj.git +refs/heads/*:refs/remotes/example/* + $ git fetch example +refs/heads/*:refs/remotes/example/* + $ git fetch example + + See git-config(1) (git-config.html) for more details on the +configuration options mentioned above and git-fetch(1) (git-fetch.html) +for more details on the refspec syntax. + + +File: git.info, Node: Git concepts, Next: Submodules, Prev: Advanced branch management, Up: Top + +7 Git concepts +************** + +Git is built on a small number of simple but powerful ideas. 
While it +is possible to get things done without understanding them, you will find +Git much more intuitive if you do. + + We start with the most important, the object database (*note +[def_object_database]::) and the index (*note [def_index]::). + +* Menu: + +* The Object Database:: +* The index:: + + +File: git.info, Node: The Object Database, Next: The index, Up: Git concepts + +7.1 The Object Database +======================= + +We already saw in *note Understanding History: Commits: Understanding +History; Commits. that all commits are stored under a 40-digit "object +name". In fact, all the information needed to represent the history of +a project is stored in objects with such names. In each case the name +is calculated by taking the SHA-1 hash of the contents of the object. +The SHA-1 hash is a cryptographic hash function. What that means to us +is that it is impossible to find two different objects with the same +name. This has a number of advantages; among others: + + • Git can quickly determine whether two objects are identical or not, + just by comparing names. + + • Since object names are computed the same way in every repository, + the same content stored in two repositories will always be stored + under the same name. + + • Git can detect errors when it reads an object, by checking that the + object’s name is still the SHA-1 hash of its contents. + + (See *note Object storage format:: for the details of the object +formatting and SHA-1 calculation.) + + There are four different types of objects: "blob", "tree", "commit", +and "tag". + + • A "blob" object (*note [def_blob_object]::) is used to store file + data. + + • A "tree" object (*note [def_tree_object]::) ties one or more "blob" + objects into a directory structure. In addition, a tree object can + refer to other tree objects, thus creating a directory hierarchy. + + • A "commit" object (*note [def_commit_object]::) ties such directory + hierarchies together into a directed acyclic graph (*note + [def_DAG]::) of revisions—each commit contains the object name of + exactly one tree designating the directory hierarchy at the time of + the commit. In addition, a commit refers to "parent" commit + objects that describe the history of how we arrived at that + directory hierarchy. + + • A "tag" object (*note [def_tag_object]::) symbolically identifies + and can be used to sign other objects. It contains the object name + and type of another object, a symbolic name (of course!) and, + optionally, a signature. + + The object types in some more detail: + +* Menu: + +* Commit Object:: +* Tree Object:: +* Blob Object:: +* Trust:: +* Tag Object:: +* How Git stores objects efficiently; pack files:: +* Dangling objects:: +* Recovering from repository corruption:: + + +File: git.info, Node: Commit Object, Next: Tree Object, Up: The Object Database + +7.1.1 Commit Object +------------------- + +The "commit" object links a physical state of a tree with a description +of how we got there and why. 
Use the ‘--pretty=raw’ option to +git-show(1) (git-show.html) or git-log(1) (git-log.html) to examine your +favorite commit: + + $ git show -s --pretty=raw 2be7fcb476 + commit 2be7fcb4764f2dbcee52635b91fedb1b3dcf7ab4 + tree fb3a8bdd0ceddd019615af4d57a53f43d8cee2bf + parent 257a84d9d02e90447b149af58b271c19405edb6a + author Dave Watson 1187576872 -0400 + committer Junio C Hamano 1187591163 -0700 + + Fix misspelling of 'suppress' in docs + + Signed-off-by: Junio C Hamano + + As you can see, a commit is defined by: + + • a tree: The SHA-1 name of a tree object (as defined below), + representing the contents of a directory at a certain point in + time. + + • parent(s): The SHA-1 name(s) of some number of commits which + represent the immediately previous step(s) in the history of the + project. The example above has one parent; merge commits may have + more than one. A commit with no parents is called a "root" commit, + and represents the initial revision of a project. Each project + must have at least one root. A project can also have multiple + roots, though that isn’t common (or necessarily a good idea). + + • an author: The name of the person responsible for this change, + together with its date. + + • a committer: The name of the person who actually created the + commit, with the date it was done. This may be different from the + author, for example, if the author was someone who wrote a patch + and emailed it to the person who used it to create the commit. + + • a comment describing this commit. + + Note that a commit does not itself contain any information about what +actually changed; all changes are calculated by comparing the contents +of the tree referred to by this commit with the trees associated with +its parents. In particular, Git does not attempt to record file renames +explicitly, though it can identify cases where the existence of the same +file data at changing paths suggests a rename. (See, for example, the +‘-M’ option to git-diff(1) (git-diff.html)). + + A commit is usually created by git-commit(1) (git-commit.html), which +creates a commit whose parent is normally the current HEAD, and whose +tree is taken from the content currently stored in the index. + + +File: git.info, Node: Tree Object, Next: Blob Object, Prev: Commit Object, Up: The Object Database + +7.1.2 Tree Object +----------------- + +The ever-versatile git-show(1) (git-show.html) command can also be used +to examine tree objects, but git-ls-tree(1) (git-ls-tree.html) will give +you more details: + + $ git ls-tree fb3a8bdd0ce + 100644 blob 63c918c667fa005ff12ad89437f2fdc80926e21c .gitignore + 100644 blob 5529b198e8d14decbe4ad99db3f7fb632de0439d .mailmap + 100644 blob 6ff87c4664981e4397625791c8ea3bbb5f2279a3 COPYING + 040000 tree 2fb783e477100ce076f6bf57e4a6f026013dc745 Documentation + 100755 blob 3c0032cec592a765692234f1cba47dfdcc3a9200 GIT-VERSION-GEN + 100644 blob 289b046a443c0647624607d471289b2c7dcd470b INSTALL + 100644 blob 4eb463797adc693dc168b926b6932ff53f17d0b1 Makefile + 100644 blob 548142c327a6790ff8821d67c2ee1eff7a656b52 README + ... + + As you can see, a tree object contains a list of entries, each with a +mode, object type, SHA-1 name, and name, sorted by name. It represents +the contents of a single directory tree. + + The object type may be a blob, representing the contents of a file, +or another tree, representing the contents of a subdirectory. 
Since +trees and blobs, like all other objects, are named by the SHA-1 hash of +their contents, two trees have the same SHA-1 name if and only if their +contents (including, recursively, the contents of all subdirectories) +are identical. This allows Git to quickly determine the differences +between two related tree objects, since it can ignore any entries with +identical object names. + + (Note: in the presence of submodules, trees may also have commits as +entries. See *note Submodules:: for documentation.) + + Note that the files all have mode 644 or 755: Git actually only pays +attention to the executable bit. + + +File: git.info, Node: Blob Object, Next: Trust, Prev: Tree Object, Up: The Object Database + +7.1.3 Blob Object +----------------- + +You can use git-show(1) (git-show.html) to examine the contents of a +blob; take, for example, the blob in the entry for ‘COPYING’ from the +tree above: + + $ git show 6ff87c4664 + + Note that the only valid version of the GPL as far as this project + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + ... + + A "blob" object is nothing but a binary blob of data. It doesn’t +refer to anything else or have attributes of any kind. + + Since the blob is entirely defined by its data, if two files in a +directory tree (or in multiple different versions of the repository) +have the same contents, they will share the same blob object. The +object is totally independent of its location in the directory tree, and +renaming a file does not change the object that file is associated with. + + Note that any tree or blob object can be examined using git-show(1) +(git-show.html) with the <revision>:<path> syntax. This can sometimes +be useful for browsing the contents of a tree that is not currently +checked out. + + +File: git.info, Node: Trust, Next: Tag Object, Prev: Blob Object, Up: The Object Database + +7.1.4 Trust +----------- + +If you receive the SHA-1 name of a blob from one source, and its +contents from another (possibly untrusted) source, you can still trust +that those contents are correct as long as the SHA-1 name agrees. This +is because the SHA-1 is designed so that it is infeasible to find +different contents that produce the same hash. + + Similarly, you need only trust the SHA-1 name of a top-level tree +object to trust the contents of the entire directory that it refers to, +and if you receive the SHA-1 name of a commit from a trusted source, +then you can easily verify the entire history of commits reachable +through parents of that commit, and all of the contents of the trees +referred to by those commits. + + So to introduce some real trust in the system, the only thing you +need to do is to digitally sign just _one_ special note, which includes +the name of a top-level commit. Your digital signature shows others +that you trust that commit, and the immutability of the history of +commits tells others that they can trust the whole history. + + In other words, you can easily validate a whole archive by just +sending out a single email that tells the people the name (SHA-1 hash) +of the top commit, and digitally sign that email using something like +GPG/PGP.
+ + To assist in this, Git also provides the tag object… + + +File: git.info, Node: Tag Object, Next: How Git stores objects efficiently; pack files, Prev: Trust, Up: The Object Database + +7.1.5 Tag Object +---------------- + +A tag object contains an object, object type, tag name, the name of the +person ("tagger") who created the tag, and a message, which may contain +a signature, as can be seen using git-cat-file(1) (git-cat-file.html): + + $ git cat-file tag v1.5.0 + object 437b1b20df4b356c9342dac8d38849f24ef44f27 + type commit + tag v1.5.0 + tagger Junio C Hamano 1171411200 +0000 + + GIT 1.5.0 + -----BEGIN PGP SIGNATURE----- + Version: GnuPG v1.4.6 (GNU/Linux) + + iD8DBQBF0lGqwMbZpPMRm5oRAuRiAJ9ohBLd7s2kqjkKlq1qqC57SbnmzQCdG4ui + nLE/L9aUXdWeTFPron96DLA= + =2E+0 + -----END PGP SIGNATURE----- + + See the git-tag(1) (git-tag.html) command to learn how to create and +verify tag objects. (Note that git-tag(1) (git-tag.html) can also be +used to create "lightweight tags", which are not tag objects at all, but +just simple references whose names begin with ‘refs/tags/’). + + +File: git.info, Node: How Git stores objects efficiently; pack files, Next: Dangling objects, Prev: Tag Object, Up: The Object Database + +7.1.6 How Git stores objects efficiently: pack files +---------------------------------------------------- + +Newly created objects are initially created in a file named after the +object’s SHA-1 hash (stored in ‘.git/objects’). + + Unfortunately this system becomes inefficient once a project has a +lot of objects. Try this on an old project: + + $ git count-objects + 6930 objects, 47620 kilobytes + + The first number is the number of objects which are kept in +individual files. The second is the amount of space taken up by those +"loose" objects. + + You can save space and make Git faster by moving these loose objects +in to a "pack file", which stores a group of objects in an efficient +compressed format; the details of how pack files are formatted can be +found in pack format (technical/pack-format.html). + + To put the loose objects into a pack, just run git repack: + + $ git repack + Counting objects: 6020, done. + Delta compression using up to 4 threads. + Compressing objects: 100% (6020/6020), done. + Writing objects: 100% (6020/6020), done. + Total 6020 (delta 4070), reused 0 (delta 0) + + This creates a single "pack file" in .git/objects/pack/ containing +all currently unpacked objects. You can then run + + $ git prune + + to remove any of the "loose" objects that are now contained in the +pack. This will also remove any unreferenced objects (which may be +created when, for example, you use ‘git reset’ to remove a commit). You +can verify that the loose objects are gone by looking at the +‘.git/objects’ directory or by running + + $ git count-objects + 0 objects, 0 kilobytes + + Although the object files are gone, any commands that refer to those +objects will work exactly as they did before. + + The git-gc(1) (git-gc.html) command performs packing, pruning, and +more for you, so is normally the only high-level command you need. + + +File: git.info, Node: Dangling objects, Next: Recovering from repository corruption, Prev: How Git stores objects efficiently; pack files, Up: The Object Database + +7.1.7 Dangling objects +---------------------- + +The git-fsck(1) (git-fsck.html) command will sometimes complain about +dangling objects. They are not a problem. 
+ + The most common cause of dangling objects is that you’ve rebased a +branch, or you have pulled from somebody else who rebased a branch—see +*note Rewriting history and maintaining patch series::. In that case, +the old head of the original branch still exists, as does everything it +pointed to. The branch pointer itself just doesn’t, since you replaced +it with another one. + + There are also other situations that cause dangling objects. For +example, a "dangling blob" may arise because you did a ‘git add’ of a +file, but then, before you actually committed it and made it part of the +bigger picture, you changed something else in that file and committed +that *updated* thing—the old state that you added originally ends up not +being pointed to by any commit or tree, so it’s now a dangling blob +object. + + Similarly, when the "recursive" merge strategy runs, and finds that +there are criss-cross merges and thus more than one merge base (which is +fairly unusual, but it does happen), it will generate one temporary +midway tree (or possibly even more, if you had lots of criss-crossing +merges and more than two merge bases) as a temporary internal merge +base, and again, those are real objects, but the end result will not end +up pointing to them, so they end up "dangling" in your repository. + + Generally, dangling objects aren’t anything to worry about. They can +even be very useful: if you screw something up, the dangling objects can +be how you recover your old tree (say, you did a rebase, and realized +that you really didn’t want to—you can look at what dangling objects you +have, and decide to reset your head to some old dangling state). + + For commits, you can just use: + + $ gitk <dangling-commit-sha-goes-here> --not --all + + This asks for all the history reachable from the given commit but not +from any branch, tag, or other reference. If you decide it’s something +you want, you can always create a new reference to it, e.g., + + $ git branch recovered-branch <dangling-commit-sha-goes-here> + + For blobs and trees, you can’t do the same, but you can still examine +them. You can just do + + $ git show <dangling-blob/tree-sha-goes-here> + + to show what the contents of the blob were (or, for a tree, basically +what the ‘ls’ for that directory was), and that may give you some idea +of what the operation was that left that dangling object. + + Usually, dangling blobs and trees aren’t very interesting. They’re +almost always the result of either being a half-way mergebase (the blob +will often even have the conflict markers from a merge in it, if you +have had conflicting merges that you fixed up by hand), or simply +because you interrupted a ‘git fetch’ with ^C or something like that, +leaving _some_ of the new objects in the object database, but just +dangling and useless. + + Anyway, once you are sure that you’re not interested in any dangling +state, you can just prune all unreachable objects: + + $ git prune + + and they’ll be gone. (You should only run ‘git prune’ on a quiescent +repository—it’s kind of like doing a filesystem fsck recovery: you don’t +want to do that while the filesystem is mounted. ‘git prune’ is +designed not to cause any harm in such cases of concurrent accesses to a +repository but you might receive confusing or scary messages.)
However, even in +the absence of bugs in Git itself, it is still possible that hardware or +operating system errors could corrupt data. + + The first defense against such problems is backups. You can back up +a Git directory using clone, or just using cp, tar, or any other backup +mechanism. + + As a last resort, you can search for the corrupted objects and +attempt to replace them by hand. Back up your repository before +attempting this in case you corrupt things even more in the process. + + We’ll assume that the problem is a single missing or corrupted blob, +which is sometimes a solvable problem. (Recovering missing trees and +especially commits is *much* harder). + + Before starting, verify that there is corruption, and figure out +where it is with git-fsck(1) (git-fsck.html); this may be +time-consuming. + + Assume the output looks like this: + + $ git fsck --full --no-dangling + broken link from tree 2d9263c6d23595e7cb2a21e5ebbb53655278dff8 + to blob 4b9458b3786228369c63936db65827de3cc06200 + missing blob 4b9458b3786228369c63936db65827de3cc06200 + + Now you know that blob 4b9458b3 is missing, and that the tree +2d9263c6 points to it. If you could find just one copy of that missing +blob object, possibly in some other repository, you could move it into +‘.git/objects/4b/9458b3...’ and be done. Suppose you can’t. You can +still examine the tree that pointed to it with git-ls-tree(1) +(git-ls-tree.html), which might output something like: + + $ git ls-tree 2d9263c6d23595e7cb2a21e5ebbb53655278dff8 + 100644 blob 8d14531846b95bfa3564b58ccfb7913a034323b8 .gitignore + 100644 blob ebf9bf84da0aab5ed944264a5db2a65fe3a3e883 .mailmap + 100644 blob ca442d313d86dc67e0a2e5d584b465bd382cbf5c COPYING + ... + 100644 blob 4b9458b3786228369c63936db65827de3cc06200 myfile + ... + + So now you know that the missing blob was the data for a file named +‘myfile’. And chances are you can also identify the directory—let’s say +it’s in ‘somedirectory’. If you’re lucky the missing copy might be the +same as the copy you have checked out in your working tree at +‘somedirectory/myfile’; you can test whether that’s right with +git-hash-object(1) (git-hash-object.html): + + $ git hash-object -w somedirectory/myfile + + which will create and store a blob object with the contents of +somedirectory/myfile, and output the SHA-1 of that object. if you’re +extremely lucky it might be 4b9458b3786228369c63936db65827de3cc06200, in +which case you’ve guessed right, and the corruption is fixed! + + Otherwise, you need more information. How do you tell which version +of the file has been lost? + + The easiest way to do this is with: + + $ git log --raw --all --full-history -- somedirectory/myfile + + Because you’re asking for raw output, you’ll now get something like + + commit abc + Author: + Date: + ... + :100644 100644 4b9458b newsha M somedirectory/myfile + + + commit xyz + Author: + Date: + + ... + :100644 100644 oldsha 4b9458b M somedirectory/myfile + + This tells you that the immediately following version of the file was +"newsha", and that the immediately preceding version was "oldsha". You +also know the commit messages that went with the change from oldsha to +4b9458b and with the change from 4b9458b to newsha. + + If you’ve been committing small enough changes, you may now have a +good shot at reconstructing the contents of the in-between state +4b9458b. + + If you can do that, you can now recreate the missing object with + + $ git hash-object -w + + and your repository is good again! 
+ + (Btw, you could have ignored the ‘fsck’, and started with doing a + + $ git log --raw --all + + and just looked for the sha of the missing object (4b9458b) in that +whole thing. It’s up to you—Git does *have* a lot of information, it is +just missing one particular blob version. + + +File: git.info, Node: The index, Prev: The Object Database, Up: Git concepts + +7.2 The index +============= + +The index is a binary file (generally kept in ‘.git/index’) containing a +sorted list of path names, each with permissions and the SHA-1 of a blob +object; git-ls-files(1) (git-ls-files.html) can show you the contents of +the index: + + $ git ls-files --stage + 100644 63c918c667fa005ff12ad89437f2fdc80926e21c 0 .gitignore + 100644 5529b198e8d14decbe4ad99db3f7fb632de0439d 0 .mailmap + 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0 COPYING + 100644 a37b2152bd26be2c2289e1f57a292534a51a93c7 0 Documentation/.gitignore + 100644 fbefe9a45b00a54b58d94d06eca48b03d40a50e0 0 Documentation/Makefile + ... + 100644 2511aef8d89ab52be5ec6a5e46236b4b6bcd07ea 0 xdiff/xtypes.h + 100644 2ade97b2574a9f77e7ae4002a4e07a6a38e46d07 0 xdiff/xutils.c + 100644 d5de8292e05e7c36c4b68857c1cf9855e3d2f70a 0 xdiff/xutils.h + + Note that in older documentation you may see the index called the +"current directory cache" or just the "cache". It has three important +properties: + + 1. The index contains all the information necessary to generate a + single (uniquely determined) tree object. + + For example, running git-commit(1) (git-commit.html) generates this + tree object from the index, stores it in the object database, and + uses it as the tree object associated with the new commit. + + 2. The index enables fast comparisons between the tree object it + defines and the working tree. + + It does this by storing some additional data for each entry (such + as the last modified time). This data is not displayed above, and + is not stored in the created tree object, but it can be used to + determine quickly which files in the working directory differ from + what was stored in the index, and thus save Git from having to read + all of the data from such files to look for changes. + + 3. It can efficiently represent information about merge conflicts + between different tree objects, allowing each pathname to be + associated with sufficient information about the trees involved + that you can create a three-way merge between them. + + We saw in *note Getting conflict-resolution help during a merge:: + that during a merge the index can store multiple versions of a + single file (called "stages"). The third column in the + git-ls-files(1) (git-ls-files.html) output above is the stage + number, and will take on values other than 0 for files with merge + conflicts. + + The index is thus a sort of temporary staging area, which is filled +with a tree which you are in the process of working on. + + If you blow the index away entirely, you generally haven’t lost any +information as long as you have the name of the tree that it described. + + +File: git.info, Node: Submodules, Next: Low-level Git operations, Prev: Git concepts, Up: Top + +8 Submodules +************ + +Large projects are often composed of smaller, self-contained modules. 
+For example, an embedded Linux distribution’s source tree would include
+every piece of software in the distribution with some local
+modifications; a movie player might need to build against a specific,
+known-working version of a decompression library; several independent
+programs might all share the same build scripts.
+
+   With centralized revision control systems this is often accomplished
+by including every module in one single repository. Developers can
+check out all modules or only the modules they need to work with. They
+can even modify files across several modules in a single commit while
+moving things around or updating APIs and translations.
+
+   Git does not allow partial checkouts, so duplicating this approach in
+Git would force developers to keep a local copy of modules they are not
+interested in touching. Commits in an enormous checkout would be slower
+than you’d expect as Git would have to scan every directory for changes.
+If modules have a lot of local history, clones would take forever.
+
+   On the plus side, distributed revision control systems can much
+better integrate with external sources. In a centralized model, a
+single arbitrary snapshot of the external project is exported from its
+own revision control and then imported into the local revision control
+on a vendor branch. All the history is hidden. With distributed
+revision control you can clone the entire external history and much more
+easily follow development and re-merge local changes.
+
+   Git’s submodule support allows a repository to contain, as a
+subdirectory, a checkout of an external project. Submodules maintain
+their own identity; the submodule support just stores the submodule
+repository location and commit ID, so other developers who clone the
+containing project ("superproject") can easily clone all the submodules
+at the same revision. Partial checkouts of the superproject are
+possible: you can tell Git to clone none, some or all of the submodules.
+
+   The git-submodule(1) (git-submodule.html) command is available since
+Git 1.5.3. Users with Git 1.5.2 can look up the submodule commits in
+the repository and manually check them out; earlier versions won’t
+recognize the submodules at all.
+
+   To see how submodule support works, create four example repositories
+that can be used later as submodules:
+
+     $ mkdir ~/git
+     $ cd ~/git
+     $ for i in a b c d
+     do
+             mkdir $i
+             cd $i
+             git init
+             echo "module $i" > $i.txt
+             git add $i.txt
+             git commit -m "Initial commit, submodule $i"
+             cd ..
+     done
+
+   Now create the superproject and add all the submodules:
+
+     $ mkdir super
+     $ cd super
+     $ git init
+     $ for i in a b c d
+     do
+             git submodule add ~/git/$i $i
+     done
+
+     *Note*
+
+     Do not use local URLs here if you plan to publish your
+     superproject!
+
+   See what files ‘git submodule’ created:
+
+     $ ls -a
+     .  ..  .git  .gitmodules  a  b  c  d
+
+   The ‘git submodule add <repo> <path>’ command does a couple of
+things:
+
+   • It clones the submodule from ‘<repo>’ to the given ‘<path>’ under
+     the current directory and by default checks out the master branch.
+
+   • It adds the submodule’s clone path to the gitmodules(5)
+     (gitmodules.html) file and adds this file to the index, ready to be
+     committed.
+
+   • It adds the submodule’s current commit ID to the index, ready to be
+     committed.
+
+   Commit the superproject:
+
+     $ git commit -m "Add submodules a, b, c and d."
+
+   Now clone the superproject:
+
+     $ cd ..
+     $ git clone super cloned
+     $ cd cloned
+
+   The submodule directories are there, but they’re empty:
+
+     $ ls -a a
+     .  ..
+ $ git submodule status + -d266b9873ad50488163457f025db7cdd9683d88b a + -e81d457da15309b4fef4249aba9b50187999670d b + -c1536a972b9affea0f16e0680ba87332dc059146 c + -d96249ff5d57de5de093e6baff9e0aafa5276a74 d + + *Note* + + The commit object names shown above would be different for you, but + they should match the HEAD commit object names of your + repositories. You can check it by running ‘git ls-remote ../a’. + + Pulling down the submodules is a two-step process. First run ‘git +submodule init’ to add the submodule repository URLs to ‘.git/config’: + + $ git submodule init + + Now use ‘git submodule update’ to clone the repositories and check +out the commits specified in the superproject: + + $ git submodule update + $ cd a + $ ls -a + . .. .git a.txt + + One major difference between ‘git submodule update’ and ‘git +submodule add’ is that ‘git submodule update’ checks out a specific +commit, rather than the tip of a branch. It’s like checking out a tag: +the head is detached, so you’re not working on a branch. + + $ git branch + * (detached from d266b98) + master + + If you want to make a change within a submodule and you have a +detached head, then you should create or checkout a branch, make your +changes, publish the change within the submodule, and then update the +superproject to reference the new commit: + + $ git switch master + + or + + $ git switch -c fix-up + + then + + $ echo "adding a line again" >> a.txt + $ git commit -a -m "Updated the submodule from within the superproject." + $ git push + $ cd .. + $ git diff + diff --git a/a b/a + index d266b98..261dfac 160000 + --- a/a + +++ b/a + @@ -1 +1 @@ + -Subproject commit d266b9873ad50488163457f025db7cdd9683d88b + +Subproject commit 261dfac35cb99d380eb966e102c1197139f7fa24 + $ git add a + $ git commit -m "Updated submodule a." + $ git push + + You have to run ‘git submodule update’ after ‘git pull’ if you want +to update submodules, too. + +* Menu: + +* Pitfalls with submodules:: + + +File: git.info, Node: Pitfalls with submodules, Up: Submodules + +8.1 Pitfalls with submodules +============================ + +Always publish the submodule change before publishing the change to the +superproject that references it. If you forget to publish the submodule +change, others won’t be able to clone the repository: + + $ cd ~/git/super/a + $ echo i added another line to this file >> a.txt + $ git commit -a -m "doing it wrong this time" + $ cd .. + $ git add a + $ git commit -m "Updated submodule a again." + $ git push + $ cd ~/git/cloned + $ git pull + $ git submodule update + error: pathspec '261dfac35cb99d380eb966e102c1197139f7fa24' did not match any file(s) known to git. + Did you forget to 'git add'? + Unable to checkout '261dfac35cb99d380eb966e102c1197139f7fa24' in submodule path 'a' + + In older Git versions it could be easily forgotten to commit new or +modified files in a submodule, which silently leads to similar problems +as not pushing the submodule changes. Starting with Git 1.7.0 both ‘git +status’ and ‘git diff’ in the superproject show submodules as modified +when they contain new or modified files to protect against accidentally +committing such a state. 
‘git diff’ will also add a ‘-dirty’ to the +work tree side when generating patch output or used with the +‘--submodule’ option: + + $ git diff + diff --git a/sub b/sub + --- a/sub + +++ b/sub + @@ -1 +1 @@ + -Subproject commit 3f356705649b5d566d97ff843cf193359229a453 + +Subproject commit 3f356705649b5d566d97ff843cf193359229a453-dirty + $ git diff --submodule + Submodule sub 3f35670..3f35670-dirty: + + You also should not rewind branches in a submodule beyond commits +that were ever recorded in any superproject. + + It’s not safe to run ‘git submodule update’ if you’ve made and +committed changes within a submodule without checking out a branch +first. They will be silently overwritten: + + $ cat a.txt + module a + $ echo line added from private2 >> a.txt + $ git commit -a -m "line added inside private2" + $ cd .. + $ git submodule update + Submodule path 'a': checked out 'd266b9873ad50488163457f025db7cdd9683d88b' + $ cd a + $ cat a.txt + module a + + *Note* + + The changes are still visible in the submodule’s reflog. + + If you have uncommitted changes in your submodule working tree, ‘git +submodule update’ will not overwrite them. Instead, you get the usual +warning about not being able switch from a dirty branch. + + +File: git.info, Node: Low-level Git operations, Next: Hacking Git, Prev: Submodules, Up: Top + +9 Low-level Git operations +************************** + +Many of the higher-level commands were originally implemented as shell +scripts using a smaller core of low-level Git commands. These can still +be useful when doing unusual things with Git, or just as a way to +understand its inner workings. + +* Menu: + +* Object access and manipulation:: +* The Workflow:: +* Examining the data:: +* Merging multiple trees:: +* Merging multiple trees, continued: Merging multiple trees; continued. + + +File: git.info, Node: Object access and manipulation, Next: The Workflow, Up: Low-level Git operations + +9.1 Object access and manipulation +================================== + +The git-cat-file(1) (git-cat-file.html) command can show the contents of +any object, though the higher-level git-show(1) (git-show.html) is +usually more useful. + + The git-commit-tree(1) (git-commit-tree.html) command allows +constructing commits with arbitrary parents and trees. + + A tree can be created with git-write-tree(1) (git-write-tree.html) +and its data can be accessed by git-ls-tree(1) (git-ls-tree.html). Two +trees can be compared with git-diff-tree(1) (git-diff-tree.html). + + A tag is created with git-mktag(1) (git-mktag.html), and the +signature can be verified by git-verify-tag(1) (git-verify-tag.html), +though it is normally simpler to use git-tag(1) (git-tag.html) for both. + + +File: git.info, Node: The Workflow, Next: Examining the data, Prev: Object access and manipulation, Up: Low-level Git operations + +9.2 The Workflow +================ + +High-level operations such as git-commit(1) (git-commit.html) and +git-restore(1) (git-restore.html) work by moving data between the +working tree, the index, and the object database. Git provides +low-level operations which perform each of these steps individually. + + Generally, all Git operations work on the index file. Some +operations work *purely* on the index file (showing the current state of +the index), but most operations move data between the index file and +either the database or the working directory. 
+Thus there are four main combinations:
+
+* Menu:
+
+* working directory → index::
+* index → object database::
+* object database → index::
+* index → working directory::
+* Tying it all together::
+
+
+File: git.info, Node: working directory → index, Next: index → object database, Up: The Workflow
+
+9.2.1 working directory → index
+-------------------------------
+
+The git-update-index(1) (git-update-index.html) command updates the
+index with information from the working directory. You generally update
+the index information by just specifying the filename you want to
+update, like so:
+
+     $ git update-index filename
+
+   but to avoid common mistakes with filename globbing etc., the command
+will not normally add totally new entries or remove old entries, i.e.
+it will normally just update existing cache entries.
+
+   To tell Git that yes, you really do realize that certain files no
+longer exist, or that new files should be added, you should use the
+‘--remove’ and ‘--add’ flags respectively.
+
+   NOTE! A ‘--remove’ flag does _not_ mean that subsequent filenames
+will necessarily be removed: if the files still exist in your directory
+structure, the index will be updated with their new status, not removed.
+The only thing ‘--remove’ means is that update-index will be considering
+a removed file to be a valid thing, and if the file really does not
+exist any more, it will update the index accordingly.
+
+   As a special case, you can also do ‘git update-index --refresh’,
+which will refresh the "stat" information of each index entry to match
+the current stat information. It will _not_ update the object status
+itself, and it will only update the fields that are used to quickly test
+whether an object still matches its old backing store object.
+
+   The previously introduced git-add(1) (git-add.html) is just a wrapper
+for git-update-index(1) (git-update-index.html).
+
+
+File: git.info, Node: index → object database, Next: object database → index, Prev: working directory → index, Up: The Workflow
+
+9.2.2 index → object database
+-----------------------------
+
+You write your current index file to a "tree" object with the program
+
+     $ git write-tree
+
+   that doesn’t come with any options—it will just write out the current
+index into the set of tree objects that describe that state, and it will
+return the name of the resulting top-level tree. You can use that tree
+to re-generate the index at any time by going in the other direction:
+
+
+File: git.info, Node: object database → index, Next: index → working directory, Prev: index → object database, Up: The Workflow
+
+9.2.3 object database → index
+-----------------------------
+
+You read a "tree" file from the object database, and use that to
+populate (and overwrite—don’t do this if your index contains any unsaved
+state that you might want to restore later!) your current index.
+Normal operation is just
+
+     $ git read-tree <SHA-1 of tree>
+
+   and your index file will now be equivalent to the tree that you saved
+earlier. However, that is only your _index_ file: your working
+directory contents have not been modified.
+
+
+File: git.info, Node: index → working directory, Next: Tying it all together, Prev: object database → index, Up: The Workflow
+
+9.2.4 index → working directory
+-------------------------------
+
+You update your working directory from the index by "checking out"
+files.
+This is not a very common operation, since normally you’d just
+keep your files updated, and rather than write to your working
+directory, you’d tell the index files about the changes in your working
+directory (i.e. ‘git update-index’).
+
+   However, if you decide to jump to a new version, or check out
+somebody else’s version, or just restore a previous tree, you’d populate
+your index file with read-tree, and then you need to check out the
+result with
+
+     $ git checkout-index filename
+
+   or, if you want to check out all of the index, use ‘-a’.
+
+   NOTE! ‘git checkout-index’ normally refuses to overwrite old files,
+so if you have an old version of the tree already checked out, you will
+need to use the ‘-f’ flag (_before_ the ‘-a’ flag or the filename) to
+_force_ the checkout.
+
+   Finally, there are a few odds and ends which are not purely moving
+from one representation to the other:
+
+
+File: git.info, Node: Tying it all together, Prev: index → working directory, Up: The Workflow
+
+9.2.5 Tying it all together
+---------------------------
+
+To commit a tree you have instantiated with ‘git write-tree’, you’d
+create a "commit" object that refers to that tree and the history behind
+it—most notably the "parent" commits that preceded it in history.
+
+   Normally a "commit" has one parent: the previous state of the tree
+before a certain change was made. However, sometimes it can have two or
+more parent commits, in which case we call it a "merge", due to the fact
+that such a commit brings together ("merges") two or more previous
+states represented by other commits.
+
+   In other words, while a "tree" represents a particular directory
+state of a working directory, a "commit" represents that state in time,
+and explains how we got there.
+
+   You create a commit object by giving it the tree that describes the
+state at the time of the commit, and a list of parents:
+
+     $ git commit-tree <tree> -p <parent> [(-p <parent2>)...]
+
+   and then giving the reason for the commit on stdin (either through
+redirection from a pipe or file, or by just typing it at the tty).
+
+   ‘git commit-tree’ will return the name of the object that represents
+that commit, and you should save it away for later use. Normally, you’d
+commit a new ‘HEAD’ state, and while Git doesn’t care where you save the
+note about that state, in practice we tend to just write the result to
+the file pointed at by ‘.git/HEAD’, so that we can always see what the
+last committed state was.
+
+   Here is a picture that illustrates how various pieces fit together:
+
+                          commit-tree
+                           commit obj
+                            +----+
+                            |    |
+                            |    |
+                            V    V
+                         +-----------+
+                         | Object DB |
+                         |  Backing  |
+                         |   Store   |
+                         +-----------+
+                            ^
+                write-tree  |     |
+                  tree obj  |     |
+                            |     |  read-tree
+                            |     |  tree obj
+                                  V
+                         +-----------+
+                         |   Index   |
+                         |  "cache"  |
+                         +-----------+
+              update-index  ^
+                  blob obj  |     |
+                            |     |
+         checkout-index -u  |     |  checkout-index
+                  stat      |     |  blob obj
+                                  V
+                         +-----------+
+                         |  Working  |
+                         | Directory |
+                         +-----------+
+
+
+File: git.info, Node: Examining the data, Next: Merging multiple trees, Prev: The Workflow, Up: Low-level Git operations
+
+9.3 Examining the data
+======================
+
+You can examine the data represented in the object database and the
+index with various helper tools.
+For every object, you can use
+git-cat-file(1) (git-cat-file.html) to examine details about the object:
+
+     $ git cat-file -t <objectname>
+
+   shows the type of the object, and once you have the type (which is
+usually implicit in where you find the object), you can use
+
+     $ git cat-file blob|tree|commit|tag <objectname>
+
+   to show its contents. NOTE! Trees have binary content, and as a
+result there is a special helper for showing that content, called ‘git
+ls-tree’, which turns the binary content into a more easily readable
+form.
+
+   It’s especially instructive to look at "commit" objects, since those
+tend to be small and fairly self-explanatory. In particular, if you
+follow the convention of having the top commit name in ‘.git/HEAD’, you
+can do
+
+     $ git cat-file commit HEAD
+
+   to see what the top commit was.
+
+
+File: git.info, Node: Merging multiple trees, Next: Merging multiple trees; continued, Prev: Examining the data, Up: Low-level Git operations
+
+9.4 Merging multiple trees
+==========================
+
+Git can help you perform a three-way merge, which can in turn be used
+for a many-way merge by repeating the merge procedure several times.
+The usual situation is that you only do one three-way merge (reconciling
+two lines of history) and commit the result, but if you like to, you can
+merge several branches in one go.
+
+   To perform a three-way merge, you start with the two commits you want
+to merge, find their closest common parent (a third commit), and compare
+the trees corresponding to these three commits.
+
+   To get the "base" for the merge, look up the common parent of two
+commits:
+
+     $ git merge-base <commit1> <commit2>
+
+   This prints the name of a commit they are both based on. You should
+now look up the tree objects of those commits, which you can easily do
+with
+
+     $ git cat-file commit <commitname> | head -1
+
+   since the tree object information is always the first line in a
+commit object.
+
+   Once you know the three trees you are going to merge (the one
+"original" tree, aka the common tree, and the two "result" trees, aka
+the branches you want to merge), you do a "merge" read into the index.
+This will complain if it has to throw away your old index contents, so
+you should make sure that you’ve committed those—in fact you would
+normally always do a merge against your last commit (which should thus
+match what you have in your current index anyway).
+
+   To do the merge, do
+
+     $ git read-tree -m -u <origtree> <yourtree> <targettree>
+
+   which will do all trivial merge operations for you directly in the
+index file, and you can just write the result out with ‘git write-tree’.
+
+
+File: git.info, Node: Merging multiple trees; continued, Prev: Merging multiple trees, Up: Low-level Git operations
+
+9.5 Merging multiple trees, continued
+=====================================
+
+Sadly, many merges aren’t trivial. If there are files that have been
+added, moved or removed, or if both branches have modified the same
+file, you will be left with an index tree that contains "merge entries"
+in it. Such an index tree can _NOT_ be written out to a tree object,
+and you will have to resolve any such merge clashes using other tools
+before you can write out the result.
+
+   You can examine such index state with the ‘git ls-files --unmerged’
+command.
An example: + + $ git read-tree -m $orig HEAD $target + $ git ls-files --unmerged + 100644 263414f423d0e4d70dae8fe53fa34614ff3e2860 1 hello.c + 100644 06fa6a24256dc7e560efa5687fa84b51f0263c3a 2 hello.c + 100644 cc44c73eb783565da5831b4d820c962954019b69 3 hello.c + + Each line of the ‘git ls-files --unmerged’ output begins with the +blob mode bits, blob SHA-1, _stage number_, and the filename. The +_stage number_ is Git’s way to say which tree it came from: stage 1 +corresponds to the ‘$orig’ tree, stage 2 to the ‘HEAD’ tree, and stage 3 +to the ‘$target’ tree. + + Earlier we said that trivial merges are done inside ‘git read-tree +-m’. For example, if the file did not change from ‘$orig’ to ‘HEAD’ or +‘$target’, or if the file changed from ‘$orig’ to ‘HEAD’ and ‘$orig’ to +‘$target’ the same way, obviously the final outcome is what is in +‘HEAD’. What the above example shows is that file ‘hello.c’ was changed +from ‘$orig’ to ‘HEAD’ and ‘$orig’ to ‘$target’ in a different way. You +could resolve this by running your favorite 3-way merge program, e.g. +‘diff3’, ‘merge’, or Git’s own merge-file, on the blob objects from +these three stages yourself, like this: + + $ git cat-file blob 263414f >hello.c~1 + $ git cat-file blob 06fa6a2 >hello.c~2 + $ git cat-file blob cc44c73 >hello.c~3 + $ git merge-file hello.c~2 hello.c~1 hello.c~3 + + This would leave the merge result in ‘hello.c~2’ file, along with +conflict markers if there are conflicts. After verifying the merge +result makes sense, you can tell Git what the final merge result for +this file is by: + + $ mv -f hello.c~2 hello.c + $ git update-index hello.c + + When a path is in the "unmerged" state, running ‘git update-index’ +for that path tells Git to mark the path resolved. + + The above is the description of a Git merge at the lowest level, to +help you understand what conceptually happens under the hood. In +practice, nobody, not even Git itself, runs ‘git cat-file’ three times +for this. There is a ‘git merge-index’ program that extracts the stages +to temporary files and calls a "merge" script on it: + + $ git merge-index git-merge-one-file hello.c + + and that is what higher level ‘git merge -s resolve’ is implemented +with. + + +File: git.info, Node: Hacking Git, Next: Git Glossary, Prev: Low-level Git operations, Up: Top + +10 Hacking Git +************** + +This chapter covers internal details of the Git implementation which +probably only Git developers need to understand. + +* Menu: + +* Object storage format:: +* A birds-eye view of Git’s source code:: + + +File: git.info, Node: Object storage format, Next: A birds-eye view of Git’s source code, Up: Hacking Git + +10.1 Object storage format +========================== + +All objects have a statically determined "type" which identifies the +format of the object (i.e. how it is used, and how it can refer to +other objects). There are currently four different object types: +"blob", "tree", "commit", and "tag". + + Regardless of object type, all objects share the following +characteristics: they are all deflated with zlib, and have a header that +not only specifies their type, but also provides size information about +the data in the object. It’s worth noting that the SHA-1 hash that is +used to name the object is the hash of the original data plus this +header, so ‘sha1sum’ _file_ does not match the object name for _file_. 
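+
+   You can see the effect of this header with a couple of commands (a
+small sketch; ‘greeting’ is just a scratch file invented for this
+experiment, and the hash outputs are omitted):
+
+     $ echo 'hello world' > greeting
+     $ sha1sum greeting
+     $ git hash-object greeting
+
+   The two hashes printed will differ. Prepending the header by hand
+(here ‘blob 12’ followed by a NUL byte, since the file is 12 bytes long)
+and hashing the result reproduces the Git object name, assuming your
+shell’s printf understands ‘\0’:
+
+     $ printf 'blob 12\0' | cat - greeting | sha1sum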
+
+   As a result, the general consistency of an object can always be
+tested independently of the contents or the type of the object: all
+objects can be validated by verifying that (a) their hashes match the
+content of the file and (b) the object successfully inflates to a stream
+of bytes that forms a sequence of ‘<ascii type without space> + <space>
++ <ascii decimal size> + <byte\0> + <binary object data>’.
+
+   The structured objects can further have their structure and
+connectivity to other objects verified. This is generally done with the
+‘git fsck’ program, which generates a full dependency graph of all
+objects, and verifies their internal consistency (in addition to just
+verifying their superficial consistency through the hash).
+
+
+File: git.info, Node: A birds-eye view of Git’s source code, Prev: Object storage format, Up: Hacking Git
+
+10.2 A birds-eye view of Git’s source code
+==========================================
+
+It is not always easy for new developers to find their way through Git’s
+source code. This section gives you a little guidance to show where to
+start.
+
+   A good place to start is with the contents of the initial commit,
+with:
+
+     $ git switch --detach e83c5163
+
+   The initial revision lays the foundation for almost everything Git
+has today, but is small enough to read in one sitting.
+
+   Note that terminology has changed since that revision. For example,
+the README in that revision uses the word "changeset" to describe what
+we now call a commit (*note [def_commit_object]::).
+
+   Also, we do not call it "cache" any more, but rather "index";
+however, the file is still called ‘cache.h’. Remark: Not much reason to
+change it now, especially since there is no good single name for it
+anyway, because it is basically _the_ header file which is included by
+_all_ of Git’s C sources.
+
+   If you grasp the ideas in that initial commit, you should check out a
+more recent version and skim ‘cache.h’, ‘object.h’ and ‘commit.h’.
+
+   In the early days, Git (in the tradition of UNIX) was a bunch of
+programs which were extremely simple, and which you used in scripts,
+piping the output of one into another. This turned out to be good for
+initial development, since it was easier to test new things. However,
+recently many of these parts have become builtins, and some of the core
+has been "libified", i.e. put into libgit.a for performance,
+portability reasons, and to avoid code duplication.
+
+   By now, you know what the index is (and find the corresponding data
+structures in ‘cache.h’), and that there are just a couple of object
+types (blobs, trees, commits and tags) which inherit their common
+structure from ‘struct object’, which is their first member (and thus,
+you can cast e.g. ‘(struct object *)commit’ to achieve the _same_ as
+‘&commit->object’, i.e. get at the object name and flags).
+
+   Now is a good point to take a break to let this information sink in.
+
+   Next step: get familiar with the object naming. Read *note Naming
+commits::. There are quite a few ways to name an object (and not only
+revisions!). All of these are handled in ‘sha1_name.c’. Just have a
+quick look at the function ‘get_sha1()’. A lot of the special handling
+is done by functions like ‘get_sha1_basic()’ or the likes.
+
+   This is just to get you into the groove for the most libified part of
+Git: the revision walker.
+
+   Basically, the initial version of ‘git log’ was a shell script:
+
+     $ git-rev-list --pretty $(git-rev-parse --default HEAD "$@") | \
+       LESS=-S ${PAGER:-less}
+
+   What does this mean?
+
+   ‘git rev-list’ is the original version of the revision walker, which
+_always_ printed a list of revisions to stdout. It is still functional,
+and needs to, since most new Git commands start out as scripts using
+‘git rev-list’.
+
+   ‘git rev-parse’ is not as important any more; it was only used to
+filter out options that were relevant for the different plumbing
+commands that were called by the script.
+
+   Most of what ‘git rev-list’ did is contained in ‘revision.c’ and
+‘revision.h’. It wraps the options in a struct named ‘rev_info’, which
+controls how and what revisions are walked, and more.
+
+   The original job of ‘git rev-parse’ is now taken by the function
+‘setup_revisions()’, which parses the revisions and the common
+command-line options for the revision walker. This information is
+stored in the struct ‘rev_info’ for later consumption. You can do your
+own command-line option parsing after calling ‘setup_revisions()’.
+After that, you have to call ‘prepare_revision_walk()’ for
+initialization, and then you can get the commits one by one with the
+function ‘get_revision()’.
+
+   If you are interested in more details of the revision walking
+process, just have a look at the first implementation of ‘cmd_log()’;
+call ‘git show v1.3.0~155^2~4’ and scroll down to that function (note
+that you no longer need to call ‘setup_pager()’ directly).
+
+   Nowadays, ‘git log’ is a builtin, which means that it is _contained_
+in the command ‘git’. The source side of a builtin is
+
+   • a function called ‘cmd_<bla>’, typically defined in
+     ‘builtin/<bla>.c’ (note that older versions of Git used to have it
+     in ‘builtin-<bla>.c’ instead), and declared in ‘builtin.h’.
+
+   • an entry in the ‘commands[]’ array in ‘git.c’, and
+
+   • an entry in ‘BUILTIN_OBJECTS’ in the ‘Makefile’.
+
+   Sometimes, more than one builtin is contained in one source file.
+For example, ‘cmd_whatchanged()’ and ‘cmd_log()’ both reside in
+‘builtin/log.c’, since they share quite a bit of code. In that case,
+the commands which are _not_ named like the ‘.c’ file in which they live
+have to be listed in ‘BUILT_INS’ in the ‘Makefile’.
+
+   ‘git log’ looks more complicated in C than it does in the original
+script, but that allows for a much greater flexibility and performance.
+
+   Here again it is a good point to take a pause.
+
+   Lesson three is: study the code. Really, it is the best way to learn
+about the organization of Git (after you know the basic concepts).
+
+   So, think about something which you are interested in, say, "how can
+I access a blob just knowing the object name of it?". The first step is
+to find a Git command with which you can do it. In this example, it is
+either ‘git show’ or ‘git cat-file’.
+
+   For the sake of clarity, let’s stay with ‘git cat-file’, because it
+
+   • is plumbing, and
+
+   • was around even in the initial commit (it literally went only
+     through some 20 revisions as ‘cat-file.c’, was renamed to
+     ‘builtin/cat-file.c’ when made a builtin, and then saw less than 10
+     versions).
+
+   So, look into ‘builtin/cat-file.c’, search for ‘cmd_cat_file()’ and
+look what it does.
+
+             git_config(git_default_config);
+             if (argc != 3)
+                     usage("git cat-file [-t|-s|-e|-p|<type>] <sha1>");
+             if (get_sha1(argv[2], sha1))
+                     die("Not a valid object name %s", argv[2]);
+
+   Let’s skip over the obvious details; the only really interesting part
+here is the call to ‘get_sha1()’.
It tries to interpret ‘argv[2]’ as an +object name, and if it refers to an object which is present in the +current repository, it writes the resulting SHA-1 into the variable +‘sha1’. + + Two things are interesting here: + + • ‘get_sha1()’ returns 0 on _success_. This might surprise some new + Git hackers, but there is a long tradition in UNIX to return + different negative numbers in case of different errors—and 0 on + success. + + • the variable ‘sha1’ in the function signature of ‘get_sha1()’ is + ‘unsigned char *’, but is actually expected to be a pointer to + ‘unsigned char[20]’. This variable will contain the 160-bit SHA-1 + of the given commit. Note that whenever a SHA-1 is passed as + ‘unsigned char *’, it is the binary representation, as opposed to + the ASCII representation in hex characters, which is passed as + ‘char *’. + + You will see both of these things throughout the code. + + Now, for the meat: + + case 0: + buf = read_object_with_reference(sha1, argv[1], &size, NULL); + + This is how you read a blob (actually, not only a blob, but any type +of object). To know how the function ‘read_object_with_reference()’ +actually works, find the source code for it (something like ‘git grep +read_object_with | grep ":[a-z]"’ in the Git repository), and read the +source. + + To find out how the result can be used, just read on in +‘cmd_cat_file()’: + + write_or_die(1, buf, size); + + Sometimes, you do not know where to look for a feature. In many such +cases, it helps to search through the output of ‘git log’, and then ‘git +show’ the corresponding commit. + + Example: If you know that there was some test case for ‘git bundle’, +but do not remember where it was (yes, you _could_ ‘git grep bundle t/’, +but that does not illustrate the point!): + + $ git log --no-merges t/ + + In the pager (‘less’), just search for "bundle", go a few lines back, +and see that it is in commit 18449ab0. Now just copy this object name, +and paste it into the command line + + $ git show 18449ab0 + + Voila. + + Another example: Find out what to do in order to make some script a +builtin: + + $ git log --no-merges --diff-filter=A builtin/*.c + + You see, Git is actually the best tool to find out about the source +of Git itself! + + +File: git.info, Node: Git Glossary, Next: Git Quick Reference, Prev: Hacking Git, Up: Top + +11 Git Glossary +*************** + +* Menu: + +* Git explained:: + + +File: git.info, Node: Git explained, Up: Git Glossary + +11.1 Git explained +================== + +alternate object database + Via the alternates mechanism, a repository (*note + [def_repository]::) can inherit part of its object database (*note + [def_object_database]::) from another object database, which is + called an "alternate". + +bare repository + A bare repository is normally an appropriately named directory + (*note [def_directory]::) with a ‘.git’ suffix that does not have a + locally checked-out copy of any of the files under revision + control. That is, all of the Git administrative and control files + that would normally be present in the hidden ‘.git’ sub-directory + are directly present in the ‘repository.git’ directory instead, and + no other files are present and checked out. Usually publishers of + public repositories make bare repositories available. + +blob object + Untyped object (*note [def_object]::), e.g. the contents of a + file. + +branch + A "branch" is a line of development. The most recent commit (*note + [def_commit]::) on a branch is referred to as the tip of that + branch. 
The tip of the branch is referenced by a branch head + (*note [def_head]::), which moves forward as additional development + is done on the branch. A single Git repository (*note + [def_repository]::) can track an arbitrary number of branches, but + your working tree (*note [def_working_tree]::) is associated with + just one of them (the "current" or "checked out" branch), and HEAD + (*note [def_HEAD]::) points to that branch. + +cache + Obsolete for: index (*note [def_index]::). + +chain + A list of objects, where each object (*note [def_object]::) in the + list contains a reference to its successor (for example, the + successor of a commit (*note [def_commit]::) could be one of its + parents (*note [def_parent]::)). + +changeset + BitKeeper/cvsps speak for "commit (*note [def_commit]::)". Since + Git does not store changes, but states, it really does not make + sense to use the term "changesets" with Git. + +checkout + The action of updating all or part of the working tree (*note + [def_working_tree]::) with a tree object (*note + [def_tree_object]::) or blob (*note [def_blob_object]::) from the + object database (*note [def_object_database]::), and updating the + index (*note [def_index]::) and HEAD (*note [def_HEAD]::) if the + whole working tree has been pointed at a new branch (*note + [def_branch]::). + +cherry-picking + In SCM (*note [def_SCM]::) jargon, "cherry pick" means to choose a + subset of changes out of a series of changes (typically commits) + and record them as a new series of changes on top of a different + codebase. In Git, this is performed by the "git cherry-pick" + command to extract the change introduced by an existing commit + (*note [def_commit]::) and to record it based on the tip of the + current branch (*note [def_branch]::) as a new commit. + +clean + A working tree (*note [def_working_tree]::) is clean, if it + corresponds to the revision (*note [def_revision]::) referenced by + the current head (*note [def_head]::). Also see "dirty (*note + [def_dirty]::)". + +commit + As a noun: A single point in the Git history; the entire history of + a project is represented as a set of interrelated commits. The + word "commit" is often used by Git in the same places other + revision control systems use the words "revision" or "version". + Also used as a short hand for commit object (*note + [def_commit_object]::). + + As a verb: The action of storing a new snapshot of the project’s + state in the Git history, by creating a new commit representing the + current state of the index (*note [def_index]::) and advancing HEAD + (*note [def_HEAD]::) to point at the new commit. + +commit object + An object (*note [def_object]::) which contains the information + about a particular revision (*note [def_revision]::), such as + parents (*note [def_parent]::), committer, author, date and the + tree object (*note [def_tree_object]::) which corresponds to the + top directory (*note [def_directory]::) of the stored revision. + +commit-ish (also committish) + A commit object (*note [def_commit_object]::) or an object (*note + [def_object]::) that can be recursively dereferenced to a commit + object. The following are all commit-ishes: a commit object, a tag + object (*note [def_tag_object]::) that points to a commit object, a + tag object that points to a tag object that points to a commit + object, etc. + +core Git + Fundamental data structures and utilities of Git. Exposes only + limited source code management tools. + +DAG + Directed acyclic graph. 
The commit objects (*note + [def_commit_object]::) form a directed acyclic graph, because they + have parents (directed), and the graph of commit objects is acyclic + (there is no chain (*note [def_chain]::) which begins and ends with + the same object (*note [def_object]::)). + +dangling object + An unreachable object (*note [def_unreachable_object]::) which is + not reachable (*note [def_reachable]::) even from other unreachable + objects; a dangling object has no references to it from any + reference or object (*note [def_object]::) in the repository (*note + [def_repository]::). + +detached HEAD + Normally the HEAD (*note [def_HEAD]::) stores the name of a branch + (*note [def_branch]::), and commands that operate on the history + HEAD represents operate on the history leading to the tip of the + branch the HEAD points at. However, Git also allows you to check + out (*note [def_checkout]::) an arbitrary commit (*note + [def_commit]::) that isn’t necessarily the tip of any particular + branch. The HEAD in such a state is called "detached". + + Note that commands that operate on the history of the current + branch (e.g. ‘git commit’ to build a new history on top of it) + still work while the HEAD is detached. They update the HEAD to + point at the tip of the updated history without affecting any + branch. Commands that update or inquire information _about_ the + current branch (e.g. ‘git branch --set-upstream-to’ that sets what + remote-tracking branch the current branch integrates with) + obviously do not work, as there is no (real) current branch to ask + about in this state. + +directory + The list you get with "ls" :-) + +dirty + A working tree (*note [def_working_tree]::) is said to be "dirty" + if it contains modifications which have not been committed (*note + [def_commit]::) to the current branch (*note [def_branch]::). + +evil merge + An evil merge is a merge (*note [def_merge]::) that introduces + changes that do not appear in any parent (*note [def_parent]::). + +fast-forward + A fast-forward is a special type of merge (*note [def_merge]::) + where you have a revision (*note [def_revision]::) and you are + "merging" another branch (*note [def_branch]::)’s changes that + happen to be a descendant of what you have. In such a case, you do + not make a new merge (*note [def_merge]::) commit (*note + [def_commit]::) but instead just update to his revision. This will + happen frequently on a remote-tracking branch (*note + [def_remote_tracking_branch]::) of a remote repository (*note + [def_repository]::). + +fetch + Fetching a branch (*note [def_branch]::) means to get the branch’s + head ref (*note [def_head_ref]::) from a remote repository (*note + [def_repository]::), to find out which objects are missing from the + local object database (*note [def_object_database]::), and to get + them, too. See also git-fetch(1) (git-fetch.html). + +file system + Linus Torvalds originally designed Git to be a user space file + system, i.e. the infrastructure to hold files and directories. + That ensured the efficiency and speed of Git. + +Git archive + Synonym for repository (*note [def_repository]::) (for arch + people). + +gitfile + A plain file ‘.git’ at the root of a working tree that points at + the directory that is the real repository. + +grafts + Grafts enables two otherwise different lines of development to be + joined together by recording fake ancestry information for commits. 
+ This way you can make Git pretend the set of parents (*note + [def_parent]::) a commit (*note [def_commit]::) has is different + from what was recorded when the commit was created. Configured via + the ‘.git/info/grafts’ file. + + Note that the grafts mechanism is outdated and can lead to problems + transferring objects between repositories; see git-replace(1) + (git-replace.html) for a more flexible and robust system to do the + same thing. + +hash + In Git’s context, synonym for object name (*note + [def_object_name]::). + +head + A named reference (*note [def_ref]::) to the commit (*note + [def_commit]::) at the tip of a branch (*note [def_branch]::). + Heads are stored in a file in ‘$GIT_DIR/refs/heads/’ directory, + except when using packed refs. (See git-pack-refs(1) + (git-pack-refs.html).) + +HEAD + The current branch (*note [def_branch]::). In more detail: Your + working tree (*note [def_working_tree]::) is normally derived from + the state of the tree referred to by HEAD. HEAD is a reference to + one of the heads (*note [def_head]::) in your repository, except + when using a detached HEAD (*note [def_detached_HEAD]::), in which + case it directly references an arbitrary commit. + +head ref + A synonym for head (*note [def_head]::). + +hook + During the normal execution of several Git commands, call-outs are + made to optional scripts that allow a developer to add + functionality or checking. Typically, the hooks allow for a + command to be pre-verified and potentially aborted, and allow for a + post-notification after the operation is done. The hook scripts + are found in the ‘$GIT_DIR/hooks/’ directory, and are enabled by + simply removing the ‘.sample’ suffix from the filename. In earlier + versions of Git you had to make them executable. + +index + A collection of files with stat information, whose contents are + stored as objects. The index is a stored version of your working + tree (*note [def_working_tree]::). Truth be told, it can also + contain a second, and even a third version of a working tree, which + are used when merging (*note [def_merge]::). + +index entry + The information regarding a particular file, stored in the index + (*note [def_index]::). An index entry can be unmerged, if a merge + (*note [def_merge]::) was started, but not yet finished (i.e. if + the index contains multiple versions of that file). + +master + The default development branch (*note [def_branch]::). Whenever + you create a Git repository (*note [def_repository]::), a branch + named "master" is created, and becomes the active branch. In most + cases, this contains the local development, though that is purely + by convention and is not required. + +merge + As a verb: To bring the contents of another branch (*note + [def_branch]::) (possibly from an external repository (*note + [def_repository]::)) into the current branch. In the case where + the merged-in branch is from a different repository, this is done + by first fetching (*note [def_fetch]::) the remote branch and then + merging the result into the current branch. This combination of + fetch and merge operations is called a pull (*note [def_pull]::). + Merging is performed by an automatic process that identifies + changes made since the branches diverged, and then applies all + those changes together. In cases where changes conflict, manual + intervention may be required to complete the merge. 
+ + As a noun: unless it is a fast-forward (*note + [def_fast_forward]::), a successful merge results in the creation + of a new commit (*note [def_commit]::) representing the result of + the merge, and having as parents (*note [def_parent]::) the tips of + the merged branches (*note [def_branch]::). This commit is + referred to as a "merge commit", or sometimes just a "merge". + +object + The unit of storage in Git. It is uniquely identified by the SHA-1 + (*note [def_SHA1]::) of its contents. Consequently, an object + cannot be changed. + +object database + Stores a set of "objects", and an individual object (*note + [def_object]::) is identified by its object name (*note + [def_object_name]::). The objects usually live in + ‘$GIT_DIR/objects/’. + +object identifier + Synonym for object name (*note [def_object_name]::). + +object name + The unique identifier of an object (*note [def_object]::). The + object name is usually represented by a 40 character hexadecimal + string. Also colloquially called SHA-1 (*note [def_SHA1]::). + +object type + One of the identifiers "commit (*note [def_commit_object]::)", + "tree (*note [def_tree_object]::)", "tag (*note + [def_tag_object]::)" or "blob (*note [def_blob_object]::)" + describing the type of an object (*note [def_object]::). + +octopus + To merge (*note [def_merge]::) more than two branches (*note + [def_branch]::). + +origin + The default upstream repository (*note [def_repository]::). Most + projects have at least one upstream project which they track. By + default _origin_ is used for that purpose. New upstream updates + will be fetched into remote-tracking branches (*note + [def_remote_tracking_branch]::) named + origin/name-of-upstream-branch, which you can see using ‘git branch + -r’. + +overlay + Only update and add files to the working directory, but don’t + delete them, similar to how _cp -R_ would update the contents in + the destination directory. This is the default mode in a checkout + (*note [def_checkout]::) when checking out files from the index + (*note [def_index]::) or a tree-ish (*note [def_tree-ish]::). In + contrast, no-overlay mode also deletes tracked files not present in + the source, similar to _rsync –delete_. + +pack + A set of objects which have been compressed into one file (to save + space or to transmit them efficiently). + +pack index + The list of identifiers, and other information, of the objects in a + pack (*note [def_pack]::), to assist in efficiently accessing the + contents of a pack. + +pathspec + Pattern used to limit paths in Git commands. + + Pathspecs are used on the command line of "git ls-files", "git + ls-tree", "git add", "git grep", "git diff", "git checkout", and + many other commands to limit the scope of operations to some subset + of the tree or worktree. See the documentation of each command for + whether paths are relative to the current directory or toplevel. + The pathspec syntax is as follows: + + • any path matches itself + + • the pathspec up to the last slash represents a directory + prefix. The scope of that pathspec is limited to that + subtree. + + • the rest of the pathspec is a pattern for the remainder of the + pathname. Paths relative to the directory prefix will be + matched against that pattern using fnmatch(3); in particular, + _*_ and _?_ _can_ match directory separators. + + For example, Documentation/*.jpg will match all .jpg files in the + Documentation subtree, including + Documentation/chapter_1/figure_1.jpg. 
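+
+     To try this out, such a pathspec can be given to almost any
+     command after a ‘--’ separator, for instance (a hypothetical
+     invocation, quoting the pattern so the shell does not expand it):
+
+          $ git log -- 'Documentation/*.jpg'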
+ + A pathspec that begins with a colon ‘:’ has special meaning. In + the short form, the leading colon ‘:’ is followed by zero or more + "magic signature" letters (which optionally is terminated by + another colon ‘:’), and the remainder is the pattern to match + against the path. The "magic signature" consists of ASCII symbols + that are neither alphanumeric, glob, regex special characters nor + colon. The optional colon that terminates the "magic signature" + can be omitted if the pattern begins with a character that does not + belong to "magic signature" symbol set and is not a colon. + + In the long form, the leading colon ‘:’ is followed by an open + parenthesis ‘(’, a comma-separated list of zero or more "magic + words", and a close parentheses ‘)’, and the remainder is the + pattern to match against the path. + + A pathspec with only a colon means "there is no pathspec". This + form should not be combined with other pathspec. + + top + The magic word ‘top’ (magic signature: ‘/’) makes the pattern + match from the root of the working tree, even when you are + running the command from inside a subdirectory. + + literal + Wildcards in the pattern such as ‘*’ or ‘?’ are treated as + literal characters. + + icase + Case insensitive match. + + glob + Git treats the pattern as a shell glob suitable for + consumption by fnmatch(3) with the FNM_PATHNAME flag: + wildcards in the pattern will not match a / in the pathname. + For example, "Documentation/*.html" matches + "Documentation/git.html" but not "Documentation/ppc/ppc.html" + or "tools/perf/Documentation/perf.html". + + Two consecutive asterisks ("‘**’") in patterns matched against + full pathname may have special meaning: + + • A leading "‘**’" followed by a slash means match in all + directories. For example, "‘**/foo’" matches file or + directory "‘foo’" anywhere, the same as pattern "‘foo’". + "‘**/foo/bar’" matches file or directory "‘bar’" anywhere + that is directly under directory "‘foo’". + + • A trailing "‘/**’" matches everything inside. For + example, "‘abc/**’" matches all files inside directory + "abc", relative to the location of the ‘.gitignore’ file, + with infinite depth. + + • A slash followed by two consecutive asterisks then a + slash matches zero or more directories. For example, + "‘a/**/b’" matches "‘a/b’", "‘a/x/b’", "‘a/x/y/b’" and so + on. + + • Other consecutive asterisks are considered invalid. + + Glob magic is incompatible with literal magic. + + attr + After ‘attr:’ comes a space separated list of "attribute + requirements", all of which must be met in order for the path + to be considered a match; this is in addition to the usual + non-magic pathspec pattern matching. See gitattributes(5) + (gitattributes.html). + + Each of the attribute requirements for the path takes one of + these forms: + + • "‘ATTR’" requires that the attribute ‘ATTR’ be set. + + • "‘-ATTR’" requires that the attribute ‘ATTR’ be unset. + + • "‘ATTR=VALUE’" requires that the attribute ‘ATTR’ be set + to the string ‘VALUE’. + + • "‘!ATTR’" requires that the attribute ‘ATTR’ be + unspecified. + + Note that when matching against a tree object, attributes + are still obtained from working tree, not from the given + tree object. + + exclude + After a path matches any non-exclude pathspec, it will be run + through all exclude pathspecs (magic signature: ‘!’ or its + synonym ‘^’). If it matches, the path is ignored. When there + is no non-exclude pathspec, the exclusion is applied to the + result set as if invoked without any pathspec. 
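+
+     A few invocations illustrating the magic signatures described
+     above (hypothetical paths, shown only to demonstrate the syntax):
+
+          $ git grep foo -- ':(top)Makefile'
+          $ git ls-files -- ':(glob)**/*.c' ':(exclude)t/'
+          $ git log -- ':!*.md'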
+ +parent + A commit object (*note [def_commit_object]::) contains a (possibly + empty) list of the logical predecessor(s) in the line of + development, i.e. its parents. + +pickaxe + The term pickaxe (*note [def_pickaxe]::) refers to an option to the + diffcore routines that help select changes that add or delete a + given text string. With the ‘--pickaxe-all’ option, it can be used + to view the full changeset (*note [def_changeset]::) that + introduced or removed, say, a particular line of text. See + git-diff(1) (git-diff.html). + +plumbing + Cute name for core Git (*note [def_core_git]::). + +porcelain + Cute name for programs and program suites depending on core Git + (*note [def_core_git]::), presenting a high level access to core + Git. Porcelains expose more of a SCM (*note [def_SCM]::) interface + than the plumbing (*note [def_plumbing]::). + +per-worktree ref + Refs that are per-worktree (*note [def_working_tree]::), rather + than global. This is presently only HEAD (*note [def_HEAD]::) and + any refs that start with ‘refs/bisect/’, but might later include + other unusual refs. + +pseudoref + Pseudorefs are a class of files under ‘$GIT_DIR’ which behave like + refs for the purposes of rev-parse, but which are treated specially + by git. Pseudorefs both have names that are all-caps, and always + start with a line consisting of a SHA-1 (*note [def_SHA1]::) + followed by whitespace. So, HEAD is not a pseudoref, because it is + sometimes a symbolic ref. They might optionally contain some + additional data. ‘MERGE_HEAD’ and ‘CHERRY_PICK_HEAD’ are examples. + Unlike per-worktree refs (*note [def_per_worktree_ref]::), these + files cannot be symbolic refs, and never have reflogs. They also + cannot be updated through the normal ref update machinery. + Instead, they are updated by directly writing to the files. + However, they can be read as if they were refs, so ‘git rev-parse + MERGE_HEAD’ will work. + +pull + Pulling a branch (*note [def_branch]::) means to fetch (*note + [def_fetch]::) it and merge (*note [def_merge]::) it. See also + git-pull(1) (git-pull.html). + +push + Pushing a branch (*note [def_branch]::) means to get the branch’s + head ref (*note [def_head_ref]::) from a remote repository (*note + [def_repository]::), find out if it is an ancestor to the branch’s + local head ref, and in that case, putting all objects, which are + reachable (*note [def_reachable]::) from the local head ref, and + which are missing from the remote repository, into the remote + object database (*note [def_object_database]::), and updating the + remote head ref. If the remote head (*note [def_head]::) is not an + ancestor to the local head, the push fails. + +reachable + All of the ancestors of a given commit (*note [def_commit]::) are + said to be "reachable" from that commit. More generally, one + object (*note [def_object]::) is reachable from another if we can + reach the one from the other by a chain (*note [def_chain]::) that + follows tags (*note [def_tag]::) to whatever they tag, commits + (*note [def_commit_object]::) to their parents or trees, and trees + (*note [def_tree_object]::) to the trees or blobs (*note + [def_blob_object]::) that they contain. + +rebase + To reapply a series of changes from a branch (*note [def_branch]::) + to a different base, and reset the head (*note [def_head]::) of + that branch to the result. + +ref + A name that begins with ‘refs/’ (e.g. 
‘refs/heads/master’) that + points to an object name (*note [def_object_name]::) or another ref + (the latter is called a symbolic ref (*note [def_symref]::)). For + convenience, a ref can sometimes be abbreviated when used as an + argument to a Git command; see gitrevisions(7) (gitrevisions.html) + for details. Refs are stored in the repository (*note + [def_repository]::). + + The ref namespace is hierarchical. Different subhierarchies are + used for different purposes (e.g. the ‘refs/heads/’ hierarchy is + used to represent local branches). + + There are a few special-purpose refs that do not begin with + ‘refs/’. The most notable example is ‘HEAD’. + +reflog + A reflog shows the local "history" of a ref. In other words, it + can tell you what the 3rd last revision in _this_ repository was, + and what was the current state in _this_ repository, yesterday + 9:14pm. See git-reflog(1) (git-reflog.html) for details. + +refspec + A "refspec" is used by fetch (*note [def_fetch]::) and push (*note + [def_push]::) to describe the mapping between remote ref (*note + [def_ref]::) and local ref. + +remote repository + A repository (*note [def_repository]::) which is used to track the + same project but resides somewhere else. To communicate with + remotes, see fetch (*note [def_fetch]::) or push (*note + [def_push]::). + +remote-tracking branch + A ref (*note [def_ref]::) that is used to follow changes from + another repository (*note [def_repository]::). It typically looks + like _refs/remotes/foo/bar_ (indicating that it tracks a branch + named _bar_ in a remote named _foo_), and matches the + right-hand-side of a configured fetch refspec (*note + [def_refspec]::). A remote-tracking branch should not contain + direct modifications or have local commits made to it. + +repository + A collection of refs (*note [def_ref]::) together with an object + database (*note [def_object_database]::) containing all objects + which are reachable (*note [def_reachable]::) from the refs, + possibly accompanied by meta data from one or more porcelains + (*note [def_porcelain]::). A repository can share an object + database with other repositories via alternates mechanism (*note + [def_alternate_object_database]::). + +resolve + The action of fixing up manually what a failed automatic merge + (*note [def_merge]::) left behind. + +revision + Synonym for commit (*note [def_commit]::) (the noun). + +rewind + To throw away part of the development, i.e. to assign the head + (*note [def_head]::) to an earlier revision (*note + [def_revision]::). + +SCM + Source code management (tool). + +SHA-1 + "Secure Hash Algorithm 1"; a cryptographic hash function. In the + context of Git used as a synonym for object name (*note + [def_object_name]::). + +shallow clone + Mostly a synonym to shallow repository (*note + [def_shallow_repository]::) but the phrase makes it more explicit + that it was created by running ‘git clone --depth=...’ command. + +shallow repository + A shallow repository (*note [def_repository]::) has an incomplete + history some of whose commits (*note [def_commit]::) have parents + (*note [def_parent]::) cauterized away (in other words, Git is told + to pretend that these commits do not have the parents, even though + they are recorded in the commit object (*note + [def_commit_object]::)). This is sometimes useful when you are + interested only in the recent history of a project even though the + real history recorded in the upstream is much larger. 
A shallow + repository is created by giving the ‘--depth’ option to + git-clone(1) (git-clone.html), and its history can be later + deepened with git-fetch(1) (git-fetch.html). + +stash entry + An object (*note [def_object]::) used to temporarily store the + contents of a dirty (*note [def_dirty]::) working directory and the + index for future reuse. + +submodule + A repository (*note [def_repository]::) that holds the history of a + separate project inside another repository (the latter of which is + called superproject (*note [def_superproject]::)). + +superproject + A repository (*note [def_repository]::) that references + repositories of other projects in its working tree as submodules + (*note [def_submodule]::). The superproject knows about the names + of (but does not hold copies of) commit objects of the contained + submodules. + +symref + Symbolic reference: instead of containing the SHA-1 (*note + [def_SHA1]::) id itself, it is of the format _ref: refs/some/thing_ + and when referenced, it recursively dereferences to this reference. + _HEAD (*note [def_HEAD]::)_ is a prime example of a symref. + Symbolic references are manipulated with the git-symbolic-ref(1) + (git-symbolic-ref.html) command. + +tag + A ref (*note [def_ref]::) under ‘refs/tags/’ namespace that points + to an object of an arbitrary type (typically a tag points to either + a tag (*note [def_tag_object]::) or a commit object (*note + [def_commit_object]::)). In contrast to a head (*note + [def_head]::), a tag is not updated by the ‘commit’ command. A Git + tag has nothing to do with a Lisp tag (which would be called an + object type (*note [def_object_type]::) in Git’s context). A tag + is most typically used to mark a particular point in the commit + ancestry chain (*note [def_chain]::). + +tag object + An object (*note [def_object]::) containing a ref (*note + [def_ref]::) pointing to another object, which can contain a + message just like a commit object (*note [def_commit_object]::). + It can also contain a (PGP) signature, in which case it is called a + "signed tag object". + +topic branch + A regular Git branch (*note [def_branch]::) that is used by a + developer to identify a conceptual line of development. Since + branches are very easy and inexpensive, it is often desirable to + have several small branches that each contain very well defined + concepts or small incremental yet related changes. + +tree + Either a working tree (*note [def_working_tree]::), or a tree + object (*note [def_tree_object]::) together with the dependent blob + (*note [def_blob_object]::) and tree objects (i.e. a stored + representation of a working tree). + +tree object + An object (*note [def_object]::) containing a list of file names + and modes along with refs to the associated blob and/or tree + objects. A tree (*note [def_tree]::) is equivalent to a directory + (*note [def_directory]::). + +tree-ish (also treeish) + A tree object (*note [def_tree_object]::) or an object (*note + [def_object]::) that can be recursively dereferenced to a tree + object. Dereferencing a commit object (*note + [def_commit_object]::) yields the tree object corresponding to the + revision (*note [def_revision]::)’s top directory (*note + [def_directory]::). The following are all tree-ishes: a commit-ish + (*note [def_commit-ish]::), a tree object, a tag object (*note + [def_tag_object]::) that points to a tree object, a tag object that + points to a tag object that points to a tree object, etc. 
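+
+     For example (an illustrative sketch; 'v2.6.15' is simply the
+     sample tag used elsewhere in this manual), a commit-ish can be
+     peeled down to the tree object it refers to:
+
+          $ git rev-parse HEAD^{tree}      # the tree of the current commit
+          $ git rev-parse v2.6.15^{tree}   # peel a tag to its commit, then to its tree
+          $ git ls-tree HEAD               # list the entries of that tree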
+ +unmerged index + An index (*note [def_index]::) which contains unmerged index + entries (*note [def_index_entry]::). + +unreachable object + An object (*note [def_object]::) which is not reachable (*note + [def_reachable]::) from a branch (*note [def_branch]::), tag (*note + [def_tag]::), or any other reference. + +upstream branch + The default branch (*note [def_branch]::) that is merged into the + branch in question (or the branch in question is rebased onto). It + is configured via branch..remote and branch..merge. If + the upstream branch of _A_ is _origin/B_ sometimes we say "_A_ is + tracking _origin/B_". + +working tree + The tree of actual checked out files. The working tree normally + contains the contents of the HEAD (*note [def_HEAD]::) commit’s + tree, plus any local changes that you have made but not yet + committed. + + +File: git.info, Node: Git Quick Reference, Next: Notes and todo list for this manual, Prev: Git Glossary, Up: Top + +Appendix A Git Quick Reference +****************************** + +This is a quick summary of the major commands; the previous chapters +explain how these work in more detail. + +* Menu: + +* Creating a new repository: Creating a new repository <1>. +* Managing branches:: +* Exploring history:: +* Making changes:: +* Merging:: +* Sharing your changes:: +* Repository maintenance:: + + +File: git.info, Node: Creating a new repository <1>, Next: Managing branches, Up: Git Quick Reference + +A.1 Creating a new repository +============================= + +From a tarball: + + $ tar xzf project.tar.gz + $ cd project + $ git init + Initialized empty Git repository in .git/ + $ git add . + $ git commit + + From a remote repository: + + $ git clone git://example.com/pub/project.git + $ cd project + + +File: git.info, Node: Managing branches, Next: Exploring history, Prev: Creating a new repository <1>, Up: Git Quick Reference + +A.2 Managing branches +===================== + + $ git branch # list all local branches in this repo + $ git switch test # switch working directory to branch "test" + $ git branch new # create branch "new" starting at current HEAD + $ git branch -d new # delete branch "new" + + Instead of basing a new branch on current HEAD (the default), use: + + $ git branch new test # branch named "test" + $ git branch new v2.6.15 # tag named v2.6.15 + $ git branch new HEAD^ # commit before the most recent + $ git branch new HEAD^^ # commit before that + $ git branch new test~10 # ten commits before tip of branch "test" + + Create and switch to a new branch at the same time: + + $ git switch -c new v2.6.15 + + Update and examine branches from the repository you cloned from: + + $ git fetch # update + $ git branch -r # list + origin/master + origin/next + ... + $ git switch -c masterwork origin/master + + Fetch a branch from a different repository, and give it a new name in +your repository: + + $ git fetch git://example.com/project.git theirbranch:mybranch + $ git fetch git://example.com/project.git v2.6.15:mybranch + + Keep a list of repositories you work with regularly: + + $ git remote add example git://example.com/project.git + $ git remote # list remote repositories + example + origin + $ git remote show example # get details + * remote example + URL: git://example.com/project.git + Tracked remote branches + master + next + ... 
+ $ git fetch example # update branches from example + $ git branch -r # list all remote branches + + +File: git.info, Node: Exploring history, Next: Making changes, Prev: Managing branches, Up: Git Quick Reference + +A.3 Exploring history +===================== + + $ gitk # visualize and browse history + $ git log # list all commits + $ git log src/ # ...modifying src/ + $ git log v2.6.15..v2.6.16 # ...in v2.6.16, not in v2.6.15 + $ git log master..test # ...in branch test, not in branch master + $ git log test..master # ...in branch master, but not in test + $ git log test...master # ...in one branch, not in both + $ git log -S'foo()' # ...where difference contain "foo()" + $ git log --since="2 weeks ago" + $ git log -p # show patches as well + $ git show # most recent commit + $ git diff v2.6.15..v2.6.16 # diff between two tagged versions + $ git diff v2.6.15..HEAD # diff with current head + $ git grep "foo()" # search working directory for "foo()" + $ git grep v2.6.15 "foo()" # search old tree for "foo()" + $ git show v2.6.15:a.txt # look at old version of a.txt + + Search for regressions: + + $ git bisect start + $ git bisect bad # current version is bad + $ git bisect good v2.6.13-rc2 # last known good revision + Bisecting: 675 revisions left to test after this + # test here, then: + $ git bisect good # if this revision is good, or + $ git bisect bad # if this revision is bad. + # repeat until done. + + +File: git.info, Node: Making changes, Next: Merging, Prev: Exploring history, Up: Git Quick Reference + +A.4 Making changes +================== + +Make sure Git knows who to blame: + + $ cat >>~/.gitconfig <<\EOF + [user] + name = Your Name Comes Here + email = you@yourdomain.example.com + EOF + + Select file contents to include in the next commit, then make the +commit: + + $ git add a.txt # updated file + $ git add b.txt # new file + $ git rm c.txt # old file + $ git commit + + Or, prepare and create the commit in one step: + + $ git commit d.txt # use latest content only of d.txt + $ git commit -a # use latest content of all tracked files + + +File: git.info, Node: Merging, Next: Sharing your changes, Prev: Making changes, Up: Git Quick Reference + +A.5 Merging +=========== + + $ git merge test # merge branch "test" into the current branch + $ git pull git://example.com/project.git master + # fetch and merge in remote branch + $ git pull . 
test # equivalent to git merge test + + +File: git.info, Node: Sharing your changes, Next: Repository maintenance, Prev: Merging, Up: Git Quick Reference + +A.6 Sharing your changes +======================== + +Importing or exporting patches: + + $ git format-patch origin..HEAD # format a patch for each commit + # in HEAD but not in origin + $ git am mbox # import patches from the mailbox "mbox" + + Fetch a branch in a different Git repository, then merge into the +current branch: + + $ git pull git://example.com/project.git theirbranch + + Store the fetched branch into a local branch before merging into the +current branch: + + $ git pull git://example.com/project.git theirbranch:mybranch + + After creating commits on a local branch, update the remote branch +with your commits: + + $ git push ssh://example.com/project.git mybranch:theirbranch + + When remote and local branch are both named "test": + + $ git push ssh://example.com/project.git test + + Shortcut version for a frequently used remote repository: + + $ git remote add example ssh://example.com/project.git + $ git push example test + + +File: git.info, Node: Repository maintenance, Prev: Sharing your changes, Up: Git Quick Reference + +A.7 Repository maintenance +========================== + +Check for corruption: + + $ git fsck + + Recompress, remove unused cruft: + + $ git gc + + +File: git.info, Node: Notes and todo list for this manual, Prev: Git Quick Reference, Up: Top + +Appendix B Notes and todo list for this manual +********************************************** + +* Menu: + +* Todo list:: + + +File: git.info, Node: Todo list, Up: Notes and todo list for this manual + +B.1 Todo list +============= + +This is a work in progress. + + The basic requirements: + + • It must be readable in order, from beginning to end, by someone + intelligent with a basic grasp of the UNIX command line, but + without any special knowledge of Git. If necessary, any other + prerequisites should be specifically mentioned as they arise. + + • Whenever possible, section headings should clearly describe the + task they explain how to do, in language that requires no more + knowledge than necessary: for example, "importing patches into a + project" rather than "the ‘git am’ command" + + Think about how to create a clear chapter dependency graph that will +allow people to get to important topics without necessarily reading +everything in between. + + Scan ‘Documentation/’ for other stuff left out; in particular: + + • howto’s + + • some of ‘technical/’? + + • hooks + + • list of commands in git(1) (git.html) + + Scan email archives for other stuff left out + + Scan man pages to see if any assume more background than this manual +provides. + + Add more good examples. Entire sections of just cookbook examples +might be a good idea; maybe make an "advanced examples" section a +standard end-of-chapter section? + + Include cross-references to the glossary, where appropriate. + + Add a section on working with other version control systems, +including CVS, Subversion, and just imports of series of release +tarballs. + + Write a chapter on using plumbing and writing scripts. + + Alternates, clone -reference, etc. + + More on recovery from repository corruption. 
See: +https://lore.kernel.org/git/Pine.LNX.4.64.0702272039540.12485@woody.linux-foundation.org/ +(https://lore.kernel.org/git/Pine.LNX.4.64.0702272039540.12485@woody.linux-foundation.org/) +https://lore.kernel.org/git/Pine.LNX.4.64.0702141033400.3604@woody.linux-foundation.org/ +(https://lore.kernel.org/git/Pine.LNX.4.64.0702141033400.3604@woody.linux-foundation.org/) + + + +Tag Table: +Node: Top212 +Node: idm43164 +Node: Repositories and Branches4465 +Node: How to get a Git repository5070 +Node: How to check out a different version of a project6331 +Node: Understanding History; Commits8890 +Node: Understanding history; commits; parents; and reachability11147 +Node: Understanding history; History diagrams12483 +Node: Understanding history; What is a branch?13224 +Node: Manipulating branches13963 +Node: Examining an old version without creating a new branch15544 +Node: Examining branches from a remote repository17105 +Node: Naming branches; tags; and other references18861 +Node: Updating a repository with git fetch20420 +Node: Fetching branches from other repositories21093 +Node: Exploring Git history22786 +Node: How to use bisect to find a regression23583 +Node: Naming commits26582 +Node: Creating tags28944 +Node: Browsing revisions29512 +Node: Generating diffs31126 +Node: Viewing old file versions31882 +Node: Examples32449 +Node: Counting the number of commits on a branch32902 +Node: Check whether two branches point at the same history33546 +Node: Find first tagged version including a given fix34613 +Node: Showing commits unique to a given branch37277 +Node: Creating a changelog and tarball for a software release39049 +Node: Finding commits referencing a file with given content40885 +Node: Developing with Git41679 +Node: Telling Git your name42152 +Node: Creating a new repository42961 +Node: How to make a commit43476 +Node: Creating good commit messages46371 +Node: Ignoring files47075 +Node: How to merge49098 +Node: Resolving a merge51148 +Node: Getting conflict-resolution help during a merge52517 +Node: Undoing a merge56290 +Node: Fast-forward merges56938 +Node: Fixing mistakes57648 +Node: Fixing a mistake with a new commit58787 +Node: Fixing a mistake by rewriting history59757 +Node: Checking out an old version of a file60906 +Node: Temporarily setting aside work in progress61784 +Node: Ensuring good performance62830 +Node: Ensuring reliability63453 +Node: Checking the repository for corruption63694 +Node: Recovering lost changes64907 +Node: Reflogs65147 +Node: Examining dangling objects67010 +Node: Sharing development with others68647 +Node: Getting updates with git pull69089 +Node: Submitting patches to a project71302 +Node: Importing patches to a project72606 +Node: Public Git repositories73901 +Node: Setting up a public repository76446 +Node: Exporting a Git repository via the Git protocol77232 +Node: Exporting a git repository via HTTP78351 +Node: Pushing changes to a public repository79643 +Node: What to do when a push fails81787 +Node: Setting up a shared repository84199 +Node: Allowing web browsing of a repository85848 +Node: How to get a Git repository with minimal history86654 +Node: Examples <1>87717 +Node: Maintaining topic branches for a Linux subsystem maintainer87952 +Node: Rewriting history and maintaining patch series96755 +Node: Creating the perfect patch series97644 +Node: Keeping a patch series up to date using git rebase99127 +Node: Rewriting a single commit101762 +Node: Reordering or selecting from a patch series102587 +Node: Using interactive rebases103261 +Node: 
Other tools105631 +Node: Problems with rewriting history105984 +Node: Why bisecting merge commits can be harder than bisecting linear history107811 +Node: Advanced branch management110808 +Node: Fetching individual branches111175 +Node: git fetch and fast-forwards112293 +Node: Forcing git fetch to do non-fast-forward updates113765 +Node: Configuring remote-tracking branches114557 +Node: Git concepts116297 +Node: The Object Database116786 +Node: Commit Object119304 +Node: Tree Object121890 +Node: Blob Object123746 +Node: Trust124995 +Node: Tag Object126433 +Node: How Git stores objects efficiently; pack files127511 +Node: Dangling objects129556 +Node: Recovering from repository corruption133244 +Node: The index137453 +Node: Submodules140334 +Node: Pitfalls with submodules146399 +Node: Low-level Git operations148958 +Node: Object access and manipulation149539 +Node: The Workflow150418 +Node: working directory → index151307 +Node: index → object database153010 +Node: object database → index153601 +Node: index → working directory154258 +Node: Tying it all together155462 +Node: Examining the data158326 +Node: Merging multiple trees159445 +Node: Merging multiple trees; continued161275 +Node: Hacking Git164309 +Node: Object storage format164641 +Node: A birds-eye view of Git’s source code166225 +Node: Git Glossary175147 +Node: Git explained175305 +Ref: [def_alternate_object_database]175403 +Ref: [def_bare_repository]175654 +Ref: [def_blob_object]176236 +Ref: [def_branch]176328 +Ref: [def_cache]176940 +Ref: [def_chain]176995 +Ref: [def_changeset]177248 +Ref: [def_checkout]177449 +Ref: [def_cherry-picking]177877 +Ref: [def_clean]178369 +Ref: [def_commit]178603 +Ref: [def_commit_object]179245 +Ref: [def_commit-ish]179604 +Ref: [def_core_git]180007 +Ref: [def_DAG]180129 +Ref: [def_dangling_object]180454 +Ref: [def_detached_HEAD]180779 +Ref: [def_directory]181857 +Ref: [def_dirty]181904 +Ref: [def_evil_merge]182120 +Ref: [def_fast_forward]182270 +Ref: [def_fetch]182850 +Ref: [def_file_system]183203 +Ref: [def_git_archive]183404 +Ref: [def_gitfile]183496 +Ref: [def_grafts]183627 +Ref: [def_hash]184255 +Ref: [def_head]184344 +Ref: [def_HEAD]184642 +Ref: [def_head_ref]185057 +Ref: [def_hook]185113 +Ref: [def_index]185665 +Ref: [def_index_entry]186003 +Ref: [def_master]186286 +Ref: [def_merge]186617 +Ref: [def_object]187767 +Ref: [def_object_database]187940 +Ref: [def_object_identifier]188168 +Ref: [def_object_name]188245 +Ref: [def_object_type]188464 +Ref: [def_octopus]188724 +Ref: [def_origin]188821 +Ref: [def_overlay]189229 +Ref: [def_pack]189709 +Ref: [def_pack_index]189832 +Ref: [def_pathspec]190013 +Ref: [def_parent]195356 +Ref: [def_pickaxe]195535 +Ref: [def_plumbing]195925 +Ref: [def_porcelain]195989 +Ref: [def_per_worktree_ref]196261 +Ref: [def_pseudoref]196519 +Ref: [def_pull]197410 +Ref: [def_push]197586 +Ref: [def_reachable]198210 +Ref: [def_rebase]198749 +Ref: [def_ref]198931 +Ref: [def_reflog]199697 +Ref: [def_refspec]199977 +Ref: [def_remote]200160 +Ref: [def_remote_tracking_branch]200399 +Ref: [def_repository]200876 +Ref: [def_resolve]201337 +Ref: [def_revision]201454 +Ref: [def_rewind]201523 +Ref: [def_SCM]201680 +Ref: [def_SHA1]201721 +Ref: [def_shallow_clone]201887 +Ref: [def_shallow_repository]202097 +Ref: [def_stash]202846 +Ref: [def_submodule]203029 +Ref: [def_superproject]203241 +Ref: [def_symref]203545 +Ref: [def_tag]203936 +Ref: [def_tag_object]204545 +Ref: [def_topic_branch]204854 +Ref: [def_tree]205198 +Ref: [def_tree_object]205450 +Ref: [def_tree-ish]205703 +Ref: 
[def_unmerged_index]206327 +Ref: [def_unreachable_object]206451 +Ref: [def_upstream_branch]206655 +Ref: [def_working_tree]206987 +Node: Git Quick Reference207223 +Node: Creating a new repository <1>207713 +Node: Managing branches208146 +Node: Exploring history210068 +Node: Making changes211701 +Node: Merging212411 +Node: Sharing your changes212786 +Node: Repository maintenance213906 +Node: Notes and todo list for this manual214157 +Node: Todo list214376 + +End Tag Table + + +Local Variables: +coding: utf-8 +End: diff --git a/assets/info/gnupg.info-1.gz b/assets/info/gnupg.info-1.gz new file mode 100644 index 00000000..3eba6fe1 Binary files /dev/null and b/assets/info/gnupg.info-1.gz differ diff --git a/assets/info/gnupg.info-2.gz b/assets/info/gnupg.info-2.gz new file mode 100644 index 00000000..81c787ac Binary files /dev/null and b/assets/info/gnupg.info-2.gz differ diff --git a/assets/info/gnupg.info-3.gz b/assets/info/gnupg.info-3.gz new file mode 100644 index 00000000..5b8598dc Binary files /dev/null and b/assets/info/gnupg.info-3.gz differ diff --git a/assets/info/gnupg.info.gz b/assets/info/gnupg.info.gz new file mode 100644 index 00000000..03526a54 Binary files /dev/null and b/assets/info/gnupg.info.gz differ diff --git a/assets/info/gnus.info.gz b/assets/info/gnus.info.gz new file mode 100644 index 00000000..dc5dedff Binary files /dev/null and b/assets/info/gnus.info.gz differ diff --git a/assets/info/grep.info.gz b/assets/info/grep.info.gz new file mode 100644 index 00000000..b3227fdf Binary files /dev/null and b/assets/info/grep.info.gz differ diff --git a/assets/info/grub.info-1.gz b/assets/info/grub.info-1.gz new file mode 100644 index 00000000..8564f5ea Binary files /dev/null and b/assets/info/grub.info-1.gz differ diff --git a/assets/info/grub.info-2.gz b/assets/info/grub.info-2.gz new file mode 100644 index 00000000..8b636a67 Binary files /dev/null and b/assets/info/grub.info-2.gz differ diff --git a/assets/info/grub.info.gz b/assets/info/grub.info.gz new file mode 100644 index 00000000..8136b72b Binary files /dev/null and b/assets/info/grub.info.gz differ diff --git a/assets/info/guile.info-1.gz b/assets/info/guile.info-1.gz new file mode 100644 index 00000000..8d9a5817 Binary files /dev/null and b/assets/info/guile.info-1.gz differ diff --git a/assets/info/guile.info-10.gz b/assets/info/guile.info-10.gz new file mode 100644 index 00000000..9a734e04 Binary files /dev/null and b/assets/info/guile.info-10.gz differ diff --git a/assets/info/guile.info-11.gz b/assets/info/guile.info-11.gz new file mode 100644 index 00000000..2945f4fe Binary files /dev/null and b/assets/info/guile.info-11.gz differ diff --git a/assets/info/guile.info-2.gz b/assets/info/guile.info-2.gz new file mode 100644 index 00000000..5568bc7b Binary files /dev/null and b/assets/info/guile.info-2.gz differ diff --git a/assets/info/guile.info-3.gz b/assets/info/guile.info-3.gz new file mode 100644 index 00000000..1d21ebeb Binary files /dev/null and b/assets/info/guile.info-3.gz differ diff --git a/assets/info/guile.info-4.gz b/assets/info/guile.info-4.gz new file mode 100644 index 00000000..c53b8532 Binary files /dev/null and b/assets/info/guile.info-4.gz differ diff --git a/assets/info/guile.info-5.gz b/assets/info/guile.info-5.gz new file mode 100644 index 00000000..62dbcdd6 Binary files /dev/null and b/assets/info/guile.info-5.gz differ diff --git a/assets/info/guile.info-6.gz b/assets/info/guile.info-6.gz new file mode 100644 index 00000000..a8362dd2 Binary files /dev/null and b/assets/info/guile.info-6.gz 
differ diff --git a/assets/info/guile.info-7.gz b/assets/info/guile.info-7.gz new file mode 100644 index 00000000..d8ba8041 Binary files /dev/null and b/assets/info/guile.info-7.gz differ diff --git a/assets/info/guile.info-8.gz b/assets/info/guile.info-8.gz new file mode 100644 index 00000000..e3134c6d Binary files /dev/null and b/assets/info/guile.info-8.gz differ diff --git a/assets/info/guile.info-9.gz b/assets/info/guile.info-9.gz new file mode 100644 index 00000000..a4863699 Binary files /dev/null and b/assets/info/guile.info-9.gz differ diff --git a/assets/info/guile.info.gz b/assets/info/guile.info.gz new file mode 100644 index 00000000..0929095d Binary files /dev/null and b/assets/info/guile.info.gz differ diff --git a/assets/info/gzip.info.gz b/assets/info/gzip.info.gz new file mode 100644 index 00000000..be72e46b Binary files /dev/null and b/assets/info/gzip.info.gz differ diff --git a/assets/info/history.info.gz b/assets/info/history.info.gz new file mode 100644 index 00000000..7f0dd3e5 Binary files /dev/null and b/assets/info/history.info.gz differ diff --git a/assets/info/htmlfontify.info.gz b/assets/info/htmlfontify.info.gz new file mode 100644 index 00000000..ac5e1220 Binary files /dev/null and b/assets/info/htmlfontify.info.gz differ diff --git a/assets/info/idlwave.info.gz b/assets/info/idlwave.info.gz new file mode 100644 index 00000000..5bfbf2b7 Binary files /dev/null and b/assets/info/idlwave.info.gz differ diff --git a/assets/info/ido.info.gz b/assets/info/ido.info.gz new file mode 100644 index 00000000..5dc3db1d Binary files /dev/null and b/assets/info/ido.info.gz differ diff --git a/assets/info/info.info.gz b/assets/info/info.info.gz new file mode 100644 index 00000000..ff29aae3 Binary files /dev/null and b/assets/info/info.info.gz differ diff --git a/assets/info/latex2e.info b/assets/info/latex2e.info new file mode 100644 index 00000000..a547df1e --- /dev/null +++ b/assets/info/latex2e.info @@ -0,0 +1,21120 @@ +This is latex2e.info, produced by makeinfo version 6.8 from +latex2e.texi. + +This document is an unofficial reference manual for LaTeX, a document +preparation system, version of January 2022. + + This manual was originally translated from 'LATEX.HLP' v1.0a in the +VMS Help Library. The pre-translation version was written by George D. +Greenwade of Sam Houston State University. The LaTeX 2.09 version was +written by Stephen Gilmore. The LaTeX2e version was adapted from this +by Torsten Martinsen. Karl Berry made further updates and additions, +and gratefully acknowledges using 'Hypertext Help with LaTeX', by +Sheldon Green, and 'LaTeX Command Summary' (for LaTeX 2.09) by L. Botway +and C. Biemesderfer (published by the TeX Users Group as 'TeXniques' +number 10), as reference material. We also gratefully acknowledge +additional material appearing in latex2e-reference by Martin Herbert +Dietze. (From these references no text was directly copied.) + + Copyright 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, +2017, 2018, 2019, 2020, 2021, 2022 Karl Berry. +Copyright 1988, 1994, 2007 Stephen Gilmore. +Copyright 1994, 1995, 1996 Torsten Martinsen. + + Permission is granted to make and distribute verbatim copies of this +manual provided the copyright notice and this permission notice are +preserved on all copies. 
+ + Permission is granted to copy and distribute modified versions of +this manual under the conditions for verbatim copying, provided that the +entire resulting derived work is distributed under the terms of a +permission notice identical to this one. + + Permission is granted to copy and distribute translations of this +manual into another language, under the above conditions for modified +versions. +INFO-DIR-SECTION TeX +START-INFO-DIR-ENTRY +* LaTeX2e: (latex2e). Unofficial LaTeX reference manual. +END-INFO-DIR-ENTRY + + +File: latex2e.info, Node: Top, Next: About this document, Up: (dir) + +LaTeX2e: An unofficial reference manual +*************************************** + +This document is an unofficial reference manual (version of January +2022) for LaTeX2e, a document preparation system. + +* Menu: + +* About this document:: Bug reporting, etc. +* Overview:: What is LaTeX? +* Document classes:: Some of the various classes available. +* Fonts:: Italic, bold, typewriter, etc. +* Layout:: Controlling the page layout. +* Sectioning:: Parts, Chapters, Sections, etc. +* Cross references:: Automatic referencing. +* Environments:: Such as enumerate & itemize. +* Line breaking:: Influencing line breaks. +* Page breaking:: Influencing page breaks. +* Footnotes:: How to produce footnotes. +* Definitions:: Define your own commands, etc. +* Counters:: Internal counters used by LaTeX. +* Lengths:: The length commands. +* Making paragraphs:: Paragraph commands. +* Math formulas:: How to create mathematical formulas. +* Modes:: Paragraph, Math or LR modes. +* Page styles:: Various styles of page layout. +* Spaces:: Horizontal and vertical space. +* Boxes:: Making boxes. +* Color:: Defining and using colors. +* Graphics:: Importing graphics from outside LaTeX. +* Special insertions:: Inserting reserved and special characters. +* Splitting the input:: Dealing with big files by splitting. +* Front/back matter:: Tables of contents, glossaries, indexes. +* Letters:: The 'letter' class. +* Input/output:: User interaction. +* Command line interface:: Common command-line options. +* Document templates:: Starter templates for various document classes. +* Index:: General index. + + +File: latex2e.info, Node: About this document, Next: Overview, Prev: Top, Up: Top + +1 About this document +********************* + +This is an unofficial reference manual for the LaTeX2e document +preparation system, which is a macro package for the TeX typesetting +program (*note Overview::). + + This document's home page is ; it has separate +web pages for each topic. Alternatively. + has the entire document on a +single page. For other output formats, the sources, and plenty more +information, see . + + In this document, we will mostly just use 'LaTeX' rather than +'LaTeX2e', since the previous version of LaTeX (2.09) was frozen decades +ago. + + LaTeX is maintained by a group of volunteers +(). The official documentation written by +the LaTeX project is available from their web site. The present +document is completely unofficial and has not been written or reviewed +by the LaTeX maintainers. Do not send bug reports or anything else +about this document to them. Instead, please send all comments to +. + + This document is a reference, not a tutorial. There is a vast array +of other information available about LaTeX, at all levels. Here are a +few introductions. + + + Two pages of recommended references to LaTeX documentation. + + + Writing your first document, with a bit of both text and math. 
+ + + A longer introduction to LaTeX, translated to many languages. + + + Introduction to the TeX system, including LaTeX, with further + references. + + +File: latex2e.info, Node: Overview, Next: Document classes, Prev: About this document, Up: Top + +2 Overview of LaTeX +******************* + +LaTeX is a system for typesetting documents. It was originally created +by Leslie Lamport in 1984, but has been maintained by a group of +volunteers for many years now (). It is +widely used, particularly but not exclusively for mathematical and +technical documents. + + A LaTeX user writes an input file containing text to be typeset along +with interspersed commands. The default encoding for the text is UTF-8 +(as of 2018). The commands specify, for example, how the text should be +formatted. + + LaTeX is implemented as a set of related so-called "macros" which use +Donald E. Knuth's TeX typesetting program or one of its derivatives, +collectively known as "engines". Thus, the user produces output, +typically PDF, by giving the input file to a TeX engine. (The following +sections describe all this in more detail.) + + The term LaTeX is also sometimes used to mean the language in which +the input document is marked up, that is, to mean the set of commands +available to a LaTeX user. + + The name LaTeX is short for "Lamport TeX". It is pronounced LAH-teck +or LAY-teck, or sometimes LAY-tecks. Inside a document, produce the +logo with '\LaTeX'. Where use of the logo is not sensible, such as in +plain text, write it as 'LaTeX'. + +* Menu: + +* Starting and ending:: The standard beginning and end of a document. +* Output files:: Files produced. +* TeX engines:: Programs that can compile TeX and LaTeX. +* LaTeX command syntax:: General syntax of LaTeX commands. +* Environment:: An area of the source with distinct behavior. +* CTAN:: Our repository. + + +File: latex2e.info, Node: Starting and ending, Next: Output files, Up: Overview + +2.1 Starting and ending +======================= + +LaTeX files have a simple global structure, with a standard beginning +and ending. This is a small example. + + \documentclass{article} + \begin{document} + Hello, \LaTeX\ world. + \end{document} + +Every LaTeX document has a '\begin{document}' line and an +'\end{document}' line. + +Here, the 'article' is the "document class". It is implemented in a +file 'article.cls'. You can use any document class on your system. A +few document classes are defined by LaTeX itself, and vast array of +others are widely available. *Note Document classes::. + + You can include other LaTeX commands between the '\documentclass' and +the '\begin{document}' commands. This area is called the "preamble". + + The '\begin{document}', '\end{document}' pair defines an +"environment"; the 'document' environment (and no others) is required in +all LaTeX documents (*note document::). LaTeX make available to you +many environments that are documented here (*note Environments::). Many +more are available to you from external packages, most importantly those +available at CTAN (*note CTAN::). + + The following sections discuss how to produce PDF or other output +from a LaTeX input file. + + +File: latex2e.info, Node: Output files, Next: TeX engines, Prev: Starting and ending, Up: Overview + +2.2 Output files +================ + +LaTeX produces a main output file and at least two auxiliary files. The +main output file's name ends in either '.dvi' or '.pdf'. 
+ +'.dvi' + If LaTeX is invoked with the system command 'latex' then it + produces a DeVice Independent file, with extension '.dvi'. You can + view this file with a command such as 'xdvi', or convert it to a + PostScript '.ps' file with 'dvips' or to a Portable Document Format + '.pdf' file with 'dvipdfmx'. The contents of the file can be + dumped in human-readable form with 'dvitype'. A vast array of + other DVI utility programs are available + (). + +'.pdf' + If LaTeX is invoked via the system command 'pdflatex', among other + commands (*note TeX engines::), then the main output is a Portable + Document Format (PDF) file. Typically this is a self-contained + file, with all fonts and images included. + + LaTeX always produces at least two additional files. + +'.log' + This transcript file contains summary information such as a list of + loaded packages. It also includes diagnostic messages and perhaps + additional information for any errors. + +'.aux' + Auxiliary information is used by LaTeX for things such as cross + references. For example, the first time that LaTeX finds a forward + reference--a cross reference to something that has not yet appeared + in the source--it will appear in the output as a doubled question + mark '??'. When the referred-to spot does eventually appear in the + source then LaTeX writes its location information to this '.aux' + file. On the next invocation, LaTeX reads the location information + from this file and uses it to resolve the reference, replacing the + double question mark with the remembered location. + + LaTeX may produce yet more files, characterized by the filename +ending. These include a '.lof' file that is used to make a list of +figures, a '.lot' file used to make a list of tables, and a '.toc' file +used to make a table of contents (*note Table of contents etc.::). A +particular class may create others; the list is open-ended. + + +File: latex2e.info, Node: TeX engines, Next: LaTeX command syntax, Prev: Output files, Up: Overview + +2.3 TeX engines +=============== + +LaTeX is a large set of commands that is executed by a TeX program +(*note Overview::). Such a set of commands is called a "format", and is +embodied in a binary '.fmt' file, which can be read much more quickly +than the corresponding TeX source. + + This section gives a terse overview of the TeX programs that are +commonly available (see also *note Command line interface::). + +'latex' +'pdflatex' + In TeX Live (), if LaTeX is invoked via + either the system command 'latex' or 'pdflatex', then the pdfTeX + engine is run (). When invoked as + 'latex', the main output is a '.dvi' file; as 'pdflatex', the main + output is a '.pdf' file. + + pdfTeX incorporates the e-TeX extensions to Knuth's original + program (), including additional + programming features and bi-directional typesetting, and has plenty + of extensions of its own. e-TeX is available on its own as the + system command 'etex', but this is plain TeX (and produces '.dvi'). + + In other TeX distributions, 'latex' may invoke e-TeX rather than + pdfTeX. In any case, the e-TeX extensions can be assumed to be + available in LaTeX. + +'lualatex' + If LaTeX is invoked via the system command 'lualatex', the LuaTeX + engine is run (). This program allows + code written in the scripting language Lua () to + interact with TeX's typesetting. LuaTeX handles UTF-8 Unicode + input natively, can handle OpenType and TrueType fonts, and + produces a '.pdf' file by default. There is also 'dvilualatex' to + produce a '.dvi' file. 
+ +'xelatex' + If LaTeX is invoked with the system command 'xelatex', the XeTeX + engine is run (). Like LuaTeX, XeTeX + natively supports UTF-8 Unicode and TrueType and OpenType fonts, + though the implementation is completely different, mainly using + external libraries instead of internal code. XeTeX produces a + '.pdf' file as output; it does not support DVI output. + + Internally, XeTeX creates an '.xdv' file, a variant of DVI, and + translates that to PDF using the ('x')'dvipdfmx' program, but this + process is automatic. The '.xdv' file is only useful for + debugging. + +'platex' +'uplatex' + These commands provide significant additional support for Japanese + and other languages; the 'u' variant supports Unicode. See + and . + + As of 2019, there is a companion '-dev' command and format for all of +the above: + +'dvilualatex-dev' +'latex-dev' +'lualatex-dev' +'pdflatex-dev' +'platex-dev' +'uplatex-dev' +'xelatex-dev' + These are candidates for an upcoming LaTeX release. The main + purpose is to find and address compatibility problems before an + official release. + + These '-dev' formats make it easy for anyone to help test documents + and code: you can run, say, 'pdflatex-dev' instead of 'pdflatex', + without changing anything else in your environment. Indeed, it is + easiest and most helpful to always run the '-dev' versions instead + of bothering to switch back and forth. During quiet times after a + release, the commands will be equivalent. + + These are not daily snapshots or untested development code. They + undergo the same extensive regression testing by the LaTeX team + before being released. + + For more information, see "The LaTeX release workflow and the LaTeX + 'dev' formats" by Frank Mittelbach, 'TUGboat' 40:2, + . + + +File: latex2e.info, Node: LaTeX command syntax, Next: Environment, Prev: TeX engines, Up: Overview + +2.4 LaTeX command syntax +======================== + +In the LaTeX input file, a command name starts with a backslash +character, '\'. The name itself then consists of either (a) a string of +letters or (b) a single non-letter. + + LaTeX commands names are case sensitive so that '\pagebreak' differs +from '\Pagebreak' (the latter is not a standard command). Most command +names are lowercase, but in any event you must enter all commands in the +same case as they are defined. + + A command may be followed by zero, one, or more arguments. These +arguments may be either required or optional. Required arguments are +contained in curly braces, '{...}'. Optional arguments are contained in +square brackets, '[...]'. Generally, but not universally, if the +command accepts an optional argument, it comes first, before any +required arguments. + + Inside of an optional argument, to use the character close square +bracket (']') hide it inside curly braces, as in '\item[closing bracket +{]}]'. Similarly, if an optional argument comes last, with no required +argument after it, then to make the first character of the following +text be an open square bracket, hide it inside curly braces. + + LaTeX has the convention that some commands have a '*' form that is +related to the form without a '*', such as '\chapter' and '\chapter*'. +The exact difference in behavior varies from command to command. + + This manual describes all accepted options and '*'-forms for the +commands it covers (barring unintentional omissions, a.k.a. bugs). + + As of the 2020-10-01 release of LaTeX, the 'expl3' and 'xparse' +packages are part of the LaTeX2e format. 
They provide a completely +different underlying programming language syntax. We won't try to cover +them in this document; see the related package documentation and other +LaTeX manuals. + + +File: latex2e.info, Node: Environment, Next: CTAN, Prev: LaTeX command syntax, Up: Overview + +2.5 Environment +=============== + +Synopsis: + + \begin{ENVIRONMENT-NAME} + ... + \end{ENVIRONMENT-NAME} + + An "environment" is an area of LaTeX source, inside of which there is +a distinct behavior. For instance, for poetry in LaTeX put the lines +between '\begin{verse}' and '\end{verse}'. + + \begin{verse} + There once was a man from Nantucket \\ + ... + \end{verse} + + *Note Environments::, for a list of environments. Particularly +notable is that every LaTeX document must have a 'document' environment, +a '\begin{document} ... \end{document}' pair. + + The ENVIRONMENT-NAME at the beginning must exactly match that at the +end. This includes the case where ENVIRONMENT-NAME ends in a +star ('*'); both the '\begin' and '\end' texts must include the star. + + Environments may have arguments, including optional arguments. This +example produces a table. The first argument is optional (and causes +the table to be aligned on its top row) while the second argument is +required (it specifies the formatting of columns). + + \begin{tabular}[t]{r|l} + ... rows of table ... + \end{tabular} + + +File: latex2e.info, Node: CTAN, Prev: Environment, Up: Overview + +2.6 CTAN: The Comprehensive TeX Archive Network +=============================================== + +The Comprehensive TeX Archive Network, CTAN, is the TeX and LaTeX +community's repository of free material. It is a set of Internet sites +around the world that offer material related to LaTeX for download. +Visit CTAN on the web at . + + This material is organized into packages, discrete bundles that +typically offer some coherent functionality and are maintained by one +person or a small number of people. For instance, many publishers have +a package that allows authors to format papers to that publisher's +specifications. + + In addition to the massive holdings, the 'ctan.org' web site offers +features such as search by name or by functionality. + + CTAN is not a single host, but instead is a set of hosts, one of +which is the so-called "master". The master host actively manages the +material, for instance, by accepting uploads of new or updated packages. +For many years, it has been hosted by the German TeX group, DANTE e.V. + + Other sites around the world help out by mirroring, that is, +automatically syncing their collections with the master site and then in +turn making their copies publicly available. This gives users close to +their location better access and relieves the load on the master site. +The list of mirrors is at . + + +File: latex2e.info, Node: Document classes, Next: Fonts, Prev: Overview, Up: Top + +3 Document classes +****************** + +The document's overall class is defined with this command, which is +normally the first command in a LaTeX source file. + + \documentclass[OPTIONS]{CLASS} + + The following document CLASS names are built into LaTeX. (Many other +document classes are available as separate packages; *note Overview::.) + +'article' + For a journal article, a presentation, and miscellaneous general + use. + +'book' + Full-length books, including chapters and possibly including front + matter, such as a preface, and back matter, such as an appendix + (*note Front/back matter::). 
+ +'letter' + Mail, optionally including mailing labels (*note Letters::). + +'report' + For documents of length between an 'article' and a 'book', such as + technical reports or theses, which may contain several chapters. + +'slides' + For slide presentations--rarely used nowadays. The 'beamer' + package is perhaps the most prevalent + (). *Note beamer template::, for a + small template for a beamer document. + + Standard OPTIONS are described in the next section. + +* Menu: + +* Document class options:: Global options. +* Additional packages:: Bring in packages. +* Class and package construction:: Create new classes and packages. + + +File: latex2e.info, Node: Document class options, Next: Additional packages, Up: Document classes + +3.1 Document class options +========================== + +You can specify "global options" or "class options" to the +'\documentclass' command by enclosing them in square brackets. To +specify more than one OPTION, separate them with a comma. + + \documentclass[OPTION1,OPTION2,...]{CLASS} + + Here is the list of the standard class options. + + All of the standard classes except 'slides' accept the following +options for selecting the typeface size (default is '10pt'): + + 10pt 11pt 12pt + + All of the standard classes accept these options for selecting the +paper size (these show height by width): + +'a4paper' + 210 by 297mm (about 8.25 by 11.75 inches) + +'a5paper' + 148 by 210mm (about 5.8 by 8.3 inches) + +'b5paper' + 176 by 250mm (about 6.9 by 9.8 inches) + +'executivepaper' + 7.25 by 10.5 inches + +'legalpaper' + 8.5 by 14 inches + +'letterpaper' + 8.5 by 11 inches (the default) + + When using one of the engines pdfLaTeX, LuaLaTeX, or XeLaTeX (*note +TeX engines::), options other than 'letterpaper' set the print area but +you must also set the physical paper size. One way to do that is to put +'\pdfpagewidth=\paperwidth' and '\pdfpageheight=\paperheight' in your +document's preamble. + + The 'geometry' package provides flexible ways of setting the print +area and physical page size. + + Miscellaneous other options: + +'draft' +'final' + Mark ('draft') or do not mark ('final') overfull boxes with a black + box in the margin; default is 'final'. + +'fleqn' + Put displayed formulas flush left; default is centered. + +'landscape' + Selects landscape format; default is portrait. + +'leqno' + Put equation numbers on the left side of equations; default is the + right side. + +'openbib' + Use "open" bibliography format. + +'titlepage' +'notitlepage' + Specifies whether there is a separate page for the title + information and for the abstract also, if there is one. The + default for the 'report' class is 'titlepage', for the other + classes it is 'notitlepage'. + + The following options are not available with the 'slides' class. + +'onecolumn' +'twocolumn' + Typeset in one or two columns; default is 'onecolumn'. + +'oneside' +'twoside' + Selects one- or two-sided layout; default is 'oneside', except that + in the 'book' class the default is 'twoside'. + + For one-sided printing, the text is centered on the page. For + two-sided printing, the '\evensidemargin' ('\oddsidemargin') + parameter determines the distance on even (odd) numbered pages + between the left side of the page and the text's left margin, with + '\oddsidemargin' being 40% of the difference between '\paperwidth' + and '\textwidth', and '\evensidemargin' is the remainder. 
+ +'openright' +'openany' + Determines if a chapter should start on a right-hand page; default + is 'openright' for 'book', and 'openany' for 'report'. + + The 'slides' class offers the option 'clock' for printing the time at +the bottom of each note. + + +File: latex2e.info, Node: Additional packages, Next: Class and package construction, Prev: Document class options, Up: Document classes + +3.2 Additional packages +======================= + +Load a package PKG, with the package options given in the +comma-separated list OPTIONS, as here. + + \usepackage[OPTIONS]{PKG}. + + To specify more than one package you can separate them with a comma, +as in '\usepackage{PKG1,PKG2,...}', or use multiple '\usepackage' +commands. + + Any options given in the '\documentclass' command that are unknown to +the selected document class are passed on to the packages loaded with +'\usepackage'. + + +File: latex2e.info, Node: Class and package construction, Prev: Additional packages, Up: Document classes + +3.3 Class and package construction +================================== + +You can create new document classes and new packages. For instance, if +your memos must satisfy some local requirements, such as a standard +header for each page, then you could create a new class 'smcmemo.cls' +and begin your documents with '\documentclass{smcmemo}'. + + What separates a package from a document class is that the commands +in a package are useful across classes while those in a document class +are specific to that class. Thus, a command to set page headers is for +a package while a command to make the page headers say 'Memo from the +SMC Math Department' is for a class. + + Inside of a class or package file you can use the at-sign '@' as a +character in command names without having to surround the code +containing that command with '\makeatletter' and '\makeatother'. *Note +\makeatletter & \makeatother::. This allow you to create commands that +users will not accidentally redefine. Another technique is to preface +class- or package-specific commands with some string to prevent your +class or package from interfering with others. For instance, the class +'smcmemo' might have commands '\smc@tolist', '\smc@fromlist', etc. + +* Menu: + +* Class and package structure:: Layout of the file. +* Class and package commands:: List of commands. + + +File: latex2e.info, Node: Class and package structure, Next: Class and package commands, Up: Class and package construction + +3.3.1 Class and package structure +--------------------------------- + +A class file or package file typically has four parts. + 1. In the "identification part", the file says that it is a LaTeX + package or class and describes itself, using the '\NeedsTeXFormat' + and '\ProvidesClass' or '\ProvidesPackage' commands. + + 2. The "preliminary declarations part" declares some commands and can + also load other files. Usually these commands will be those needed + for the code used in the next part. For example, an 'smcmemo' + class might be called with an option to read in a file with a list + of people for the to-head, as '\documentclass[mathto]{smcmemo}', + and therefore needs to define a command + '\newcommand{\setto}[1]{\def\@tolist{#1}}' used in that file. + + 3. In the "handle options part" the class or package declares and + processes its options. Class options allow a user to start their + document as '\documentclass[OPTION LIST]{CLASS NAME}', to modify + the behavior of the class. 
An example is when you declare + '\documentclass[11pt]{article}' to set the default document font + size. + + 4. Finally, in the "more declarations part" the class or package + usually does most of its work: declaring new variables, commands + and fonts, and loading other files. + + Here is a starting class file, which should be saved as 'stub.cls' +where LaTeX can find it, for example in the same directory as the '.tex' +file. + + \NeedsTeXFormat{LaTeX2e} + \ProvidesClass{stub}[2017/07/06 stub to start building classes from] + \DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} + \ProcessOptions\relax + \LoadClass{article} + +It identifies itself, handles the class options via the default of +passing them all to the 'article' class, and then loads the 'article' +class to provide the basis for this class's code. + + For more, see the official guide for class and package writers, the +Class Guide, at + (much of +the descriptions here derive from this document), or the tutorial +. + + +File: latex2e.info, Node: Class and package commands, Prev: Class and package structure, Up: Class and package construction + +3.3.2 Class and package commands +-------------------------------- + +These are the commands designed to help writers of classes or packages. + +'\AtBeginDvi{specials}' + Save in a box register things that are written to the '.dvi' file + at the beginning of the shipout of the first page of the document. + +'\AtEndOfClass{CODE}' +'\AtEndOfPackage{CODE}' + Hook to insert CODE to be executed when LaTeX finishes processing + the current class or package. You can use these hooks multiple + times; the 'code' will be executed in the order that you called it. + See also *note \AtBeginDocument::. + +'\CheckCommand{CMD}[NUM][DEFAULT]{DEFINITION}' +'\CheckCommand*{CMD}[NUM][DEFAULT]{DEFINITION}' + Like '\newcommand' (*note \newcommand & \renewcommand::) but does + not define CMD; instead it checks that the current definition of + CMD is exactly as given by DEFINITION and is or is not "long" as + expected. A long command is a command that accepts '\par' within + an argument. The CMD command is expected to be long with the + unstarred version of '\CheckCommand'. Raises an error when the + check fails. This allows you to check before you start redefining + 'cmd' yourself that no other package has already redefined this + command. + +'\ClassError{CLASS NAME}{ERROR TEXT}{HELP TEXT}' +'\PackageError{PACKAGE NAME}{ERROR TEXT}{HELP TEXT}' +'\ClassWarning{CLASS NAME}{WARNING TEXT}' +'\PackageWarning{PACKAGE NAME}{WARNING TEXT}' +'\ClassWarningNoLine{CLASS NAME}{WARNING TEXT}' +'\PackageWarningNoLine{PACKAGE NAME}{WARNING TEXT}' +'\ClassInfo{CLASS NAME}{INFO TEXT}' +'\PackageInfo{PACKAGE NAME}{INFO TEXT}' +'\ClassInfoNoLine{CLASS NAME}{INFO TEXT}' +'\PackageInfoNoLine{PACKAGE NAME}{INFO TEXT}' + Produce an error message, or warning or informational messages. + + For '\ClassError' and '\PackageError' the message is ERROR TEXT, + followed by TeX's '?' error prompt. If the user then asks for help + by typing 'h', they see the HELP TEXT. + + The four warning commands are similar except that they write + WARNING TEXT on the screen with no error prompt. The four info + commands write INFO TEXT only in the transcript file. The 'NoLine' + versions do not show the number of the line generating the message, + while the other versions do show that number. 
+ + To format the messages, including the HELP TEXT: use '\protect' to + stop a command from expanding, get a line break with + '\MessageBreak', and get a space with '\space' when a space + character does not allow it, like after a command. Note that LaTeX + appends a period to the messages. + +'\CurrentOption' + Expands to the name of the currently-being-processed option. Can + only be used within the CODE argument of either '\DeclareOption' or + '\DeclareOption*'. + +'\DeclareOption{OPTION}{CODE}' +'\DeclareOption*{CODE}' + Make an option available to a user to invoke in their + '\documentclass' command. For example, the 'smcmemo' class could + have an option '\documentclass[logo]{smcmemo}' allowing users to + put the institutional logo on the first page. The class file must + contain '\DeclareOption{logo}{CODE}' (and later, + '\ProcessOptions'). + + If you request an option that has not been declared, by default + this will produce a warning like 'Unused global option(s): + [badoption].' Change this behavior with the starred version + '\DeclareOption*{CODE}'. For example, many classes extend an + existing class, using a command such as '\LoadClass{article}', and + for passing extra options to the underlying class use code such as + this. + + \DeclareOption*{% + \PassOptionsToClass{\CurrentOption}{article}% + } + + Another example is that the class 'smcmemo' may allow users to keep + lists of memo recipients in external files. Then the user could + invoke '\documentclass[math]{smcmemo}' and it will read the file + 'math.memo'. This code handles the file if it exists and otherwise + passes the option to the 'article' class. + + \DeclareOption*{\InputIfFileExists{\CurrentOption.memo}{}{% + \PassOptionsToClass{\CurrentOption}{article}}} + +'\DeclareRobustCommand{CMD}[NUM][DEFAULT]{DEFINITION}' +'\DeclareRobustCommand*{CMD}[NUM][DEFAULT]{DEFINITION}' + Like '\newcommand' and '\newcommand*' (*note \newcommand & + \renewcommand::) but these declare a robust command, even if some + code within the DEFINITION is fragile. (For a discussion of robust + and fragile commands *note \protect::.) Use this command to define + new robust commands or to redefine existing commands and make them + robust. Unlike '\newcommand' these do not give an error if macro + CMD already exists; instead, a log message is put into the + transcript file if a command is redefined. + + Commands defined this way are a bit less efficient than those + defined using '\newcommand' so unless the command's data is fragile + and the command is used within a moving argument, use + '\newcommand'. + + The 'etoolbox' package offers the commands '\newrobustcmd', + '\newrobustcmd*', as well as the commands '\renewrobustcmd', + '\renewrobustcmd*', and the commands '\providerobustcmd', and + '\providerobustcmd*'. These are similar to '\newcommand', + '\newcommand*', '\renewcommand', '\renewcommand*', + '\providecommand', and '\providecommand*', but define a robust CMD + with two advantages as compared to '\DeclareRobustCommand': + 1. They use the low-level e-TeX protection mechanism rather than + the higher level LaTeX '\protect' mechanism, so they do not + incur the slight loss of performance mentioned above, and + 2. They make the same distinction between '\new...', '\renew...', + and '\provide...', as the standard commands, so they do not + just make a log message when you redefine CMD that already + exists, in that case you need to use either '\renew...' or + '\provide...' or you get an error. 
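+
+     As an example of '\DeclareRobustCommand', a package might make a
+     small formatting command robust so that it can safely appear in a
+     moving argument such as a section title (a sketch; the command
+     name '\productname' is made up).
+
+          \DeclareRobustCommand{\productname}[1]{\textsc{#1}}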
+
+'\IfFileExists{FILENAME}{TRUE CODE}{FALSE CODE}'
+'\InputIfFileExists{FILENAME}{TRUE CODE}{FALSE CODE}'
+     Execute TRUE CODE if LaTeX finds the file FILENAME or FALSE CODE
+     otherwise.  With '\InputIfFileExists', LaTeX executes TRUE CODE
+     and then inputs the file.  Thus the command
+
+          \IfFileExists{img.pdf}{%
+            \includegraphics{img.pdf}}{\typeout{!! img.pdf not found}}
+
+     will include the graphic 'img.pdf' if it is found and otherwise
+     give a warning.
+
+     This command looks for the file in all search paths that LaTeX
+     uses, not only in the current directory.  To look only in the
+     current directory do something like '\IfFileExists{./filename}{TRUE
+     CODE}{FALSE CODE}'.  If you ask for a filename without a '.tex'
+     extension then LaTeX will first look for the file by appending the
+     '.tex'; for more on how LaTeX handles file extensions see *note
+     \input::.
+
+'\LoadClass[OPTIONS LIST]{CLASS NAME}[RELEASE DATE]'
+'\LoadClassWithOptions{CLASS NAME}[RELEASE DATE]'
+     Load a class, as with '\documentclass[OPTIONS LIST]{CLASS
+     NAME}[RELEASE DATE]'.  An example is
+     '\LoadClass[twoside]{article}'.
+
+     The OPTIONS LIST, if present, is a comma-separated list.  The
+     RELEASE DATE is optional.  If present it must have the form
+     YYYY/MM/DD.
+
+     If you request a RELEASE DATE and the date of the class installed
+     on your system is earlier, then you get a warning on the screen and
+     in the log like this.
+
+          You have requested, on input line 4, version `2038/01/19' of
+          document class article, but only version `2014/09/29 v1.4h
+          Standard LaTeX document class' is available.
+
+     The '\LoadClassWithOptions' version uses the list of options given
+     to the current class.  This means it ignores any options passed to
+     it via '\PassOptionsToClass'.  This is a convenience command that
+     lets you build classes on existing ones, such as the standard
+     'article' class, without having to track which options were passed.
+
+'\ExecuteOptions{OPTIONS-LIST}'
+     For each option OPTION in the OPTIONS-LIST, in order, this command
+     executes the command '\ds@OPTION'.  If this command is not defined
+     then that option is silently ignored.
+
+     It can be used to provide a default option list before
+     '\ProcessOptions'.  For example, if in a class file you want the
+     default to be 11pt fonts then you could specify
+     '\ExecuteOptions{11pt}\ProcessOptions\relax'.
+
+'\NeedsTeXFormat{FORMAT}[FORMAT DATE]'
+     Specifies the format that this class or package must be run under.
+     Often issued as the first line of a class file, and most often used
+     as: '\NeedsTeXFormat{LaTeX2e}'.  When a document using that class
+     is processed, the format name given here must match the format that
+     is actually being run (including that the FORMAT string is case
+     sensitive).  If it does not match then execution stops with an
+     error like 'This file needs format `LaTeX2e' but this is `xxx'.'
+
+     To specify a version of the format that you know to have certain
+     features, include the optional FORMAT DATE on which those features
+     were implemented.  If present it must be in the form 'YYYY/MM/DD'.
+     If the format version installed on your system is earlier than
+     FORMAT DATE then you get a warning like this.
+
+          You have requested release `2038/01/20' of LaTeX, but only
+          release `2016/02/01' is available.
+
+'\OptionNotUsed'
+     Adds the current option to the list of unused options.  Can only be
+     used within the CODE argument of either '\DeclareOption' or
+     '\DeclareOption*'.
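+
+   Putting several of these commands together, a minimal class built on
+'article' might look like this (a sketch; the class name 'smcnote' is
+invented; compare the 'stub.cls' example in *note Class and package
+structure::).
+
+     \NeedsTeXFormat{LaTeX2e}
+     \ProvidesClass{smcnote}[2017/07/06 v0.1 note class sketch]
+     \DeclareOption{10pt}{\PassOptionsToClass{10pt}{article}}
+     \DeclareOption{11pt}{\PassOptionsToClass{11pt}{article}}
+     \DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}}
+     \ExecuteOptions{11pt}
+     \ProcessOptions\relax
+     \LoadClass{article}
+
+It declares two font size options with '11pt' as the default, passes
+any other option on to 'article', and then loads that class.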
+ +'\PassOptionsToClass{OPTION LIST}{CLASS NAME}' +'\PassOptionsToPackage{OPTION LIST}{PACKAGE NAME}' + Adds the options in the comma-separated list OPTION LIST to the + options used by any future '\RequirePackage' or '\usepackage' + command for package PACKAGE NAME or the class CLASS NAME. + + The reason for these commands is: you may load a package any number + of times with no options but if you want options then you may only + supply them when you first load the package. Loading a package + with options more than once will get you an error like 'Option + clash for package foo.' (LaTeX throws an error even if there is no + conflict between the options.) + + If your own code is bringing in a package twice then you can + collapse that to once, for example replacing the two + '\RequirePackage[landscape]{geometry}' and + '\RequirePackage[margins=1in]{geometry}' with the single command + '\RequirePackage[landscape,margins=1in]{geometry}'. + + However, imagine that you are loading 'firstpkg' and inside that + package it loads 'secondpkg', and you need the second package to be + loaded with option 'draft'. Then before doing the first package + you must queue up the options for the second package, like this. + + \PassOptionsToPackage{draft}{secondpkg} + \RequirePackage{firstpkg} + + (If 'firstpkg.sty' loads an option in conflict with what you want + then you may have to alter its source.) + + These commands are useful for general users as well as class and + package writers. For instance, suppose a user wants to load the + 'graphicx' package with the option 'draft' and also wants to use a + class 'foo' that loads the 'graphicx' package, but without that + option. The user could start their LaTeX file with + '\PassOptionsToPackage{draft}{graphicx}\documentclass{foo}'. + +'\ProcessOptions' +'\ProcessOptions*\@OPTIONS' + Execute the code for each option that the user has invoked. + Include it in the class file as '\ProcessOptions\relax' (because of + the existence of the starred command). + + Options come in two types. "Local options" have been specified for + this particular package in the OPTIONS argument of + '\PassOptionsToPackage{OPTIONS}', '\usepackage[OPTIONS]', or + '\RequirePackage[OPTIONS]'. "Global options" are those given by + the class user in '\documentclass[OPTIONS]' (If an option is + specified both locally and globally then it is local.) + + When '\ProcessOptions' is called for a package 'pkg.sty', the + following happens: + 1. For each option OPTION so far declared with '\DeclareOption', + it looks to see if that option is either a global or a local + option for 'pkg'. If so then it executes the declared code. + This is done in the order in which these options were given in + 'pkg.sty'. + 2. For each remaining local option, it executes the command + '\ds@'OPTION if it has been defined somewhere (other than by a + '\DeclareOption'); otherwise, it executes the default option + code given in '\DeclareOption*'. If no default option code + has been declared then it gives an error message. This is + done in the order in which these options were specified. + + When '\ProcessOptions' is called for a class it works in the same + way except that all options are local, and the default CODE for + '\DeclareOption*' is '\OptionNotUsed' rather than an error. + + The starred version '\ProcessOptions*' executes the options in the + order specified in the calling commands, rather than in the order + of declaration in the class or package. 
For a package this means + that the global options are processed first. + +'\ProvidesClass{CLASS NAME}[RELEASE DATE BRIEF ADDITIONAL INFORMATION]' +'\ProvidesClass{CLASS NAME}[RELEASE DATE]' +'\ProvidesPackage{PACKAGE NAME}[RELEASE DATE BRIEF ADDITIONAL INFORMATION]' +'\ProvidesPackage{PACKAGE NAME}[RELEASE DATE]' + Identifies the class or package, printing a message to the screen + and the log file. + + When you load a class or package, for example with + '\documentclass{smcmemo}' or '\usepackage{test}', LaTeX inputs a + file. If the name of the file does not match the class or package + name declared in it then you get a warning. Thus, if you invoke + '\documentclass{smcmemo}', and the file 'smcmemo.cls' has the + statement '\ProvidesClass{xxx}' then you get a warning like 'You + have requested document class `smcmemo', but the document class + provides 'xxx'.' This warning does not prevent LaTeX from + processing the rest of the class file normally. + + If you include the optional argument then you must include a date, + before any spaces, of the form 'YYYY/MM/DD'. The rest of the + optional argument is free-form, although it traditionally + identifies the class, and is written to the screen during + compilation and to the log file. Thus, if your file 'smcmemo.cls' + contains the line '\ProvidesClass{smcmemo}[2008/06/01 v1.0 SMC memo + class]' and your document's first line is '\documentclass{smcmemo}' + then you will see 'Document Class: smcmemo 2008/06/01 v1.0 SMC memo + class'. + + The date in the optional argument allows class and package users to + ask to be warned if the version of the class or package is earlier + than RELEASE DATE. For instance, a user could enter + '\documentclass{smcmemo}[2018/10/12]' or + '\usepackage{foo}[[2017/07/07]]' to require a class or package with + certain features by specifying that it must be released no earlier + than the given date. (Although, in practice package users only + rarely include a date, and class users almost never do.) + +'\ProvidesFile{FILENAME}[ADDITIONAL INFORMATION]' + Declare a file other than the main class and package files, such as + configuration files or font definition files. Put this command in + that file and you get in the log a string like 'File: test.config + 2017/10/12 config file for test.cls' for FILENAME equal to + 'test.config' and ADDITIONAL INFORMATION equal to '2017/10/12 + config file for test.cls'. + +'\RequirePackage[OPTION LIST]{PACKAGE NAME}[RELEASE DATE]' +'\RequirePackageWithOptions{PACKAGE NAME}[RELEASE DATE]' + Load a package, like the command '\usepackage' (*note Additional + packages::). The LaTeX development team strongly recommends use of + these commands over Plain TeX's '\input'; see the Class Guide. An + example is '\RequirePackage[landscape,margin=1in]{geometry}'. + + The OPTION LIST, if present, is a comma-separated list. The + RELEASE DATE, if present, must have the form YYYY/MM/DD. If the + release date of the package as installed on your system is earlier + than RELEASE DATE then you get a warning like 'You have requested, + on input line 9, version `2017/07/03' of package jhtest, but only + version `2000/01/01' is available'. + + The '\RequirePackageWithOptions' version uses the list of options + for the current class. This means it ignores any options passed to + it via '\PassOptionsToClass'. This is a convenience command to + allow easily building classes on existing ones without having to + track which options were passed. 
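+
+     For example, a small package might begin like this (a sketch; the
+     package name 'smchead' is invented, while 'fancyhdr' is an
+     existing package).
+
+          \NeedsTeXFormat{LaTeX2e}
+          \ProvidesPackage{smchead}[2017/10/12 v0.1 local page headers]
+          \RequirePackage{fancyhdr}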
+ + The difference between '\usepackage' and '\RequirePackage' is + small. The '\usepackage' command is intended for the document file + while '\RequirePackage' is intended for package and class files. + Thus, using '\usepackage' before the '\documentclass' command + causes LaTeX to give error like '\usepackage before + \documentclass', but you can use '\RequirePackage' there. + + +File: latex2e.info, Node: Fonts, Next: Layout, Prev: Document classes, Up: Top + +4 Fonts +******* + +LaTeX comes with powerful font capacities. For one thing, its New Font +Selection Scheme allows you to work easily with the font families in +your document (for instance, see *note Font styles::). And, LaTeX +documents can use most fonts that are available today, including +versions of Times Roman, Helvetica, Courier, etc. (Note, though, that +many fonts do not have support for mathematics.) + + The first typeface in the TeX world was the Computer Modern family, +developed by Donald Knuth. It is the default for LaTeX documents and is +still the most widely used. But changing to another font often only +involves a few commands. For instance, putting the following in your +preamble gives you a Palatino-like font, which is handsome and more +readable online than many other fonts, while still allowing you to +typeset mathematics. (This example is from Michael Sharpe, +.) + + \usepackage[osf]{newpxtext} % osf for text, not math + \usepackage{cabin} % sans serif + \usepackage[varqu,varl]{inconsolata} % sans serif typewriter + \usepackage[bigdelims,vvarbb]{newpxmath} % bb from STIX + \usepackage[cal=boondoxo]{mathalfa} % mathcal + +In addition, the 'xelatex' or 'lualatex' engines allow you to use any +fonts on your system that are in OpenType or TrueType format (*note TeX +engines::). + + The LaTeX Font Catalogue () shows font +sample graphics and copy-and-pasteable source to use many fonts, +including many with support for mathematics. It aims to cover all Latin +alphabet free fonts available for easy use with LaTeX. + + More information is also available from the TeX Users Group, at +. + +* Menu: + +* fontenc package:: Encoding of the font. +* Font styles:: Select roman, italics, etc. +* Font sizes:: Select point size. +* Low-level font commands:: Select encoding, family, series, shape. + + +File: latex2e.info, Node: fontenc package, Next: Font styles, Up: Fonts + +4.1 'fontenc' package +===================== + +Synopsis: + + \usepackage[FONT_ENCODING]{fontenc} + + or + + \usepackage[FONT_ENCODING1, FONT_ENCODING2, ...]{fontenc} + + Specify the font encodings. A font encoding is a mapping of the +character codes to the font glyphs that are used to typeset your output. + + This package only applies if you use the 'pdflatex' engine (*note TeX +engines::). If you use the 'xelatex' or 'lualatex' engine then instead +use the 'fontspec' package. + + TeX's original font family, Computer Modern, has a limited character +set. For instance, to make common accented characters you must use +'\accent' (*note \accent::) but this disables hyphenation. TeX users +have agreed on a number of standards to access the larger sets of +characters provided by modern fonts. If you are using 'pdflatex' then +put this in the preamble + + \usepackage[T1]{fontenc} + +gives you support for the most widespread European languages, including +French, German, Italian, Polish, and others. In particular, if you have +words with accented letters then LaTeX will hyphenate them and your +output can be copied and pasted. 
(The optional second line allows you +to directly enter accented characters into your source file.) + + If you are using an encoding such as 'T1' and the characters appear +blurry or do not magnify well then your fonts may be bitmapped, +sometimes called raster or Type 3. You want vector fonts. Use a +package such as 'lmodern' or 'cm-super' to get a font that extends +LaTeX's default using vector fonts. + + For each FONT_ENCODING given as an option but not already declared, +this package loads the encoding definition files, named +'FONT_ENCODINGenc.def'. It also sets '\encodingdefault' to be the last +encoding in the option list. + + These are the common values for FONT_ENCODING. + +'OT1' + The original encoding for TeX. Limited to mostly English + characters. + +'OMS, OML' + Math symbols and math letters encoding. + +'T1' + TeX text extended. Sometimes called the Cork encoding for the + Users Group meeting where it was developed. Gives access to most + European accented characters. The most common option for this + package. + +'TS1' + Text Companion encoding. + +LaTeX's default is to load 'OML', 'T1', 'OT1', and then 'OMS', and set +the default to 'OT1'. + + Even if you do not use accented letters, you may need to specify a +font encoding if your font requires it. + + If you use 'T1' encoded fonts other than the default Computer Modern +family then you may need to load the package that selects your fonts +before loading 'fontenc', to prevent the system from loading any +'T1' encoded fonts from the default. + + The LaTeX team reserve encoding names starting with: 'T' for the +standard text encodings with 256 characters, 'TS' for symbols that +extend the corresponding T encodings, 'X' for test encodings, 'M' for +standard math encodings with 256 characters, 'A' for special +applications, 'OT' for standard text encodings with 128 characters, and +'OM' for standard math encodings with 128 characters ('O' stands for +'obsolete'). + + This package provides a number of commands, detailed below. Many of +them are encoding-specific, so if you have defined a command that works +for one encoding but the current encoding is different then the command +is not in effect. + +* Menu: + +* \DeclareFontEncoding:: Define an encoding. +* \DeclareTextAccent:: Define an accent in the encoding. +* \DeclareTextAccentDefault:: Fallback for using an accent in the encoding. +* \DeclareTextCommand & \ProvideTextCommand:: New encoding-specific command. +* \DeclareTextCommandDefault & \ProvideTextCommandDefault:: Fallback for encoding-specific commands. +* \DeclareTextComposite:: Directly access an accented glyph, in the encoding. +* \DeclareTextCompositeCommand:: Run code in slot, in the encoding. +* \DeclareTextSymbol:: Define a symbol in the encoding. +* \DeclareTextSymbolDefault:: Fallback for a symbol in the encoding. +* \LastDeclaredEncoding:: Save most recently declared encoding. +* \UseTextSymbol & \UseTextAccent:: Temporarily switch to another encoding. + + +File: latex2e.info, Node: \DeclareFontEncoding, Next: \DeclareTextAccent, Up: fontenc package + +4.1.1 '\DeclareFontEncoding' +---------------------------- + +Synopsis: + + \DeclareFontEncoding{ENCODING}{TEXT-SETTINGS}{MATH-SETTINGS} + + Declare the font encoding ENCODING. It also saves the value of +ENCODING in '\LastDeclaredEncoding' (*note \LastDeclaredEncoding::). + + The file 't1enc.def' contains this line (followed by many others). 
+ + \DeclareFontEncoding{T1}{}{} + + The TEXT-SETTINGS are the commands that LaTeX will run every time it +switches from one encoding to another with the '\selectfont' and +'\fontencoding' commands. The MATH-SETTINGS are the commands that LaTeX +will use whenever the font is accessed as a math alphabet. + + LaTeX ignores any space characters inside TEXT-SETTINGS and +MATH-SETTINGS, to prevent unintended spaces in the output. + + If you invent an encoding you should pick a two or three letter name +starting with 'L' for 'local', or 'E' for 'experimental'. + + Note that output encoding files may be read several times by LaTeX so +using, e.g., '\newcommand' may cause an error. In addition, such files +should contain '\ProvidesFile' line (*note Class and package +commands::). + + Note also that you should use the '\...Default' commands only in a +package, not in the encoding definition files, since those files should +only contain declarations specific to that encoding. + + +File: latex2e.info, Node: \DeclareTextAccent, Next: \DeclareTextAccentDefault, Prev: \DeclareFontEncoding, Up: fontenc package + +4.1.2 '\DeclareTextAccent' +-------------------------- + +Synopsis: + + \DeclareTextAccent{CMD}{ENCODING}{SLOT} + + Define an accent, to be put on top of other glyphs, in the encoding +ENCODING at the location SLOT. + + A "slot" is the number identifying a glyph within a font. + + This line from 't1enc.def' declares that to make a circumflex accent +as in '\^A', the system will put the accent in slot 2 over the 'A' +character, which is represented in ASCII as 65. (This holds unless +there is a relevant 'DeclareTextComposite' or +'\DeclareTextCompositeCommand' declaration; *note +\DeclareTextComposite::.) + + \DeclareTextAccent{\^}{T1}{2} + + If CMD has already been defined then '\DeclareTextAccent' does not +give an error but it does log the redefinition in the transcript file. + + +File: latex2e.info, Node: \DeclareTextAccentDefault, Next: \DeclareTextCommand & \ProvideTextCommand, Prev: \DeclareTextAccent, Up: fontenc package + +4.1.3 '\DeclareTextAccentDefault' +--------------------------------- + +Synopsis: + + \DeclareTextAccentDefault{\CMD}{ENCODING} + + If there is an encoding-specific accent command \CMD but there is no +associated '\DeclareTextAccent' for that encoding then this command will +pick up the slack, by saying to use it as described for ENCODING. + + For example, to make the encoding 'OT1' be the default encoding for +the accent '\"', declare this. + + \DeclareTextAccentDefault{\"}{OT1} + +If you issue a '\"' when the current encoding does not have a definition +for that accent then LaTeX will use the definition from 'OT1' + + That is, this command is equivalent to this call (*note +\UseTextSymbol & \UseTextAccent::). + + \DeclareTextCommandDefault[1]{\CMD} + {\UseTextAccent{ENCODING}{\CMD}{#1}} + + Note that '\DeclareTextAccentDefault' works for any one-argument +'fontenc' command, not just the accent command. 
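+
+   For instance, the one-argument command '\textcircled' can be given a
+default in the same way (a sketch; in a standard setup its default
+definition is taken from the 'OMS' encoding).
+
+     \DeclareTextAccentDefault{\textcircled}{OMS}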
+ + +File: latex2e.info, Node: \DeclareTextCommand & \ProvideTextCommand, Next: \DeclareTextCommandDefault & \ProvideTextCommandDefault, Prev: \DeclareTextAccentDefault, Up: fontenc package + +4.1.4 '\DeclareTextCommand' & '\ProvideTextCommand' +--------------------------------------------------- + +Synopsis, one of: + + \DeclareTextCommand{\CMD}{ENCODING}{DEFN} + \DeclareTextCommand{\CMD}{ENCODING}[NARGS]{DEFN} + \DeclareTextCommand{\CMD}{ENCODING}[NARGS][OPTARGDEFAULT]{DEFN} + + or one of: + + \ProvideTextCommand{\CMD}{ENCODING}{DEFN} + \ProvideTextCommand{\CMD}{ENCODING}[NARGS]{DEFN} + \ProvideTextCommand{\CMD}{ENCODING}[NARGS][OPTARGDEFAULT]{DEFN} + + Define the command '\CMD', which will be specific to one encoding. +The command name CMD must be preceded by a backslash, '\'. These +commands can only appear in the preamble. Redefining \CMD does not +cause an error. The defined command will be robust even if the code in +DEFN is fragile (*note \protect::). + + For example, the file 't1enc.def' contains this line. + + \DeclareTextCommand{\textperthousand}{T1}{\%\char 24 } + + With that, you can express parts per thousand. + + \usepackage[T1]{fontenc} % in preamble + ... + Legal limit is \( 0.8 \)\textperthousand. + +If you change the font encoding to 'OT1' then you get an error like +'LaTeX Error: Command \textperthousand unavailable in encoding OT1'. + + The '\ProvideTextCommand' variant does the same, except that it does +nothing if '\CMD' is already defined. The '\DeclareTextSymbol' command +is faster than this one for simple slot-to-glyph association (*note +\DeclareTextSymbol::) + + The optional NARGS and OPTARGDEFAULT arguments play the same role +here as in '\newcommand' (*note \newcommand & \renewcommand::). +Briefly, NARGS is an integer from 0 to 9 specifying the number of +arguments that the defined command '\CMD' takes. This number includes +any optional argument. Omitting this argument is the same as specifying +0, meaning that '\CMD' will have no arguments. And, if OPTARGDEFAULT is +present then the first argument of '\CMD' is optional, with default +value OPTARGDEFAULT (which may be the empty string). If OPTARGDEFAULT +is not present then '\CMD' does not take an optional argument. + + +File: latex2e.info, Node: \DeclareTextCommandDefault & \ProvideTextCommandDefault, Next: \DeclareTextComposite, Prev: \DeclareTextCommand & \ProvideTextCommand, Up: fontenc package + +4.1.5 '\DeclareTextCommandDefault' & '\ProvideTextCommandDefault ' +------------------------------------------------------------------ + +Synopsis: + + \DeclareTextCommandDefault{\CMD}{DEFN} + + or: + + \ProvideTextCommandDefault{\CMD}{DEFN} + + Give a default definition for '\CMD', for when that command is not +defined in the encoding currently in force. This default should only +use encodings known to be available. + + This makes '\copyright' available. + + \DeclareTextCommandDefault{\copyright}{\textcircled{c}} + +It uses only an encoding (OMS) that is always available. + + The '\DeclareTextCommandDefault' should not occur in the encoding +definition files since those files should declare only commands for use +when you select that encoding. It should instead be in a package. + + As with the related non-default commands, the +'\ProvideTextCommandDefault' has exactly the same behavior as +'\DeclareTextCommandDefault' except that it does nothing if '\CMD' is +already defined (*note \DeclareTextCommand & \ProvideTextCommand::). +So, packages can use it to provide fallbacks that other packages can +improve upon. 
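+
+   For instance, a package could provide a rough fallback for the
+degree sign, which a later package might then improve on with
+'\DeclareTextCommandDefault' and a proper glyph (a sketch; the
+definition here is only illustrative).
+
+     \ProvideTextCommandDefault{\textdegree}{\ensuremath{^{\circ}}}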
+
+
+File: latex2e.info, Node: \DeclareTextComposite, Next: \DeclareTextCompositeCommand, Prev: \DeclareTextCommandDefault & \ProvideTextCommandDefault, Up: fontenc package
+
+4.1.6 '\DeclareTextComposite'
+-----------------------------
+
+Synopsis:
+
+     \DeclareTextComposite{\CMD}{ENCODING}{SIMPLE_OBJECT}{SLOT}
+
+   Access an accented glyph directly, that is, without having to put an
+accent over a separate character.
+
+   This line from 't1enc.def' means that '\^o' will cause LaTeX to
+typeset the accented lowercase 'o' by taking the character directly
+from slot 244 in the font.
+
+     \DeclareTextComposite{\^}{T1}{o}{244}
+
+   *Note fontenc package::, for a list of common encodings.  The
+SIMPLE_OBJECT should be a single character or a single command.  The
+SLOT argument is usually a positive integer represented in decimal
+(although octal or hexadecimal are possible).  Normally \CMD has already
+been declared for this encoding, either with '\DeclareTextAccent' or
+with a one-argument '\DeclareTextCommand'.  In 't1enc.def', the above
+line follows the '\DeclareTextAccent{\^}{T1}{2}' command.
+
+
+File: latex2e.info, Node: \DeclareTextCompositeCommand, Next: \DeclareTextSymbol, Prev: \DeclareTextComposite, Up: fontenc package
+
+4.1.7 '\DeclareTextCompositeCommand'
+------------------------------------
+
+Synopsis:
+
+     \DeclareTextCompositeCommand{\CMD}{ENCODING}{ARG}{CODE}
+
+   A more general version of '\DeclareTextComposite' that runs arbitrary
+code with '\CMD'.
+
+   This allows accents on 'i' to act like accents on the dotless i,
+'\i'.
+
+     \DeclareTextCompositeCommand{\'}{OT1}{i}{\'\i}
+
+   *Note fontenc package::, for a list of common encodings.  Normally
+'\CMD' will have already been declared with '\DeclareTextAccent' or as a
+one-argument '\DeclareTextCommand'.
+
+
+File: latex2e.info, Node: \DeclareTextSymbol, Next: \DeclareTextSymbolDefault, Prev: \DeclareTextCompositeCommand, Up: fontenc package
+
+4.1.8 '\DeclareTextSymbol'
+--------------------------
+
+Synopsis:
+
+     \DeclareTextSymbol{\CMD}{ENCODING}{SLOT}
+
+   Define a symbol in the encoding ENCODING at the location SLOT.
+Symbols defined in this way are for use in text, not mathematics.
+
+   For example, this line from 't1enc.def' declares the number of the
+glyph to use for «, the left guillemet.
+
+     \DeclareTextSymbol{\guillemotleft}{T1}{19}
+
+The command '\DeclareTextCommand{\guillemotleft}{T1}{\char 19}' has the
+same effect but is slower (*note \DeclareTextCommand &
+\ProvideTextCommand::).
+
+   *Note fontenc package::, for a list of common encodings.  The SLOT
+can be specified in decimal, or octal (as in ''023'), or hexadecimal (as
+in '"13'), although decimal has the advantage that it is not affected if
+another package changes the meaning of the single or double quote
+character.
+
+   If '\CMD' has already been defined then '\DeclareTextSymbol' does not
+give an error but it does log the redefinition in the transcript file.
+
+
+File: latex2e.info, Node: \DeclareTextSymbolDefault, Next: \LastDeclaredEncoding, Prev: \DeclareTextSymbol, Up: fontenc package
+
+4.1.9 '\DeclareTextSymbolDefault'
+---------------------------------
+
+Synopsis:
+
+     \DeclareTextSymbolDefault{\CMD}{ENCODING}
+
+   If there is an encoding-specific symbol command '\CMD' but there is
+no associated '\DeclareTextSymbol' for that encoding, then this command
+will pick up the slack, by saying to get the symbol as described for
+ENCODING.
+
+   For example, to declare that if the current encoding has no meaning
+for '\textdollar' then use the one from 'OT1', declare this.
+ + \DeclareTextSymbolDefault{\textdollar}{OT1} + + That is, this command is equivalent to this call (*note +\UseTextSymbol & \UseTextAccent::). + + \DeclareTextCommandDefault{\CMD} + {\UseTextSymbol{ENCODING}{\CMD}} + + Note that '\DeclareTextSymbolDefault' can be used to define a default +for any zero-argument 'fontenc' command. + + +File: latex2e.info, Node: \LastDeclaredEncoding, Next: \UseTextSymbol & \UseTextAccent, Prev: \DeclareTextSymbolDefault, Up: fontenc package + +4.1.10 '\LastDeclaredEncoding' +------------------------------ + +Synopsis: + + \LastDeclaredEncoding + + Get the name of the most recently declared encoding. The +'\DeclareFontEncoding' command stores the name so that it can be +retrieved with this command (*note \DeclareFontEncoding::). + + This relies on '\LastDeclaredEncoding' rather than give the name of +the encoding explicitly. + + \DeclareFontEncoding{JH1}{}{} + \DeclareTextAccent{\'}{\LastDeclaredEncoding}{0} + + +File: latex2e.info, Node: \UseTextSymbol & \UseTextAccent, Prev: \LastDeclaredEncoding, Up: fontenc package + +4.1.11 '\UseTextSymbol' & '\UseTextAccent' +------------------------------------------ + +Synopsis: + + \UseTextSymbol{ENCODING}{\CMD} + + or: + + \UseTextAccent{ENCODING}{\CMD}{TEXT} + + Use a symbol or accent not from the current encoding. + + In general, to use a 'fontenc' command in an encoding where it is not +defined, and if the command has no arguments, then you can use it like +this: + + \UseTextSymbol{OT1}{\ss} + +which is equivalent to this (note the outer braces form a group, so +LaTeX reverts back to the prior encoding after the '\ss'): + + {\fontencoding{OT1}\selectfont\ss} + + Similarly, to use a 'fontenc' command in an encoding where it is not +defined, and if the command has one argument, you can use it like this: + + \UseTextAccent{OT1}{\'}{a} + +which is equivalent to this (again note the outer braces forming a +group): + + {fontencoding{OT1}\selectfont\'{\fontencoding{ENC_IN_USE}\selectfont a}} + +Here, ENC_IN_USE is the encoding in force before this sequence of +commands, so that 'a' is typeset using the current encoding and only the +accent is taken from 'OT1'. + + +File: latex2e.info, Node: Font styles, Next: Font sizes, Prev: fontenc package, Up: Fonts + +4.2 Font styles +=============== + +The following type style commands are supported by LaTeX. + + In the table below the listed commands, the '\text...' commands, are +used with an argument as in '\textit{TEXT}'. This is the preferred +form. But shown after it in parenthesis is the corresponding +"declaration form", which is often useful. This form takes no +arguments, as in '{\itshape TEXT}'. The scope of the declaration form +lasts until the next type style command or the end of the current group. +In addition, each has an environment form such as +'\begin{itshape}...\end{itshape}', which we'll describe further at the +end of the section. + + These commands, in any of the three forms, are cumulative; for +instance you can get bold sans serif by saying either of +'\sffamily\bfseries' or '\bfseries\sffamily'. + + One advantage of these commands is that they automatically insert +italic corrections if needed (*note \/::). Specifically, they insert +the italic correction unless the following character is in the list +'\nocorrlist', which by default consists of period and comma. To +suppress the automatic insertion of italic correction, use '\nocorr' at +the start or end of the command argument, such as '\textit{\nocorr +text}' or '\textsc{text \nocorr}'. 
+ +'\textrm (\rmfamily)' + Roman. + +'\textit (\itshape)' + Italics. + +'\textmd (\mdseries)' + Medium weight (default). + +'\textbf (\bfseries)' + Boldface. + +'\textup (\upshape)' + Upright (default). + +'\textsl (\slshape)' + Slanted. + +'\textsf (\sffamily)' + Sans serif. + +'\textsc (\scshape)' + Small caps. + +'\texttt (\ttfamily)' + Typewriter. + +'\textnormal (\normalfont)' + Main document font. + + Although it also changes fonts, the '\emph{TEXT}' command is +semantic, for text to be emphasized, and should not be used as a +substitute for '\textit'. For example, '\emph{START TEXT \emph{MIDDLE +TEXT} END TEXT}' will result in the START TEXT and END TEXT in italics, +but MIDDLE TEXT will be in roman. + + LaTeX also provides the following commands, which unconditionally +switch to the given style, that is, are _not_ cumulative. They are used +as declarations: '{\CMD...}' instead of '\CMD{...}'. + + (The unconditional commands below are an older version of font +switching. The earlier commands are an improvement in most +circumstances. But sometimes an unconditional font switch is what is +needed.) + +'\bf' + Switch to bold face. + +'\cal' + Switch to calligraphic letters for math. + +'\it' + Italics. + +'\rm' + Roman. + +'\sc' + Small caps. + +'\sf' + Sans serif. + +'\sl' + Slanted (oblique). + +'\tt' + Typewriter (monospace, fixed-width). + + The '\em' command is the unconditional version of '\emph'. + + The following commands are for use in math mode. They are not +cumulative, so '\mathbf{\mathit{SYMBOL}}' does not create a boldface and +italic SYMBOL; instead, it will just be in italics. This is because +typically math symbols need consistent typographic treatment, regardless +of the surrounding environment. + +'\mathrm' + Roman, for use in math mode. + +'\mathbf' + Boldface, for use in math mode. + +'\mathsf' + Sans serif, for use in math mode. + +'\mathtt' + Typewriter, for use in math mode. + +'\mathit' +'(\mit)' + Italics, for use in math mode. + +'\mathnormal' + For use in math mode, e.g., inside another type style declaration. + +'\mathcal' + Calligraphic letters, for use in math mode. + + In addition, the command '\mathversion{bold}' can be used for +switching to bold letters and symbols in formulas. +'\mathversion{normal}' restores the default. + + Finally, the command '\oldstylenums{NUMERALS}' will typeset so-called +"old-style" numerals, which have differing heights and depths (and +sometimes widths) from the standard "lining" numerals, which all have +the same height as uppercase letters. LaTeX's default fonts support +this, and will respect '\textbf' (but not other styles; there are no +italic old-style numerals in Computer Modern). Many other fonts have +old-style numerals also; sometimes package options are provided to make +them the default. FAQ entry: . + + +File: latex2e.info, Node: Font sizes, Next: Low-level font commands, Prev: Font styles, Up: Fonts + +4.3 Font sizes +============== + +The following standard type size commands are supported by LaTeX. The +table shows the command name and the corresponding actual font size used +(in points) with the '10pt', '11pt', and '12pt' document size options, +respectively (*note Document class options::). 
+ +Command '10pt' '11pt' '12pt' +-------------------------------------------------------- +'\tiny' 5 6 6 +'\scriptsize' 7 8 8 +'\footnotesize' 8 9 10 +'\small' 9 10 10.95 +'\normalsize' (default) 10 10.95 12 +'\large' 12 12 14.4 +'\Large' 14.4 14.4 17.28 +'\LARGE' 17.28 17.28 20.74 +'\huge' 20.74 20.74 24.88 +'\Huge' 24.88 24.88 24.88 + + The commands are listed here in declaration (not environment) form, +since that is how they are typically used. For example. + + \begin{quotation} \small + The Tao that can be named is not the eternal Tao. + \end{quotation} + +Here, the scope of the '\small' lasts until the end of the 'quotation' +environment. It would also end at the next type style command or the +end of the current group, so you could enclose it in curly braces +'{\small This text is typeset in the small font.}'. + + Trying to use these commands in math, as with '$\small mv^2/2$', +results in 'LaTeX Font Warning: Command \small invalid in math mode', +and the font size doesn't change. To work with a too-large formula, +often the best option is to use the 'displaymath' environment (*note +Math formulas::), or one of the environments from the 'amsmath' package. +For inline mathematics, such as in a table of formulas, an alternative +is something like '{\small $mv^2/2$}'. (Sometimes '\scriptsize' and +'\scriptstyle' are confused. Both change the font size, but the latter +also changes a number of other aspects of how mathematics is typeset. +*Note Math styles::.) + + An "environment form" of each of these commands is also defined; for +instance, '\begin{tiny}...\end{tiny}'. However, in practice this form +can easily lead to unwanted spaces at the beginning and/or end of the +environment without careful consideration, so it's generally less +error-prone to stick to the declaration form. + + (Aside: Technically, due to the way LaTeX defines '\begin' and +'\end', nearly every command that does not take an argument technically +has an environment form. But in almost all cases, it would only cause +confusion to use it. The reason for mentioning the environment form of +the font size declarations specifically is that this particular use is +not rare.) + + +File: latex2e.info, Node: Low-level font commands, Prev: Font sizes, Up: Fonts + +4.4 Low-level font commands +=========================== + +These commands are primarily intended for writers of macros and +packages. The commands listed here are only a subset of the available +ones. + +'\fontencoding{ENCODING}' + Select the font encoding, the encoding of the output font. There + are a large number of valid encodings. The most common are 'OT1', + Knuth's original encoding for Computer Modern (the default), and + 'T1', also known as the Cork encoding, which has support for the + accented characters used by the most widespread European languages + (German, French, Italian, Polish and others), which allows TeX to + hyphenate words containing accented letters. For more, see + . + +'\fontfamily{FAMILY}' + Select the font family. The web page + provides one way to browse through + many of the fonts easily used with LaTeX. Here are examples of + some common families. 
+ + 'pag' Avant Garde + 'fvs' Bitstream Vera Sans + 'pbk' Bookman + 'bch' Charter + 'ccr' Computer Concrete + 'cmr' Computer Modern + 'cmss' Computer Modern Sans Serif + 'cmtt' Computer Modern Typewriter + 'pcr' Courier + 'phv' Helvetica + 'fi4' Inconsolata + 'lmr' Latin Modern + 'lmss' Latin Modern Sans + 'lmtt' Latin Modern Typewriter + 'pnc' New Century Schoolbook + 'ppl' Palatino + 'ptm' Times + 'uncl' Uncial + 'put' Utopia + 'pzc' Zapf Chancery + +'\fontseries{SERIES}' + Select the font series. A "series" combines a "weight" and a + "width". Typically, a font supports only a few of the possible + combinations. Some common combined series values include: + + 'm' Medium (normal) + 'b' Bold + 'c' Condensed + 'bc' Bold condensed + 'bx' Bold extended + + The possible values for weight, individually, are: + + 'ul' Ultra light + 'el' Extra light + 'l' Light + 'sl' Semi light + 'm' Medium (normal) + 'sb' Semi bold + 'b' Bold + 'eb' Extra bold + 'ub' Ultra bold + + The possible values for width, individually, are (the meaning and + relationship of these terms varies with individual typefaces): + + 'uc' Ultra condensed + 'ec' Extra condensed + 'c' Condensed + 'sc' Semi condensed + 'm' Medium + 'sx' Semi expanded + 'x' Expanded + 'ex' Extra expanded + 'ux' Ultra expanded + + When forming the SERIES string from the weight and width, drop the + 'm' that stands for medium weight or medium width, unless both + weight and width are 'm', in which case use just one (''m''). + +'\fontshape{SHAPE}' + Select font shape. Valid shapes are: + + 'n' Upright (normal) + 'it' Italic + 'sl' Slanted (oblique) + 'sc' Small caps + 'ui' Upright italics + 'ol' Outline + + The two last shapes are not available for most font families, and + small caps are often missing as well. + +'\fontsize{SIZE}{SKIP}' + Set the font size and the line spacing. The unit of both + parameters defaults to points ('pt'). The line spacing is the + nominal vertical space between lines, baseline to baseline. It is + stored in the parameter '\baselineskip'. The default + '\baselineskip' for the Computer Modern typeface is 1.2 times the + '\fontsize'. Changing '\baselineskip' directly is inadvisable + since its value is reset every time a size change happens; instead + use '\baselinestretch'. (*note \baselineskip & + \baselinestretch::). + +'\linespread{FACTOR}' + Equivalent to '\renewcommand{\baselinestretch}{FACTOR}', and + therefore must be followed by '\selectfont' to have any effect. + Best specified in the preamble. *Note \baselineskip & + \baselinestretch::, for using 'setspace' package instead. + +'\selectfont' + The effects of the font commands described above do not happen + until '\selectfont' is called, as in + '\fontfamily{FAMILYNAME}\selectfont'. It is often useful to put + this in a macro: + '\newcommand*{\myfont}{\fontfamily{FAMILYNAME}\selectfont}' + (*note \newcommand & \renewcommand::). + +'\usefont{ENC}{FAMILY}{SERIES}{SHAPE}' + The same as invoking '\fontencoding', '\fontfamily', '\fontseries' + and '\fontshape' with the given parameters, followed by + '\selectfont'. For example: + + \usefont{ot1}{cmr}{m}{n} + + +File: latex2e.info, Node: Layout, Next: Sectioning, Prev: Fonts, Up: Top + +5 Layout +******** + +Commands for controlling the general page layout. + +* Menu: + +* \onecolumn:: Use one-column layout. +* \twocolumn:: Use two-column layout. +* \flushbottom:: Make all text pages the same height. +* \raggedbottom:: Allow text pages of differing height. +* Page layout parameters:: '\headheight' '\footskip'. 
+* \baselineskip & \baselinestretch:: Space between lines. +* Floats:: Figures, tables, etc. + + +File: latex2e.info, Node: \onecolumn, Next: \twocolumn, Up: Layout + +5.1 '\onecolumn' +================ + +Synopsis: + + \onecolumn + + Start a new page and produce single-column output. If the document +is given the class option 'onecolumn' then this is the default behavior +(*note Document class options::). This command is fragile (*note +\protect::). + + +File: latex2e.info, Node: \twocolumn, Next: \flushbottom, Prev: \onecolumn, Up: Layout + +5.2 '\twocolumn' +================ + +Synopses: + + \twocolumn + \twocolumn[PRELIM ONE COLUMN TEXT] + + Start a new page and produce two-column output. If the document is +given the class option 'twocolumn' then this is the default (*note +Document class options::). This command is fragile (*note \protect::). + + If the optional PRELIM ONE COLUMN TEXT argument is present, it is +typeset in one-column mode before the two-column typesetting starts. + + These parameters control typesetting in two-column output: + +'\columnsep' + The distance between columns. The default is 35pt. Change it with + a command such as '\setlength{\columnsep}{40pt}'. You must change + it before the two column mode starts; in the preamble is a good + place. + +'\columnseprule' + The width of the rule between columns. The default is 0pt, meaning + that there is no rule. Otherwise, the rule appears halfway between + the two columns. Change it with a command such as + '\setlength{\columnseprule}{0.4pt}', before the two-column mode + starts. + +'\columnwidth' + The width of a single column. In one-column mode this is equal to + '\textwidth'. In two-column mode by default LaTeX sets the width + of each of the two columns, '\columnwidth', to be half of + '\textwidth' minus '\columnsep'. + + In a two-column document, the starred environments 'table*' and +'figure*' are two columns wide, whereas the unstarred environments +'table' and 'figure' take up only one column (*note figure:: and *note +table::). LaTeX places starred floats at the top of a page. The +following parameters control float behavior of two-column output. + +'\dbltopfraction' + The maximum fraction at the top of a two-column page that may be + occupied by two-column wide floats. The default is 0.7, meaning + that the height of a 'table*' or 'figure*' environment must not + exceed '0.7\textheight'. If the height of your starred float + environment exceeds this then you can take one of the following + actions to prevent it from floating all the way to the back of the + document: + + * Use the '[tp]' location specifier to tell LaTeX to try to put + the bulky float on a page by itself, as well as at the top of + a page. + + * Use the '[t!]' location specifier to override the effect of + '\dbltopfraction' for this particular float. + + * Increase the value of '\dbltopfraction' to a suitably large + number, to avoid going to float pages so soon. + + You can redefine it, as with '\renewcommand{\dbltopfraction}{0.9}'. + +'\dblfloatpagefraction' + For a float page of two-column wide floats, this is the minimum + fraction that must be occupied by floats, limiting the amount of + blank space. LaTeX's default is '0.5'. Change it with + '\renewcommand'. + +'\dblfloatsep' + On a float page of two-column wide floats, this length is the + distance between floats, at both the top and bottom of the page. 
+ The default is '12pt plus2pt minus2pt' for a document set at '10pt' + or '11pt', and '14pt plus2pt minus4pt' for a document set at + '12pt'. + +'\dbltextfloatsep' + This length is the distance between a multi-column float at the top + or bottom of a page and the main text. The default is '20pt + plus2pt minus4pt'. + +'\dbltopnumber' + On a float page of two-column wide floats, this counter gives the + maximum number of floats allowed at the top of the page. The LaTeX + default is '2'. + + This example uses '\twocolumn''s optional argument of to create a +title that spans the two-column article: + + \documentclass[twocolumn]{article} + \newcommand{\authormark}[1]{\textsuperscript{#1}} + \begin{document} + \twocolumn[{% inside this optional argument goes one-column text + \centering + \LARGE The Title \\[1.5em] + \large Author One\authormark{1}, + Author Two\authormark{2}, + Author Three\authormark{1} \\[1em] + \normalsize + \begin{tabular}{p{.2\textwidth}@{\hspace{2em}}p{.2\textwidth}} + \authormark{1}Department one &\authormark{2}Department two \\ + School one &School two + \end{tabular}\\[3em] % space below title part + }] + + Two column text here. + + +File: latex2e.info, Node: \flushbottom, Next: \raggedbottom, Prev: \twocolumn, Up: Layout + +5.3 '\flushbottom' +================== + +Make all pages in the documents after this declaration have the same +height, by stretching the vertical space where necessary to fill out the +page. This is most often used when making two-sided documents since the +differences in facing pages can be glaring. + + If TeX cannot satisfactorily stretch the vertical space in a page +then you get a message like 'Underfull \vbox (badness 10000) has +occurred while \output is active'. If you get that, one option is to +change to '\raggedbottom' (*note \raggedbottom::). Alternatively, you +can adjust the 'textheight' to make compatible pages, or you can add +some vertical stretch glue between lines or between paragraphs, as in +'\setlength{\parskip}{0ex plus0.1ex}'. Your last option is to, in a +final editing stage, adjust the height of individual pages (*note +\enlargethispage::). + + The '\flushbottom' state is the default only if you select the +'twoside' document class option (*note Document class options::). + + +File: latex2e.info, Node: \raggedbottom, Next: Page layout parameters, Prev: \flushbottom, Up: Layout + +5.4 '\raggedbottom' +=================== + +Make all later pages the natural height of the material on that page; no +rubber vertical lengths will be stretched. Thus, in a two-sided +document the facing pages may be different heights. This command can go +at any point in the document body. See *note \flushbottom::. + + This is the default unless you select the 'twoside' document class +option (*note Document class options::). + + +File: latex2e.info, Node: Page layout parameters, Next: \baselineskip & \baselinestretch, Prev: \raggedbottom, Up: Layout + +5.5 Page layout parameters +========================== + +'\columnsep' +'\columnseprule' +'\columnwidth' + The distance between the two columns, the width of a rule between + the columns, and the width of the columns, when the document class + option 'twocolumn' is in effect (*note Document class options::). + *Note \twocolumn::. + +'\headheight' + Height of the box that contains the running head. The default in + the 'article', 'report', and 'book' classes is '12pt', at all type + sizes. + +'\headsep' + Vertical distance between the bottom of the header line and the top + of the main text. 
The default in the 'article' and 'report' + classes is '25pt'. In the 'book' class the default is: if the + document is set at 10pt then it is '0.25in', and at 11pt or 12pt it + is '0.275in'. + +'\footskip' + Distance from the baseline of the last line of text to the baseline + of the page footer. The default in the 'article' and 'report' + classes is '30pt'. In the 'book' class the default is: when the + type size is 10pt the default is '0.35in', while at 11pt it is + '0.38in', and at 12pt it is '30pt'. + +'\linewidth' + Width of the current line, decreased for each nested 'list' (*note + list::). That is, the nominal value for '\linewidth' is to equal + '\textwidth' but for each nested list the '\linewidth' is decreased + by the sum of that list's '\leftmargin' and '\rightmargin' (*note + itemize::). + +'\marginparpush' +'\marginsep' +'\marginparwidth' + The minimum vertical space between two marginal notes, the + horizontal space between the text body and the marginal notes, and + the horizontal width of the notes. + + Normally marginal notes appear on the outside of the page, but the + declaration '\reversemarginpar' changes that (and + '\normalmarginpar' changes it back). + + The defaults for '\marginparpush' in both 'book' and 'article' + classes are: '7pt' if the document is set at 12pt, and '5pt' if the + document is set at 11pt or 10pt. + + For '\marginsep', in 'article' class the default is '10pt' except + if the document is set at 10pt and in two-column mode where the + default is '11pt'. + + For '\marginsep' in 'book' class the default is '10pt' in + two-column mode and '7pt' in one-column mode. + + For '\marginparwidth' in both 'book' and 'article' classes, in + two-column mode the default is 60% of '\paperwidth - \textwidth', + while in one-column mode it is 50% of that distance. + +'\oddsidemargin' +'\evensidemargin' + The '\oddsidemargin' length is the extra distance between the left + side of the page and the text's left margin, on odd-numbered pages + when the document class option 'twoside' is chosen and on all pages + when 'oneside' is in effect. When 'twoside' is in effect, on + even-numbered pages the extra distance on the left is + '\evensidemargin'. + + LaTeX's default is that '\oddsidemargin' is 40% of the difference + between '\paperwidth' and '\textwidth', and '\evensidemargin' is + the remainder. + +'\paperheight' + The height of the paper, as distinct from the height of the print + area. Normally set with a document class option, as in + '\documentclass[a4paper]{article}' (*note Document class + options::). + +'\paperwidth' + The width of the paper, as distinct from the width of the print + area. Normally set with a document class option, as in + '\documentclass[a4paper]{article}' (*note Document class + options::). + +'\textheight' + The normal vertical height of the page body. If the document is + set at a nominal type size of 10pt then for an 'article' or + 'report' the default is '43\baselineskip', while for a 'book' it is + '41\baselineskip'. At a type size of 11pt the default is + '38\baselineskip' for all document classes. At 12pt it is + '36\baselineskip' for all classes. + +'\textwidth' + The full horizontal width of the entire page body. For an + 'article' or 'report' document, the default is '345pt' when the + chosen type size is 10pt, the default is '360pt' at 11pt, and it is + '390pt' at 12pt. For a 'book' document, the default is '4.5in' at + a type size of 10pt, and '5in' at 11pt or 12pt. 
+ + In multi-column output, '\textwidth' remains the width of the + entire page body, while '\columnwidth' is the width of one column + (*note \twocolumn::). + + In lists (*note list::), '\textwidth' remains the width of the + entire page body (and '\columnwidth' the width of the entire + column), while '\linewidth' may decrease for nested lists. + + Inside a minipage (*note minipage::) or '\parbox' (*note + \parbox::), all the width-related parameters are set to the + specified width, and revert to their normal values at the end of + the 'minipage' or '\parbox'. + +'\hsize' + This entry is included for completeness: '\hsize' is the TeX + primitive parameter used when text is broken into lines. It should + not be used in normal LaTeX documents. + +'\topmargin' + Space between the top of the TeX page (one inch from the top of the + paper, by default) and the top of the header. The value is + computed based on many other parameters: '\paperheight - 2in - + \headheight - \headsep - \textheight - \footskip', and then divided + by two. + +'\topskip' + Minimum distance between the top of the page body and the baseline + of the first line of text. For the standard classes, the default + is the same as the font size, e.g., '10pt' at a type size of 10pt. + + +File: latex2e.info, Node: \baselineskip & \baselinestretch, Next: Floats, Prev: Page layout parameters, Up: Layout + +5.6 '\baselineskip' & '\baselinestretch' +======================================== + +The '\baselineskip' is a rubber length (*note Lengths::). It gives the +"leading", the normal distance between lines in a paragraph, from +baseline to baseline. + + Ordinarily document authors do not directly change '\baselineskip' +while writing. Instead, it is set by the low level font selection +command '\fontsize' (*note low level font commands fontsize::). The +'\baselineskip''s value is reset every time a font change happens and so +any direct change to '\baselineskip' would vanish the next time there +was a font switch. For how to influence line spacing, see the +discussion of '\baselinestretch' below. + + Usually, a font's size and baseline skip is assigned by the font +designer. These numbers are nominal in the sense that if, for instance, +a font's style file has the command '\fontsize{10pt}{12pt}' then that +does not mean that the characters in the font are 10pt tall; for +instance, parentheses and accented capitals may be taller. Nor does it +mean that if the lines are spaced less than 12pt apart then they risk +touching. Rather these numbers are typographic judgements. (Often, the +'\baselineskip' is about twenty percent larger than the font size.) + + The '\baselineskip' is not a property of each line but of the entire +paragraph. As a result, large text in the middle of a paragraph, such +as a single '{\Huge Q}', will be squashed into its line. TeX will make +sure it doesn't scrape up against the line above but won't change the +'\baselineskip' for that one line to make extra room above. For the +fix, use a '\strut' (*note \strut::). + + The value of '\baselineskip' that TeX uses for the paragraph is the +value in effect at the blank line or command that ends the paragraph +unit. So if a document contains this paragraph then its lines will be +scrunched together, compared to lines in surrounding paragraphs. + + Many people see a page break between text and a displayed equation as + bad style, so in effect the display is part of the paragraph. 
+ Because this display is in footnotesize, the entire paragraph has the + baseline spacing matching that size. + {\footnotesize $$a+b = c$$} + + The process for making paragraphs is that when a new line is added, +if the depth of the previous line plus the height of the new line is +less than '\baselineskip' then TeX inserts vertical glue to make up the +difference. There are two fine points. The first is that if the lines +would be too close together, closer than '\lineskiplimit', then TeX +instead uses '\lineskip' as the interline glue. The second is that TeX +doesn't actually use the depth of the previous line. Instead it uses +'\prevdepth', which usually contains that depth. But at the beginning +of the paragraph (or any vertical list) or just after a rule, +'\prevdepth' has the value -1000pt and this special value tells TeX not +to insert any interline glue at the paragraph start. + + In the standard classes '\lineskiplimit' is 0pt and '\lineskip' is +1pt. By the prior paragraph then, the distance between lines can +approach zero but if it becomes zero (or less than zero) then the lines +jump to 1pt apart. + + Sometimes authors must, for editing purposes, put the document in +double space or one-and-a-half space. The right way to influence the +interline distance is via '\baselinestretch'. It scales +'\baselineskip', and has a default value of 1.0. It is a command, not a +length, so change the scale factor as in +'\renewcommand{\baselinestretch}{1.5}\selectfont'. + + The most straightforward way to change the line spacing for an entire +document is to put '\linespread{FACTOR}' in the preamble. For double +spacing, take FACTOR to be 1.6 and for one-and-a-half spacing use 1.3. +These number are rough: for instance, since the '\baselineskip' is about +1.2 times the font size, multiplying by 1.6 gives a baseline skip to +font size ratio of about 2. (The '\linespread' command is defined as +'\renewcommand{\baselinestretch}{FACTOR}' so it won't take effect until +a font setting happens. But that always takes place at the start of a +document, so there you don't need to follow it with '\selectfont'.) + + A simpler approach is the 'setspace' package. The basic example: + + \usepackage{setspace} + \doublespacing % or \onehalfspacing for 1.5 + +In the preamble these will start the document off with that sizing. But +you can also use these declarations in the document body to change the +spacing from that point forward, and consequently there is +'\singlespacing' to return the spacing to normal. In the document body, +a better practice than using the declarations is to use environments, +such as '\begin{doublespace} ... \end{doublespace}'. The package also +has commands to do arbitrary spacing: '\setstretch{FACTOR}' and +'\begin{spacing}{FACTOR} ... \end{spacing}'. This package also keeps +the line spacing single-spaced in places where that is typically +desirable, such as footnotes and figure captions. See the package +documentation. + + +File: latex2e.info, Node: Floats, Prev: \baselineskip & \baselinestretch, Up: Layout + +5.7 Floats +========== + +Some typographic elements, such as figures and tables, cannot be broken +across pages. They must be typeset outside of the normal flow of text, +for instance floating to the top of a later page. + + LaTeX can have a number of different classes of floating material. +The default is the two classes, 'figure' (*note figure::) and 'table' +(*note table::), but you can create a new class with the package +'float'. 
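+
+   For instance, a new class of floats for program listings might be
+set up along the following lines.  This is only a sketch using the
+'float' package; the class name 'program', the caption label 'Listing',
+and the auxiliary file extension 'lop' are illustrative.
+
+     \usepackage{float}            % in preamble
+     \newfloat{program}{tbp}{lop}  % new float class, default placement, aux file extension
+     \floatname{program}{Listing}  % captions read 'Listing 1:', and so on
+       ...
+     \begin{program}
+       \begin{verbatim}
+     print('hello, world')
+       \end{verbatim}
+       \caption{A first program.}
+     \end{program}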
+ + Within any one float class LaTeX always respects the order, so that +the first figure in a document source must be typeset before the second +figure. However, LaTeX may mix the classes, so it can happen that while +the first table appears in the source before the first figure, it +appears in the output after it. + + The placement of floats is subject to parameters, given below, that +limit the number of floats that can appear at the top of a page, and the +bottom, etc. If so many floats are queued that the limits prevent them +all from fitting on a page then LaTeX places what it can and defers the +rest to the next page. In this way, floats may end up being typeset far +from their place in the source. In particular, a float that is big may +migrate to the end of the document. In which event, because all floats +in a class must appear in sequential order, every following float in +that class also appears at the end. + + In addition to changing the parameters, for each float you can tweak +where the float placement algorithm tries to place it by using its +PLACEMENT argument. The possible values are a sequence of the letters +below. The default for both 'figure' and 'table', in both 'article' and +'book' classes, is 'tbp'. + +'t' + (Top)--at the top of a text page. + +'b' + (Bottom)--at the bottom of a text page. (However, 'b' is not + allowed for full-width floats ('figure*') with double-column + output. To ameliorate this, use the 'stfloats' or 'dblfloatfix' + package, but see the discussion at caveats in the FAQ: + . + +'h' + (Here)--at the position in the text where the 'figure' environment + appears. However, 'h' is not allowed by itself; 't' is + automatically added. + + To absolutely force a float to appear "here", you can + '\usepackage{float}' and use the 'H' specifier which it defines. + For further discussion, see the FAQ entry at + . + +'p' + (Page of floats)--on a separate "float page", which is a page + containing no text, only floats. + +'!' + Used in addition to one of the above; for this float only, LaTeX + ignores the restrictions on both the number of floats that can + appear and the relative amounts of float and non-float text on the + page. The '!' specifier does _not_ mean "put the float here"; see + above. + + Note: the order in which letters appear in the PLACEMENT argument +does not change the order in which LaTeX tries to place the float; for +instance, 'btp' has the same effect as 'tbp'. All that PLACEMENT does +is that if a letter is not present then the algorithm does not try that +location. Thus, LaTeX's default of 'tbp' is to try every location +except placing the float where it occurs in the source. + + To prevent LaTeX from moving floats to the end of the document or a +chapter you can use a '\clearpage' command to start a new page and +insert all pending floats. If a pagebreak is undesirable then you can +use the 'afterpage' package and issue '\afterpage{\clearpage}'. This +will wait until the current page is finished and then flush all +outstanding floats. + + LaTeX can typeset a float before where it appears in the source +(although on the same output page) if there is a 't' specifier in the +PLACEMENT parameter. If this is not desired, and deleting the 't' is +not acceptable as it keeps the float from being placed at the top of the +next page, then you can prevent it by either using the 'flafter' package +or using the command '\suppressfloats[t]', which causes floats for the +top position on this page to moved to the next page. 
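+
+   For instance, this sketch gives one float a permissive PLACEMENT
+argument and, following the advice above, flushes any still-pending
+floats with 'afterpage' (the graphic file name 'large-diagram' is only
+illustrative).
+
+     \usepackage{graphicx,afterpage}  % in preamble
+       ...
+     \begin{figure}[!tbp]  % try top, bottom, or a float page; '!' relaxes the limits
+       \centering
+       \includegraphics[width=0.8\textwidth]{large-diagram}
+       \caption{A large figure that might otherwise drift to the end.}
+     \end{figure}
+       ...
+     \afterpage{\clearpage}  % flush pending floats once the current page is complete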
+ + Parameters relating to fractions of pages occupied by float and +non-float text (change them with '\renewcommand{PARAMETER}{DECIMAL +BETWEEN 0 AND 1}'): + +'\bottomfraction' + The maximum fraction of the page allowed to be occupied by floats + at the bottom; default '.3'. + +'\floatpagefraction' + The minimum fraction of a float page that must be occupied by + floats; default '.5'. + +'\textfraction' + Minimum fraction of a page that must be text; if floats take up too + much space to preserve this much text, floats will be moved to a + different page. The default is '.2'. + +'\topfraction' + Maximum fraction at the top of a page that may be occupied before + floats; default '.7'. + + Parameters relating to vertical space around floats (change them with +a command of the form '\setlength{PARAMETER}{LENGTH EXPRESSION}'): + +'\floatsep' + Space between floats at the top or bottom of a page; default '12pt + plus2pt minus2pt'. + +'\intextsep' + Space above and below a float in the middle of the main text; + default '12pt plus2pt minus2pt' for 10 point and 11 point + documents, and '14pt plus4pt minus4pt' for 12 point documents. + +'\textfloatsep' + Space between the last (first) float at the top (bottom) of a page; + default '20pt plus2pt minus4pt'. + + Counters relating to the number of floats on a page (change them with +a command of the form '\setcounter{CTRNAME}{NATURAL NUMBER}'): + +'bottomnumber' + Maximum number of floats that can appear at the bottom of a text + page; default 1. + +'dbltopnumber' + Maximum number of full-sized floats that can appear at the top of a + two-column page; default 2. + +'topnumber' + Maximum number of floats that can appear at the top of a text page; + default 2. + +'totalnumber' + Maximum number of floats that can appear on a text page; default 3. + + The principal TeX FAQ entry relating to floats + contains suggestions for relaxing +LaTeX's default parameters to reduce the problem of floats being pushed +to the end. A full explanation of the float placement algorithm is in +Frank Mittelbach's article "How to influence the position of float +environments like figure and table in LaTeX?" +(). + +* Menu: + +* \caption:: Make a caption for a floating environment. + + +File: latex2e.info, Node: \caption, Up: Floats + +5.7.1 '\caption' +---------------- + +Synopsis: + + \caption{CAPTION-TEXT} + +or + + \caption[SHORT-CAPTION-TEXT]{CAPTION-TEXT} + + Make a caption for a floating environment, such as a 'figure' or +'table' environment (*note figure:: or *note table::). + + In this example, LaTeX places a caption below the vertical blank +space that is left by the author for the later inclusion of a picture. + + \begin{figure} + \vspace*{1cm} + \caption{Alonzo Cushing, Battery A, 4th US Artillery.} + \label{fig:CushingPic} + \end{figure} + +The '\caption' command will label the CAPTION-TEXT with something like +'Figure 1:' for an article or 'Figure 1.1:' for a book. The text is +centered if it is shorter than the text width, or set as an unindented +paragraph if it takes more than one line. + + In addition to placing the CAPTION-TEXT in the output, the '\caption' +command also saves that information for use in a list of figures or list +of tables (*note Table of contents etc.::). + + Here the '\caption' command uses the optional SHORT-CAPTION-TEXT, so +that the shorter text appears in the list of tables, rather than the +longer CAPTION-TEXT. 
+ + \begin{table} + \centering + \begin{tabular}{|*{3}{c}|} + \hline + 4 &9 &2 \\ + 3 &5 &7 \\ + 8 &1 &6 \\ + \hline + \end{tabular} + \caption[\textit{Lo Shu} magic square]{% + The \textit{Lo Shu} magic square, which is unique among + squares of order three up to rotation and reflection.} + \label{tab:LoShu} + \end{table} + +LaTeX will label the CAPTION-TEXT with something like 'Table 1:' for an +article or 'Table 1.1:' for a book. + + The caption can appear at the top of the 'figure' or 'table'. For +instance, that would happen in the prior example by putting the +'\caption' between the '\centering' and the '\begin{tabular}'. + + Different floating environments are numbered separately, by default. +It is '\caption' that updates the counter, and so any '\label' must come +after the '\caption'. The counter for the 'figure' environment is named +'figure', and similarly the counter for the 'table' environment is +'table'. + + The text that will be put in the list of figures or list of tables is +moving argument. If you get the LaTeX error '! Argument of \@caption +has an extra }' then you must put '\protect' in front of any fragile +commands. *Note \protect::. + + The 'caption' package has many options to adjust how the caption +appears, for example changing the font size, making the caption be +hanging text rather than set as a paragraph, or making the caption +always set as a paragraph rather than centered when it is short. + + +File: latex2e.info, Node: Sectioning, Next: Cross references, Prev: Layout, Up: Top + +6 Sectioning +************ + +Structure your text into divisions: parts, chapters, sections, etc. All +sectioning commands have the same form, one of: + + SECTIONING-COMMAND{TITLE} + SECTIONING-COMMAND*{TITLE} + SECTIONING-COMMAND[TOC-TITLE]{TITLE} + +For instance, declare the start of a subsection as with +'\subsection{Motivation}'. + + The table has each SECTIONING-COMMAND in LaTeX. All are available in +all of LaTeX's standard document classes 'book', 'report', +and 'article', except that '\chapter' is not available in 'article'. + +Sectioning unit Command Level +-------------------------------------------------------------------- +Part '\part' -1 ('book', 'report'), 0 + ('article') +Chapter '\chapter' 0 +Section '\section' 1 +Subsection '\subsection' 2 +Subsubsection '\subsubsection' 3 +Paragraph '\paragraph' 4 +Subparagraph '\subparagraph' 5 + + All these commands have a '*'-form that prints TITLE as usual but +does not number it and does not make an entry in the table of contents. +An example of using this is for an appendix in an 'article'. The input +'\appendix\section{Appendix}' gives the output 'A Appendix' (*note +\appendix::). You can lose the numbering 'A' by instead entering +'\section*{Appendix}' (articles often omit a table of contents and have +simple page headers so the other differences from the '\section' command +may not matter). + + The section title TITLE provides the heading in the main text, but it +may also appear in the table of contents and in the running head or foot +(*note Page styles::). You may not want the same text in these places +as in the main text. All of these commands have an optional argument +TOC-TITLE for these other places. + + The level number in the table above determines which sectional units +are numbered, and which appear in the table of contents. 
If the +sectioning command's LEVEL is less than or equal to the value of the +counter 'secnumdepth' then the titles for this sectioning command will +be numbered (*note Sectioning/secnumdepth::). And, if LEVEL is less +than or equal to the value of the counter 'tocdepth' then the table of +contents will have an entry for this sectioning unit (*note +Sectioning/tocdepth::). + + LaTeX expects that before you have a '\subsection' you will have a +'\section' and, in a 'book' class document, that before a '\section' you +will have a '\chapter'. Otherwise you can get something like a +subsection numbered '3.0.1'. + + LaTeX lets you change the appearance of the sectional units. As a +simple example, you can change the section numbering to upper-case +letters with '\renewcommand\thesection{\Alph{section}}' in the preamble +(*note \alph \Alph \arabic \roman \Roman \fnsymbol::). CTAN has many +packages that make this adjustment easier, notably 'titlesec'. + + Two counters relate to the appearance of headings made by sectioning +commands. + +'secnumdepth' + Controls which sectioning unit are numbered. Setting the counter + with '\setcounter{secnumdepth}{LEVEL}' will suppress numbering of + sectioning at any depth greater than LEVEL (*note \setcounter::). + See the above table for the level numbers. For instance, if the + 'secnumdepth' is 1 in an 'article' then a '\section{Introduction}' + command will produce output like '1 Introduction' while + '\subsection{Discussion}' will produce output like 'Discussion', + without the number. LaTeX's default 'secnumdepth' is 3 in + 'article' class and 2 in the 'book' and 'report' classes. + +'tocdepth' + Controls which sectioning units are listed in the table of + contents. The setting '\setcounter{tocdepth}{LEVEL}' makes the + sectioning units at LEVEL be the smallest ones listed (*note + \setcounter::). See the above table for the level numbers. For + instance, if 'tocdepth' is 1 then the table of contents will list + sections but not subsections. LaTeX's default 'tocdepth' is 3 in + 'article' class and 2 in the 'book' and 'report' classes. + +* Menu: + +* \part:: Start a part. +* \chapter:: Start a chapter. +* \section:: Start a section. +* \subsection:: Start a subsection. +* \subsubsection & \paragraph & \subparagraph:: Lower divisions. +* \appendix:: Start appendices. +* \frontmatter & \mainmatter & \backmatter:: The three parts of a book. +* \@startsection:: Sectional unit headings. + + +File: latex2e.info, Node: \part, Next: \chapter, Up: Sectioning + +6.1 '\part' +=========== + +Synopsis, one of: + + \part{TITLE} + \part*{TITLE} + \part[TOC-TITLE]{TITLE} + + Start a document part. The standard LaTeX classes 'book', 'report', +and 'article', all have this command. + + This produces a document part, in a book. + + \part{VOLUME I \\ + PERSONAL MEMOIRS OF U.\ S.\ GRANT} + \chapter{ANCESTRY--BIRTH--BOYHOOD.} + My family is American, and has been for generations, + in all its branches, direct and collateral. + + In each standard class the '\part' command outputs a part number such +as 'Part I', alone on its line, in boldface, and in large type. Then +LaTeX outputs TITLE, also alone on its line, in bold and in even larger +type. In class 'book', the LaTeX default puts each part alone on its +own page. If the book is two-sided then LaTeX will skip a page if +needed to have the new part on an odd-numbered page. In 'report' it is +again alone on a page, but LaTeX won't force it onto an odd-numbered +page. 
In an 'article' LaTeX does not put it on a fresh page, but +instead outputs the part number and part title onto the main document +page. + + The '*' form shows TITLE but it does not show the part number, does +not increment the 'part' counter, and produces no table of contents +entry. + + The optional argument TOC-TITLE will appear as the part title in the +table of contents (*note Table of contents etc.::) and in running +headers (*note Page styles::). If it is not present then TITLE will be +there. This example puts a line break in TITLE but omits the break in +the table of contents. + + \part[Up from the bottom; my life]{Up from the bottom\\ my life} + + For determining which sectional units are numbered and which appear +in the table of contents, the level number of a part is -1 (*note +Sectioning/secnumdepth::, and *note Sectioning/tocdepth::). + + In the class 'article', if a paragraph immediately follows the part +title then it is not indented. To get an indent you can use the package +'indentfirst'. + + One package to change the behavior of '\part' is 'titlesec'. See its +documentation on CTAN. + + +File: latex2e.info, Node: \chapter, Next: \section, Prev: \part, Up: Sectioning + +6.2 '\chapter' +============== + +Synopsis, one of: + + \chapter{TITLE} + \chapter*{TITLE} + \chapter[TOC-TITLE]{TITLE} + + Start a chapter. The standard LaTeX classes 'book' and 'report' have +this command but 'article' does not. + + This produces a chapter. + + \chapter{Loomings} + Call me Ishmael. + Some years ago---never mind how long precisely---having little or no + money in my purse, and nothing particular to interest me on shore, I + thought I would sail about a little and see the watery part of + the world. + + The LaTeX default starts each chapter on a fresh page, an +odd-numbered page if the document is two-sided. It produces a chapter +number such as 'Chapter 1' in large boldface type (the size is '\huge'). +It then puts TITLE on a fresh line, in boldface type that is still +larger (size '\Huge'). It also increments the 'chapter' counter, adds +an entry to the table of contents (*note Table of contents etc.::), and +sets the running header information (*note Page styles::). + + The '*' form shows TITLE on a fresh line, in boldface. But it does +not show the chapter number, does not increment the 'chapter' counter, +produces no table of contents entry, and does not affect the running +header. (If you use the page style 'headings' in a two-sided document +then the header will be from the prior chapter.) This example +illustrates. + + \chapter*{Preamble} + + The optional argument TOC-TITLE will appear as the chapter title in +the table of contents (*note Table of contents etc.::) and in running +headers (*note Page styles::). If it is not present then TITLE will be +there. This shows the full name in the chapter title, + + \chapter[Weyl]{Hermann Klaus Hugo (Peter) Weyl (1885--1955)} + +but only 'Weyl' on the contents page. This puts a line break in the +title but that doesn't work well with running headers so it omits the +break in the contents + + \chapter[Given it all; my story]{Given it all\\ my story} + + For determining which sectional units are numbered and which appear +in the table of contents, the level number of a chapter is 0 (*note +Sectioning/secnumdepth:: and *note Sectioning/tocdepth::). + + The paragraph that follows the chapter title is not indented, as is a +standard typographical practice. To get an indent use the package +'indentfirst'. 
+ + You can change what is shown for the chapter number. To change it to +something like 'Lecture 1', put in the preamble either +'\renewcommand{\chaptername}{Lecture}' or this (*note \makeatletter & +\makeatother::). + + \makeatletter + \renewcommand{\@chapapp}{Lecture} + \makeatother + +To make this change because of the primary language for the document, +see the package 'babel'. + + In a two-sided document LaTeX puts a chapter on odd-numbered page, if +necessary leaving an even-numbered page that is blank except for any +running headers. To make that page completely blank, see *note +\clearpage & \cleardoublepage::. + + To change the behavior of the '\chapter' command, you can copy its +definition from the LaTeX format file and make adjustments. But there +are also many packages on CTAN that address this. One is 'titlesec'. +See its documentation, but the example below gives a sense of what it +can do. + + \usepackage{titlesec} % in preamble + \titleformat{\chapter} + {\Huge\bfseries} % format of title + {} % label, such as 1.2 for a subsection + {0pt} % length of separation between label and title + {} % before-code hook + +This omits the chapter number 'Chapter 1' from the page but unlike +'\chapter*' it keeps the chapter in the table of contents and the +running headers. + + +File: latex2e.info, Node: \section, Next: \subsection, Prev: \chapter, Up: Sectioning + +6.3 '\section' +============== + +Synopsis, one of: + + \section{TITLE} + \section*{TITLE} + \section[TOC-TITLE]{TITLE} + + Start a section. The standard LaTeX classes 'article', 'book', and +'report' all have this command. + + This produces a section. + + In this Part we tend to be more interested in the function, + in the input-output behavior, + than in the details of implementing that behavior. + + \section{Turing machines} + Despite this desire to downplay implementation, + we follow the approach of A~Turing that the + first step toward defining the set of computable functions + is to reflect on the details of what mechanisms can do. + + For the standard LaTeX classes 'book' and 'report' the default output +is like '1.2 TITLE' (for chapter 1, section 2), alone on its line and +flush left, in boldface and a larger type (the type size is '\Large'). +The same holds in 'article' except that there are no chapters in that +class so it looks like '2 TITLE'. + + The '*' form shows TITLE. But it does not show the section number, +does not increment the 'section' counter, produces no table of contents +entry, and does not affect the running header. (If you use the page +style 'headings' in a two-sided document then the header will be from +the prior section.) + + The optional argument TOC-TITLE will appear as the section title in +the table of contents (*note Table of contents etc.::) and in running +headers (*note Page styles::). If it is not present then TITLE will be +there. This shows the full name in the title of the section: + + \section[Elizabeth~II]{Elizabeth the Second, + by the Grace of God of the United Kingdom, + Canada and Her other Realms and Territories Queen, + Head of the Commonwealth, Defender of the Faith.} + +but only 'Elizabeth II' on the contents page and in the headers. This +has a line break in TITLE but that does not work with headers so it is +omitted from the contents and headers. 
+ + \section[Truth is, I cheated; my life story]{Truth is, + I cheated\\my life story} + + For determining which sectional units are numbered and which appear +in the table of contents, the level number of a section is 1 (*note +Sectioning/secnumdepth:: and *note Sectioning/tocdepth::). + + The paragraph that follows the section title is not indented, as is a +standard typographical practice. One way to get an indent is to use the +package 'indentfirst'. + + In general, to change the behavior of the '\section' command, there +are a number of options. One is the '\@startsection' command (*note +\@startsection::). There are also many packages on CTAN that address +this, including 'titlesec'. See the documentation but the example below +gives a sense of what they can do. + + \usepackage{titlesec} % in preamble + \titleformat{\section} + {\normalfont\Large\bfseries} % format of title + {\makebox[1pc][r]{\thesection\hspace{1pc}}} % label + {0pt} % length of separation between label and title + {} % before-code hook + \titlespacing*{\section} + {-1pc}{18pt}{10pt}[10pc] + +That puts the section number in the margin. + + +File: latex2e.info, Node: \subsection, Next: \subsubsection & \paragraph & \subparagraph, Prev: \section, Up: Sectioning + +6.4 '\subsection' +================= + +Synopsis, one of: + + \subsection{TITLE} + \subsection*{TITLE} + \subsection[TOC-TITLE]{TITLE} + + Start a subsection. The standard LaTeX classes 'article', 'book', +and 'report' all have this command. + + This produces a subsection. + + We will show that there are more functions than Turing machines and that + therefore some functions have no associated machine. + + \subsection{Cardinality} We will begin with two paradoxes that + dramatize the challenge to our intuition posed by comparing the sizes of + infinite sets. + + For the standard LaTeX classes 'book' and 'report' the default output +is like '1.2.3 TITLE' (for chapter 1, section 2, subsection 3), alone on +its line and flush left, in boldface and a larger type (the type size is +'\large'). The same holds in 'article' except that there are no +chapters in that class so it looks like '2.3 TITLE'. + + The '*' form shows TITLE. But it does not show the subsection +number, does not increment the 'subsection' counter, and produces no +table of contents entry. + + The optional argument TOC-TITLE will appear as the subsection title +in the table of contents (*note Table of contents etc.::). If it is not +present then TITLE will be there. This shows the full text in the title +of the subsection: + + \subsection[$\alpha,\beta,\gamma$ paper]{\textit{The Origin of + Chemical Elements} by R.A.~Alpher, H.~Bethe, and G.~Gamow} + +but only 'α,β,γ paper' on the contents page. + + For determining which sectional units are numbered and which appear +in the table of contents, the level number of a subsection is 2 (*note +Sectioning/secnumdepth:: and *note Sectioning/tocdepth::). + + The paragraph that follows the subsection title is not indented, as +is a standard typographical practice. One way to get an indent is to +use the package 'indentfirst'. + + There are a number of ways to change the behavior of the +'\subsection' command. One is the '\@startsection' command (*note +\@startsection::). There are also many packages on CTAN that address +this, including 'titlesec'. See the documentation but the example below +gives a sense of what they can do. 
+ + \usepackage{titlesec} % in preamble + \titleformat{\subsection}[runin] + {\normalfont\normalsize\bfseries} % format of the title + {\thesubsection} % label + {0.6em} % space between label and title + {} % before-code hook + +That puts the subsection number and TITLE in the first line of text. + + +File: latex2e.info, Node: \subsubsection & \paragraph & \subparagraph, Next: \appendix, Prev: \subsection, Up: Sectioning + +6.5 '\subsubsection', '\paragraph', '\subparagraph' +=================================================== + +Synopsis, one of: + + \subsubsection{TITLE} + \subsubsection*{TITLE} + \subsubsection[TOC-TITLE]{TITLE} + +or one of: + + \paragraph{TITLE} + \paragraph*{TITLE} + \paragraph[TOC-TITLE]{TITLE} + +or one of: + + \subparagraph{TITLE} + \subparagraph*{TITLE} + \subparagraph[TOC-TITLE]{TITLE} + + Start a subsubsection, paragraph, or subparagraph. The standard +LaTeX classes 'article', 'book', and 'report' all have these commands, +although they are not commonly used. + + This produces a subsubsection. + + \subsubsection{Piston ring compressors: structural performance} + Provide exterior/interior wall cladding assemblies + capable of withstanding the effects of load and stresses from + consumer-grade gasoline engine piston rings. + + The default output of each of the three does not change over the +standard LaTeX classes 'article', 'book', and 'report'. For +'\subsubsection' the TITLE is alone on its line, in boldface and normal +size type. For '\paragraph' the TITLE is inline with the text, not +indented, in boldface and normal size type. For '\subparagraph' the +TITLE is inline with the text, with a paragraph indent, in boldface and +normal size type (Because an 'article' has no chapters its +subsubsections are numbered and so it looks like '1.2.3 TITLE', for +section 1, subsection 2, and subsubsection 3. The other two divisions +are not numbered.) + + The '*' form shows TITLE. But it does not increment the associated +counter and produces no table of contents entry (and does not show the +number for '\subsubsection'). + + The optional argument TOC-TITLE will appear as the division title in +the table of contents (*note Table of contents etc.::). If it is not +present then TITLE will be there. + + For determining which sectional units are numbered and which appear +in the table of contents, the level number of a subsubsection is 3, of a +paragraph is 4, and of a subparagraph is 5 (*note +Sectioning/secnumdepth:: and *note Sectioning/tocdepth::). + + The paragraph that follows the subsubsection title is not indented, +as is a standard typographical practice. One way to get an indent is to +use the package 'indentfirst'. + + There are a number of ways to change the behavior of the these +commands. One is the '\@startsection' command (*note \@startsection::). +There are also many packages on CTAN that address this, including +'titlesec'. See the documentation on CTAN. + + +File: latex2e.info, Node: \appendix, Next: \frontmatter & \mainmatter & \backmatter, Prev: \subsubsection & \paragraph & \subparagraph, Up: Sectioning + +6.6 '\appendix' +=============== + +Synopsis: + + \appendix + + This does not directly produce any output. But in a book or report +it declares that subsequent '\chapter' commands start an appendix. In +an article it does the same, for '\section' commands. It also resets +the 'chapter' and 'section' counters to 0 in a book or report, and in an +article resets the 'section' and 'subsection' counters. + + In this book + + \chapter{One} ... 
+ \chapter{Two} ... + ... + \appendix + \chapter{Three} ... + \chapter{Four} ... + +the first two will generate output numbered 'Chapter 1' and 'Chapter 2'. +After the '\appendix' the numbering will be 'Appendix A' and 'Appendix +B'. *Note Larger book template::, for another example. + + The 'appendix' package adds the command '\appendixpage' to put a +separate 'Appendices' in the document body before the first appendix, +and the command '\addappheadtotoc' to do the same in the table of +contents. You can reset the name 'Appendices' with a command like +'\renewcommand{\appendixname}{Specification}', as well as a number of +other features. See the documentation on CTAN. + + +File: latex2e.info, Node: \frontmatter & \mainmatter & \backmatter, Next: \@startsection, Prev: \appendix, Up: Sectioning + +6.7 '\frontmatter', '\mainmatter', '\backmatter' +================================================ + +Synopsis, one or more of: + + \frontmatter + ... + \mainmatter + ... + \backmatter + ... + + Format a 'book' class document differently according to which part of +the document is being produced. All three commands are optional. + + Traditionally, a book's front matter contains such things as the +title page, an abstract, a table of contents, a preface, a list of +notations, a list of figures, and a list of tables. (Some of these +front matter pages, such as the title page, are traditionally not +numbered.) The back matter may contain such things as a glossary, +notes, a bibliography, and an index. + + The '\frontmatter' command makes the pages numbered in lowercase +roman, and makes chapters not numbered, although each chapter's title +appears in the table of contents; if you use other sectioning commands +here, use the '*'-version (*note Sectioning::). + + The '\mainmatter' command changes the behavior back to the expected +version, and resets the page number. + + The '\backmatter' command leaves the page numbering alone but +switches the chapters back to being not numbered. + + *Note Larger book template::, for an example using these three +commands. + + +File: latex2e.info, Node: \@startsection, Prev: \frontmatter & \mainmatter & \backmatter, Up: Sectioning + +6.8 '\@startsection', typesetting sectional unit headings +========================================================= + +Synopsis: + + \@startsection{NAME}{LEVEL}{INDENT}{BEFORESKIP}{AFTERSKIP}{STYLE} + + Used to help redefine the behavior of commands that start sectioning +divisions such as '\section' or '\subsection'. + + Note that the 'titlesec' package makes manipulation of sectioning +easier. Further, while most requirements for sectioning commands can be +satisfied with '\@startsection', some cannot. For instance, in the +standard LaTeX 'book' and 'report' classes the commands '\chapter' and +'\report' are not constructed in this way. To make such a command you +may want to use the '\secdef' command. + + Technically, '\@startsection' has the form + + \@startsection{NAME} + {LEVEL} + {INDENT} + {BEFORESKIP} + {AFTERSKIP} + {STYLE}*[TOCTITLE]{TITLE} + +so that issuing + + \renewcommand{\section}{\@startsection{NAME} + {LEVEL} + {INDENT} + {BEFORESKIP} + {AFTERSKIP} + {STYLE}} + +redefines '\section' while keeping its standard calling form +'\section*[TOCTITLE]{TITLE}' (in which, for reminder, the star '*' is +optional). *Note Sectioning::. This implies that when you write a +command like '\renewcommand{\section}{...}', the '\@startsection{...}' +must come last in the definition. See the examples below. 
+ +NAME + Name of the counter used to number the sectioning header. This + counter must be defined separately. Most commonly this is either + 'section', 'subsection', or 'paragraph'. Although in those cases + the counter name is the same as the sectioning command itself, you + don't have to use the same name. + + Then '\the'NAME displays the title number and '\'NAME'mark' is for + the page headers. See the third example below. + +LEVEL + An integer giving the depth of the sectioning command. *Note + Sectioning::, for the list of standard level numbers. + + If LEVEL is less than or equal to the value of the counter + 'secnumdepth' then titles for this sectioning command will be + numbered (*note Sectioning/secnumdepth::). For instance, if + 'secnumdepth' is 1 in an 'article' then the command + '\section{Introduction}' will produce output like "1 Introduction" + while '\subsection{Discussion}' will produce output like + "Discussion", without the number prefix. + + If LEVEL is less than or equal to the value of the counter TOCDEPTH + then the table of contents will have an entry for this sectioning + unit (*note Sectioning/tocdepth::). For instance, in an 'article', + if TOCDEPTH is 1 then the table of contents will list sections but + not subsections. + +INDENT + A length giving the indentation of all of the title lines with + respect to the left margin. To have the title flush with the + margin use '0pt'. A negative indentation such as '-\parindent' + will move the title into the left margin. + +BEFORESKIP + The absolute value of this length is the amount of vertical space + that is inserted before this sectioning unit's title. This space + will be discarded if the sectioning unit happens to start at the + beginning of a page. If this number is negative then the first + paragraph following the header is not indented, if it is + non-negative then the first paragraph is indented. (Note that the + negative of '1pt plus 2pt minus 3pt' is '-1pt plus -2pt minus + -3pt'.) + + For example, if BEFORESKIP is '-3.5ex plus -1ex minus -0.2ex' then + to start the new sectioning unit, LaTeX will add about 3.5 times + the height of a letter x in vertical space, and the first paragraph + in the section will not be indented. Using a rubber length, with + 'plus' and 'minus', is good practice here since it gives LaTeX more + flexibility in making up the page (*note Lengths::). + + The full accounting of the vertical space between the baseline of + the line prior to this sectioning unit's header and the baseline of + the header is that it is the sum of the '\parskip' of the text + font, the '\baselineskip' of the title font, and the absolute value + of the BEFORESKIP. This space is typically rubber so it may + stretch or shrink. (If the sectioning unit starts on a fresh page + so that the vertical space is discarded then the baseline of the + header text will be where LaTeX would put the baseline of the first + text line on that page.) + +AFTERSKIP + This is a length. If AFTERSKIP is non-negative then this is the + vertical space inserted after the sectioning unit's title header. + If it is negative then the title header becomes a run-in header, so + that it becomes part of the next paragraph. In this case the + absolute value of the length gives the horizontal space between the + end of the title and the beginning of the following paragraph. + (Note that the negative of '1pt plus 2pt minus 3pt' is '-1pt plus + -2pt minus -3pt'.) 
+ + As with BEFORESKIP, using a rubber length, with 'plus' and 'minus' + components, is good practice here since it gives LaTeX more + flexibility in putting together the page. + + If 'afterskip' is non-negative then the full accounting of the + vertical space between the baseline of the sectioning unit's header + and the baseline of the first line of the following paragraph is + that it is the sum of the '\parskip' of the title font, the + '\baselineskip' of the text font, and the value of AFTER. That + space is typically rubber so it may stretch or shrink. (Note that + because the sign of 'afterskip' changes the sectioning unit + header's from standalone to run-in, you cannot use a negative + 'afterskip' to cancel part of the '\parskip'.) + +STYLE + Controls the styling of the title. See the examples below. + Typical commands to use here are '\centering', '\raggedright', + '\normalfont', '\hrule', or '\newpage'. The last command in STYLE + may be one that takes one argument, such as '\MakeUppercase' or + '\fbox' that takes one argument. The section title will be + supplied as the argument to this command. For instance, setting + STYLE to '\bfseries\MakeUppercase' would produce titles that are + bold and uppercase. + + These are LaTeX's defaults for the first three sectioning units that +are defined with '\@startsection', for the 'article', 'book', and +'report' classes. + + * For 'section': LEVEL is 1, INDENT is 0pt, BEFORESKIP is '-3.5ex + plus -1ex minus -0.2ex', AFTERSKIP is '2.3ex plus 0.2ex', and STYLE + is '\normalfont\Large\bfseries'. + + * For 'subsection': LEVEL is 2, INDENT is 0pt, BEFORESKIP is '-3.25ex + plus -1ex minus -0.2ex', AFTERSKIP is '1.5ex plus 0.2ex', and STYLE + is '\normalfont\large\bfseries'. + + * For 'subsubsection': LEVEL is 3, INDENT is 0pt, BEFORESKIP is + '-3.25ex plus -1ex minus -0.2ex', AFTERSKIP is '1.5ex plus 0.2ex', + and STYLE is '\normalfont\normalsize\bfseries'. + + Some examples follow. These go either in a package or class file or +in the preamble of a LaTeX document. If you put them in the preamble +they must go between a '\makeatletter' command and a '\makeatother'. +(Probably the error message 'You can't use `\spacefactor' in vertical +mode.' means that you forgot this.) *Note \makeatletter & +\makeatother::. + + This will put section titles in large boldface type, centered. It +says '\renewcommand' because LaTeX's standard classes have already +defined a '\section'. For the same reason it does not define a +'section' counter, or the commands '\thesection' and '\l@section'. + + \renewcommand\section{% + \@startsection{section}% *note NAME: \@startsection/name. + {1}% *note LEVEL: \@startsection/level. + {0pt}% *note INDENT: \@startsection/indent. + {-3.5ex plus -1ex minus -.2ex}% *note BEFORESKIP: \@startsection/beforeskip. + {2.3ex plus.2ex}% *note AFTERSKIP: \@startsection/afterskip. + {\centering\normalfont\Large\bfseries}% *note STYLE: \@startsection/style. + } + + This will put 'subsection' titles in small caps type, inline with the +paragraph. + + \renewcommand\subsection{% + \@startsection{subsection}% *note NAME: \@startsection/name. + {2}% *note LEVEL: \@startsection/level. + {0em}% *note INDENT: \@startsection/indent. + {-1ex plus 0.1ex minus -0.05ex}% *note BEFORESKIP: \@startsection/beforeskip. + {-1em plus 0.2em}% *note AFTERSKIP: \@startsection/afterskip. + {\scshape}% *note STYLE: \@startsection/style. + } + + The prior examples redefined existing sectional unit title commands. 
+This defines a new one, illustrating the needed counter and macros to +display that counter. + + \setcounter{secnumdepth}{6}% show counters this far down + \newcounter{subsubparagraph}[subparagraph]% counter for numbering + \renewcommand{\thesubsubparagraph}% how to display + {\thesubparagraph.\@arabic\c@subsubparagraph}% numbering + \newcommand{\subsubparagraph}{\@startsection + {subsubparagraph}% + {6}% + {0em}% + {\baselineskip}% + {0.5\baselineskip}% + {\normalfont\normalsize}} + \newcommand*\l@subsubparagraph{\@dottedtocline{6}{10em}{5em}}% for toc + \newcommand{\subsubparagraphmark}[1]{}% for page headers + + +File: latex2e.info, Node: Cross references, Next: Environments, Prev: Sectioning, Up: Top + +7 Cross references +****************** + +We often want something like 'See Theorem~31'. But by-hand typing the +31 is poor practice. Instead you should write a "label" such as +'\label{eq:GreensThm}' and then "reference" it, as with 'See +equation~\ref{eq:GreensThm}'. LaTeX will automatically work out the +number, put it into the output, and will change that number later if +needed. + + We will see this with Theorem~\ref{th:GreensThm}. % forward reference + ... + \begin{theorem} \label{th:GreensThm} + ... + \end{theorem} + ... + See Theorem~\ref{th:GreensThm} on page~\pageref{th:GreensThm}. + + LaTeX tracks cross reference information in a file having the +extension '.aux' and with the same base name as the file containing the +'\label'. So if '\label' is in 'calculus.tex' then the information is +in 'calculus.aux'. LaTeX puts the information in that file every time +it runs across a '\label'. + + The behavior described in the prior paragraph results in a quirk that +happens when your document has a "forward reference", a '\ref' that +appears before the associated '\label'. If this is the first time that +you are compiling the document then you will get 'LaTeX Warning: +Label(s) may have changed. Rerun to get cross references right' and in +the output the forward reference will appear as two question marks '??', +in boldface. A similar thing happens if you change some things so the +references changes; you get the same warning and the output contains the +old reference information. In both cases, resolve this by compiling the +document a second time. + + The 'cleveref' package enhances LaTeX's cross referencing features. +You can arrange that if you enter +'\begin{thm}\label{th:Nerode}...\end{thm}' then '\cref{th:Nerode}' will +output 'Theorem 3.21', without you having to enter the "Theorem." + +* Menu: + +* \label:: Assign a symbolic name to a piece of text. +* \pageref:: Refer to a page number. +* \ref:: Refer to a section, figure or similar. +* xr package:: References from another document. + + +File: latex2e.info, Node: \label, Next: \pageref, Up: Cross references + +7.1 '\label' +============ + +Synopsis: + + \label{KEY} + + Assign a reference number to KEY. In ordinary text '\label{KEY}' +assigns to KEY the number of the current sectional unit. Inside an +environment with numbering, such as a 'table' or 'theorem' environment, +'\label{KEY}' assigns to KEY the number of that environment. Retrieve +the assigned number with the '\ref{KEY}' command (*note \ref::). + + A key name can consist of any sequence of letters, digits, or common +punctuation characters. Upper and lowercase letters are distinguished, +as usual. + + A common convention is to use labels consisting of a prefix and a +suffix separated by a colon or period. 
Thus, '\label{fig:Post}' is a +label for a figure with a portrait of Emil Post. This helps to avoid +accidentally creating two labels with the same name, and makes your +source more readable. Some commonly-used prefixes: + +'ch' + for chapters + +'sec' +'subsec' + for lower-level sectioning commands + +'fig' + for figures + +'tab' + for tables + +'eq' + for equations + + In the auxiliary file the reference information is kept as the text +of a command of the form '\newlabel{LABEL}{{CURRENTLABEL}{PAGENUMBER}}'. +Here CURRENTLABEL is the current value of the macro '\@currentlabel' +that is usually updated whenever you call '\refstepcounter{COUNTER}'. + + Below, the key 'sec:test' will get the number of the current section +and the key 'fig:test' will get the number of the figure. +(Incidentally, put labels after captions in figures and tables.) + + \section{section name} + \label{sec:test} + This is Section~\ref{sec:test}. + \begin{figure} + ... + \caption{caption text} + \label{fig:test} + \end{figure} + See Figure~\ref{fig:test}. + + +File: latex2e.info, Node: \pageref, Next: \ref, Prev: \label, Up: Cross references + +7.2 '\pageref' +============== + +Synopsis: + + \pageref{KEY} + + Produce the page number of the place in the text where the +corresponding '\label'{KEY} command appears. + + If there is no '\label{KEY}' then you get something like 'LaTeX +Warning: Reference `th:GrensThm' on page 1 undefined on input line 11.' + + Below, the '\label{eq:main}' is used both for the formula number and +for the page number. (Note that the two references are forward +references so this document would need to be compiled twice to resolve +those.) + + The main result is formula~\ref{eq:main} on page~\pageref{eq:main}. + ... + \begin{equation} \label{eq:main} + \mathbf{P}=\mathbf{NP} + \end{equation} + + +File: latex2e.info, Node: \ref, Next: xr package, Prev: \pageref, Up: Cross references + +7.3 '\ref' +========== + +Synopsis: + + \ref{KEY} + + Produces the number of the sectional unit, equation, footnote, +figure, ..., of the corresponding '\label' command (*note \label::). It +does not produce any text, such as the word 'Section' or 'Figure', just +the bare number itself. + + If there is no '\label{KEY}' then you get something like 'LaTeX +Warning: Reference `th:GrensThm' on page 1 undefined on input line 11.' + + In this example the '\ref{popular}' produces '2'. Note that it is a +forward reference since it comes before '\label{popular}' so this +document would have to be compiled twice. + + The most widely-used format is item number~\ref{popular}. + \begin{enumerate} + \item Plain \TeX + \item \label{popular} \LaTeX + \item Con\TeX t + \end{enumerate} + + The 'cleveref' package includes text such as 'Theorem' in the +reference. See the documentation on CTAN. + + +File: latex2e.info, Node: xr package, Prev: \ref, Up: Cross references + +7.4 'xr' package +================ + +Synopsis: + + \usepackage{xr} + \externaldocument{DOCUMENT-BASENAME} + +or + + \usepackage{xr} + \externaldocument[REFERENCE-PREFIX]{DOCUMENT-BASENAME} + + Make cross references to the external document +'DOCUMENT-BASENAME.tex'. + + Here is an example. If 'lectures.tex' has this in the preamble + + \usepackage{xr} + \externaldocument{exercises} + \externaldocument[H-]{hints} + \externaldocument{answers} + +then it can use cross reference labels from the other three documents. +Suppose that 'exercises.tex' has an enumerated list containing this, + + \item \label{exer:EulersThm} What if every vertex has odd degree? 
+ +and 'hints.tex' has an enumerated list with this, + + \item \label{exer:EulersThm} Distinguish the case of two vertices. + +and 'answers.tex' has an enumerated list with this, + + \item \label{ans:EulersThm} There is no Euler path, except if there + are exactly two vertices. + + After compiling the exercises, hints, and answers documents, entering +this in the body of 'lectures.tex' will result in the lectures getting +the reference numbers used in the other documents. + + See Exercise~\ref{exer:EulersThm}, with Hint~\ref{H-exer:EulersThm}. + The solution is Answer~\ref{ans:EulersThm}. + + The prefix 'H-' for the reference from the hints file is needed +because the label in the hints file is the same as the label in the +exercises file. Without that prefix, both references would get the +number from the later file. + + Note: if the document uses the 'hyperref' package then in place of +'xr', put '\usepackage{xr-hyper}' before the '\usepackage{hyperref}'. +Also, if any of the multiple documents uses 'hyperref' then they all +must use it. + + +File: latex2e.info, Node: Environments, Next: Line breaking, Prev: Cross references, Up: Top + +8 Environments +************** + +LaTeX provides many environments for delimiting certain behavior. An +environment begins with '\begin' and ends with '\end', like this: + + \begin{ENVIRONMENT-NAME} + ... + \end{ENVIRONMENT-NAME} + + The ENVIRONMENT-NAME at the beginning must exactly match that at the +end. For instance, the input '\begin{table*}...\end{table}' will cause +an error like: '! LaTeX Error: \begin{table*} on input line 5 ended by +\end{table}.' + + Environments are executed within a group. + +* Menu: + +* abstract:: Produce an abstract. +* array:: Math arrays. +* center:: Centered lines. +* description:: Labelled lists. +* displaymath:: Formulas that appear on their own line. +* document:: Enclose the whole document. +* enumerate:: Numbered lists. +* eqnarray:: Sequences of aligned equations. +* equation:: Displayed equation. +* figure:: Floating figures. +* filecontents:: Writing multiple files from the source. +* flushleft:: Flushed left lines. +* flushright:: Flushed right lines. +* itemize:: Bulleted lists. +* letter:: Letters. +* list:: Generic list environment. +* math:: In-line math. +* minipage:: Miniature page. +* picture:: Picture with text, arrows, lines and circles. +* quotation & quote:: Include a quotation. +* tabbing:: Align text arbitrarily. +* table:: Floating tables. +* tabular:: Align text in columns. +* thebibliography:: Bibliography or reference list. +* theorem:: Theorems, lemmas, etc. +* titlepage:: For hand crafted title pages. +* verbatim:: Simulating typed input. +* verse:: For poetry and other things. + + +File: latex2e.info, Node: abstract, Next: array, Up: Environments + +8.1 'abstract' +============== + +Synopsis: + + \begin{abstract} + ... + \end{abstract} + + Produce an abstract, possibly of multiple paragraphs. This +environment is only defined in the 'article' and 'report' document +classes (*note Document classes::). + + Using the example below in the 'article' class produces a displayed +paragraph. Document class option 'titlepage' causes the abstract to be +on a separate page (*note Document class options::); this is the default +only in the 'report' class. + + \begin{abstract} + We compare all known accounts of the proposal made by Porter Alexander + to Robert E Lee at the Appomattox Court House that the army continue + in a guerrilla war, which Lee refused. 
+ \end{abstract} + + The next example produces a one column abstract in a two column +document (for a more flexible solution, use the package 'abstract'). + + \documentclass[twocolumn]{article} + ... + \begin{document} + \title{Babe Ruth as Cultural Progenitor: an Atavistic Approach} + \author{Smith \\ Jones \\ Robinson\thanks{Railroad tracking grant.}} + \twocolumn[ + \begin{@twocolumnfalse} + \maketitle + \begin{abstract} + Ruth was not just the Sultan of Swat, he was the entire swat + team. + \end{abstract} + \end{@twocolumnfalse} + ] + { % by-hand insert a footnote at page bottom + \renewcommand{\thefootnote}{\fnsymbol{footnote}} + \footnotetext[1]{Thanks for all the fish.} + } + + +File: latex2e.info, Node: array, Next: center, Prev: abstract, Up: Environments + +8.2 'array' +=========== + +Synopsis: + + \begin{array}{COLS} + COLUMN 1 ENTRY &COLUMN 2 ENTRY ... &COLUMN N ENTRY \\ + ... + \end{array} + +or: + + \begin{array}[POS]{COLS} + COLUMN 1 ENTRY &COLUMN 2 ENTRY ... &COLUMN N ENTRY \\ + ... + \end{array} + + Produce a mathematical array. This environment can only be used in +math mode (*note Modes::), and normally appears within a displayed +mathematics environment such as 'equation' (*note equation::). Inside +of each row the column entries are separated by an ampersand ('&'). +Rows are terminated with double-backslashes (*note \\::). + + This example shows a three by three array. + + \begin{equation*} + \chi(x) = + \left| % vertical bar fence + \begin{array}{ccc} + x-a &-b &-c \\ + -d &x-e &-f \\ + -g &-h &x-i + \end{array} + \right| + \end{equation*} + + The required argument COLS describes the number of columns, their +alignment, and the formatting of the intercolumn regions. For instance, +'\begin{array}{rcl}...\end{array}' gives three columns: the first flush +right, the second centered, and the third flush left. See *note +tabular:: for the complete description of COLS and of the other common +features of the two environments, including the optional POS argument. + + There are two ways that 'array' diverges from 'tabular'. The first +is that 'array' entries are typeset in math mode, in textstyle (*note +Math styles::) except if the COLS definition specifies the column with +'p{...}', which causes the entry to be typeset in text mode. The second +is that, instead of 'tabular''s parameter '\tabcolsep', LaTeX's +intercolumn space in an 'array' is governed by '\arraycolsep', which +gives half the width between columns. The default for this is '5pt' so +that between two columns comes 10pt of space. + + To obtain arrays with braces the standard is to use the 'amsmath' +package. It comes with environments 'pmatrix' for an array surrounded +by parentheses '(...)', 'bmatrix' for an array surrounded by square +brackets '[...]', 'Bmatrix' for an array surrounded by curly +braces '{...}', 'vmatrix' for an array surrounded by vertical +bars '|...|', and 'Vmatrix' for an array surrounded by double vertical +bars '||...||', along with a number of other array constructs. + + The next example uses the 'amsmath' package. + + \usepackage{amsmath} % in preamble + + \begin{equation} + \begin{vmatrix} % array with vert lines + a &b \\ + c &d + \end{vmatrix}=ad-bc + \end{equation} + + There are many packages concerning arrays. The 'array' package has +many useful extensions, including more column types. The 'dcolumn' +package adds a column type to center on a decimal point. For both see +the documentation on CTAN.
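+
+   For instance, the following sketch uses 'dcolumn' to define a column
+type that aligns numbers on the decimal point (the column letter 'd'
+and the numbers shown are only illustrative).
+
+     \usepackage{array,dcolumn}         % in preamble
+     \newcolumntype{d}[1]{D{.}{.}{#1}}  % decimal-aligned column, #1 places after the point
+       ...
+     \begin{tabular}{l d{2}}
+       item    &\multicolumn{1}{c}{price} \\
+       widget  &3.14   \\
+       gadget  &12.50
+     \end{tabular}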
+ + +File: latex2e.info, Node: center, Next: description, Prev: array, Up: Environments + +8.3 'center' +============ + +Synopsis: + + \begin{center} + LINE1 \\ + LINE2 \\ + ... + \end{center} + + Create a new paragraph consisting of a sequence of lines that are +centered within the left and right margins. Use double-backslash, '\\', +to get a line break (*note \\::). If some text is too long to fit on a +line then LaTeX will insert line breaks that avoid hyphenation and avoid +stretching or shrinking any interword space. + + This environment inserts space above and below the text body. See +*note \centering:: to avoid such space, for example inside a 'figure' +environment. + + This example produces three centered lines. There is extra vertical +space between the last two lines. + + \begin{center} + A Thesis Submitted in Partial Fufillment \\ + of the Requirements of \\[0.5ex] + the School of Environmental Engineering + \end{center} + + In this example, depending on the page's line width, LaTeX may choose +a line break for the part before the double backslash. If so, it will +center each of the two lines and if not it will center the single line. +Then LaTeX will break at the double backslash, and will center the +ending. + + \begin{center} + My father considered that anyone who went to chapel and didn't drink + alcohol was not to be tolerated.\\ + I grew up in that belief. ---Richard Burton + \end{center} + + A double backslash after the final line is optional. If present it +doesn't add any vertical space. + + In a two-column document the text is centered in a column, not in the +entire page. + +* Menu: + +* \centering:: Declaration form of the 'center' environment. + + +File: latex2e.info, Node: \centering, Up: center + +8.3.1 '\centering' +------------------ + +Synopsis: + + {\centering ... } + +or + + \begin{group} + \centering ... + \end{group} + + Center the material in its scope. It is most often used inside an +environment such as 'figure', or in a 'parbox'. + + This example's '\centering' declaration causes the graphic to be +horizontally centered. + + \begin{figure} + \centering + \includegraphics[width=0.6\textwidth]{ctan_lion.png} + \caption{CTAN Lion} \label{fig:CTANLion} + \end{figure} + +The scope of this '\centering' ends with the '\end{figure}'. + + Unlike the 'center' environment, the '\centering' command does not +add vertical space above and below the text. That's its advantage in +the above example; there is not an excess of space. + + It also does not start a new paragraph; it simply changes how LaTeX +formats paragraph units. If 'ww {\centering xx \\ yy} zz' is surrounded +by blank lines then LaTeX will create a paragraph whose first line 'ww +xx' is centered and whose second line, not centered, contains 'yy zz'. +Usually what is desired is for the scope of the declaration to contain a +blank line or the '\end' command of an environment such as 'figure' or +'table' that ends the paragraph unit. Thus, if '{\centering xx \\ +yy\par} zz' is surrounded by blank lines then it makes a new paragraph +with two centered lines 'xx' and 'yy', followed by a new paragraph with +'zz' that is formatted as usual. + + +File: latex2e.info, Node: description, Next: displaymath, Prev: center, Up: Environments + +8.4 'description' +================= + +Synopsis: + + \begin{description} + \item[LABEL OF FIRST ITEM] TEXT OF FIRST ITEM + \item[LABEL OF SECOND ITEM] TEXT OF SECOND ITEM + ... + \end{description} + + Environment to make a list of labeled items. 
Each item's LABEL is +typeset in bold and is flush left, so that long labels continue into the +first line of the item text. There must be at least one item; having +none causes the LaTeX error 'Something's wrong--perhaps a missing +\item'. + + This example shows the environment used for a sequence of +definitions. + + \begin{description} + \item[lama] A priest. + \item[llama] A beast. + \end{description} + +The labels 'lama' and 'llama' are output in boldface, with the left edge +on the left margin. + + Start list items with the '\item' command (*note \item::). Use the +optional labels, as in '\item[Main point]', because there is no sensible +default. Following the '\item' is optional text, which may contain +multiple paragraphs. + + Since the labels are in bold style, if the label text calls for a +font change given in argument style (see *note Font styles::) then it +will come out bold. For instance, if the label text calls for +typewriter with '\item[\texttt{label text}]' then it will appear in bold +typewriter, if that is available. The simplest way around this, in this +example to get non-bold typewriter, is to use declarative style: +'\item[{\tt label text}]'. Similarly, get the standard roman font with +'\item[{\rm label text}]'. + + For other major LaTeX labelled list environments, see *note itemize:: +and *note enumerate::. Unlike those environments, nesting 'description' +environments does not change the default label; it is boldface and flush +left at all levels. + + For information about list layout parameters, including the default +values, and for information about customizing list layout, see *note +list::. The package 'enumitem' is useful for customizing lists. + + This example changes the description labels to small caps. + + \renewcommand{\descriptionlabel}[1]{% + {\hspace{\labelsep}\textsc{#1}}} + + +File: latex2e.info, Node: displaymath, Next: document, Prev: description, Up: Environments + +8.5 'displaymath' +================= + +Synopsis: + + \begin{displaymath} + MATHEMATICAL TEXT + \end{displaymath} + + Environment to typeset the MATHEMATICAL TEXT on its own line, in +display style and centered. To make the text be flush-left use the +global option 'fleqn'; see *note Document class options::. + + In the 'displaymath' environment no equation number is added to the +math text. One way to get an equation number is to use the 'equation' +environment (*note equation::). + + LaTeX will not break the MATH TEXT across lines. + + Note that the 'amsmath' package has significantly more extensive +displayed equation facilities. For example, there are a number of ways +in that package for having math text broken across lines. + + The construct '\[MATH\]' is a synonym for the environment +'\begin{displaymath} MATH \end{displaymath}' but the latter is easier to +work with in the source; for instance, searching for a square bracket +may get false positives but the word 'displaymath' will likely be +unique. + + (Aside: The construct '$$MATH$$' from Plain TeX is sometimes +mistakenly used as a synonym for 'displaymath'. It is not a synonym, +and is not officially supported in LaTeX at all; '$$' doesn't support +the 'fleqn' option (*note Document class options::), has different +vertical spacing, and doesn't perform consistency checks.) + + The output from this example is centered and alone on its line. + + \begin{displaymath} + \int_1^2 x^2\,dx=7/3 + \end{displaymath} + +Also, the integral sign is larger than the inline version '\( \int_1^2 +x^2\,dx=7/3 \)' produces. 
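+
+   For reference, the same display written with the bracket synonym
+mentioned above is below; by that equivalence the two forms produce the
+same output.
+
+     \[
+       \int_1^2 x^2\,dx=7/3
+     \]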
+ + +File: latex2e.info, Node: document, Next: enumerate, Prev: displaymath, Up: Environments + +8.6 'document' +============== + +The 'document' environment encloses the entire body of a document. It +is required in every LaTeX document. *Note Starting and ending::. + +* Menu: + +* \AtBeginDocument:: Hook for commands at the start of the document. +* \AtEndDocument:: Hook for commands at the end of the document. + + +File: latex2e.info, Node: \AtBeginDocument, Next: \AtEndDocument, Up: document + +8.6.1 '\AtBeginDocument' +------------------------ + +Synopsis: + + \AtBeginDocument{CODE} + + Save CODE and execute it when '\begin{document}' is executed, at the +very end of the preamble. The code is executed after the font selection +tables have been set up, so the normal font for the document is the +current font. However, the code is executed as part of the preamble so +you cannot do any typesetting with it. + + You can issue this command more than once; the successive code lines +will be executed in the order that you gave them. + + +File: latex2e.info, Node: \AtEndDocument, Prev: \AtBeginDocument, Up: document + +8.6.2 '\AtEndDocument' +---------------------- + +Synopsis: + + \AtEndDocument{CODE} + + Save CODE and execute it near the end of the document. Specifically, +it is executed when '\end{document}' is executed, before the final page +is finished and before any leftover floating environments are processed. +If you want some of the code to be executed after these two processes +then include a '\clearpage' at the appropriate point in CODE. + + You can issue this command more than once; the successive code lines +will be executed in the order that you gave them. + + +File: latex2e.info, Node: enumerate, Next: eqnarray, Prev: document, Up: Environments + +8.7 'enumerate' +=============== + +Synopsis: + + \begin{enumerate} + \item[OPTIONAL LABEL OF FIRST ITEM] TEXT OF FIRST ITEM + \item[OPTIONAL LABEL OF SECOND ITEM] TEXT OF SECOND ITEM + ... + \end{enumerate} + + Environment to produce a numbered list of items. The format of the +label numbering depends on the nesting level of this environment; see +below. The default top-level numbering is '1.', '2.', etc. Each +'enumerate' list environment must have at least one item; having none +causes the LaTeX error 'Something's wrong--perhaps a missing \item'. + + This example gives the first two finishers in the 1908 Olympic +marathon. As a top-level list the labels would come out as '1.' and +'2.'. + + \begin{enumerate} + \item Johnny Hayes (USA) + \item Charles Hefferon (RSA) + \end{enumerate} + + Start list items with the '\item' command (*note \item::). If you +give '\item' an optional argument by following it with square brackets, +as in '\item[Interstitial label]', then the next item will continue the +interrupted sequence (*note \item::). That is, you will get labels like +'1.', then 'Interstitial label', then '2.'. Following the '\item' is +optional text, which may contain multiple paragraphs. + + Enumerations may be nested within other 'enumerate' environments, or +within any paragraph-making environment such as 'itemize' (*note +itemize::), up to four levels deep. This gives LaTeX's default for the +format at each nesting level, where 1 is the top level, the outermost +level. + + 1. arabic number followed by a period: '1.', '2.', ... + 2. lowercase letter inside parentheses: '(a)', '(b)' ... + 3. lowercase roman numeral followed by a period: 'i.', 'ii.', ... + 4. uppercase letter followed by a period: 'A.', 'B.', ... 
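+
+   For instance, this sketch nests one list inside another; with the
+defaults just listed the outer items come out labeled '1.' and '2.',
+and the inner items '(a)' and '(b)'.
+
+     \begin{enumerate}
+       \item Outer first item
+       \begin{enumerate}
+         \item Inner first item
+         \item Inner second item
+       \end{enumerate}
+       \item Outer second item
+     \end{enumerate}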
+
+   The 'enumerate' environment uses the counters '\enumi' through
+'\enumiv' (*note Counters::).
+
+   For other major LaTeX labeled list environments, see *note
+description:: and *note itemize::.  For information about list layout
+parameters, including the default values, and for information about
+customizing list layout, see *note list::.  The package 'enumitem' is
+useful for customizing lists.
+
+   To change the format of the label use '\renewcommand' (*note
+\newcommand & \renewcommand::) on the commands '\labelenumi' through
+'\labelenumiv'.  For instance, this first level list will be labelled
+with uppercase letters, in boldface, and without a trailing period.
+
+     \renewcommand{\labelenumi}{\textbf{\Alph{enumi}}}
+     \begin{enumerate}
+       \item Shows as boldface A
+       \item Shows as boldface B
+     \end{enumerate}
+
+   For a list of counter-labeling commands see *note \alph \Alph \arabic
+\roman \Roman \fnsymbol::.
+
+
+File: latex2e.info, Node: eqnarray, Next: equation, Prev: enumerate, Up: Environments
+
+8.8 'eqnarray'
+==============
+
+The 'eqnarray' environment is obsolete.  It has infelicities, including
+spacing that is inconsistent with other mathematics elements.  (See
+"Avoid eqnarray!" by Lars Madsen.)  New documents should include the
+'amsmath' package and use the displayed mathematics environments
+provided there, such as the 'align' environment.  We include a
+description only for completeness and for working with old documents.
+
+   Synopsis:
+
+     \begin{eqnarray}
+       FIRST FORMULA LEFT &FIRST FORMULA MIDDLE &FIRST FORMULA RIGHT \\
+       ...
+     \end{eqnarray}
+
+or
+
+     \begin{eqnarray*}
+       FIRST FORMULA LEFT &FIRST FORMULA MIDDLE &FIRST FORMULA RIGHT \\
+       ...
+     \end{eqnarray*}
+
+   Display a sequence of equations or inequalities.  The left and right
+sides are typeset in display mode, while the middle is typeset in text
+mode.
+
+   It is similar to a three-column 'array' environment, with items
+within a row separated by an ampersand ('&'), and with rows separated by
+a double backslash ('\\').  The starred form of line break ('\\*') can
+also be used to separate equations, and will disallow a page break there
+(*note \\::).
+
+   The unstarred form 'eqnarray' places an equation number on every line
+(using the 'equation' counter), unless that line contains a '\nonumber'
+command.  The starred form 'eqnarray*' omits equation numbering, while
+otherwise being the same.
+
+   The command '\lefteqn' is used for splitting long formulas across
+lines.  It typesets its argument in display style flush left in a box of
+zero width.
+
+   This example shows three lines.  The first two lines make an
+inequality, while the third line has no entry on the left side.
+
+     \begin{eqnarray*}
+       \lefteqn{x_1+x_2+\cdots+x_n}     \\
+       &\leq &y_1+y_2+\cdots+y_n        \\
+       &=    &z+y_3+\cdots+y_n
+     \end{eqnarray*}
+
+
+File: latex2e.info, Node: equation, Next: figure, Prev: eqnarray, Up: Environments
+
+8.9 'equation'
+==============
+
+Synopsis:
+
+     \begin{equation}
+       MATHEMATICAL TEXT
+     \end{equation}
+
+   The same as a 'displaymath' environment (*note displaymath::) except
+that LaTeX puts an equation number flush to the right margin.  The
+equation number is generated using the 'equation' counter.
+
+   You should have no blank lines between '\begin{equation}' and
+'\end{equation}', or LaTeX will tell you that there is a missing
+dollar sign.
+
+   The 'amsmath' package has extensive displayed equation facilities.
+New documents should include this package.
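+
+   For example, this elementary use of the environment displays its
+formula alone on a line, centered, with a number such as '(1)' set
+against the right margin; the actual number depends on how many numbered
+equations come before it.
+
+     \begin{equation}
+       e^{i\pi}+1=0
+     \end{equation}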
+ + +File: latex2e.info, Node: figure, Next: filecontents, Prev: equation, Up: Environments + +8.10 'figure' +============= + +Synopsis: + + \begin{figure}[PLACEMENT] + FIGURE BODY + \caption[LOFTITLE]{TITLE} % optional + \label{LABEL} % optional + \end{figure} + +or: + + \begin{figure*}[PLACEMENT] + FIGURE BODY + \caption[LOFTITLE]{TITLE} % optional + \label{LABEL} % optional + \end{figure*} + + Figures are for material that is not part of the normal text. An +example is material that you cannot have split between two pages, such +as a graphic. Because of this, LaTeX does not typeset figures in +sequence with normal text but instead "floats" them to a convenient +place, such as the top of a following page (*note Floats::). + + The FIGURE BODY can consist of imported graphics (*note Graphics::), +or text, LaTeX commands, etc. It is typeset in a 'parbox' of width +'\textwidth'. + + The possible values of PLACEMENT are 'h' for 'here', 't' for 'top', +'b' for 'bottom', and 'p' for 'on a separate page of floats'. For the +effect of these options on the float placement algorithm, see *note +Floats::. + + The starred form 'figure*' is used when a document is in +double-column mode (*note \twocolumn::). It produces a figure that +spans both columns, at the top of the page. To add the possibility of +placing at a page bottom see the discussion of PLACEMENT 'b' in *note +Floats::. + + The label is optional; it is used for cross references (*note Cross +references::). The optional '\caption' command specifies caption text +for the figure (*note \caption::). By default it is numbered. If +LOFTITLE is present, it is used in the list of figures instead of TITLE +(*note Table of contents etc.::). + + This example makes a figure out of a graphic. LaTeX will place that +graphic and its caption at the top of a page or, if it is pushed to the +end of the document, on a page of floats. + + \usepackage{graphicx} % in preamble + ... + \begin{figure}[t] + \centering + \includegraphics[width=0.5\textwidth]{CTANlion.png} + \caption{The CTAN lion, by Duane Bibby} + \end{figure} + + +File: latex2e.info, Node: filecontents, Next: flushleft, Prev: figure, Up: Environments + +8.11 'filecontents' +=================== + +Synopsis: + + \begin{filecontents}[OPTION]{FILENAME} + TEXT + \end{filecontents} + +or + + \begin{filecontents*}[OPTION]{FILENAME} + TEXT + \end{filecontents*} + + Create a file named FILENAME in the current directory (or the output +directory, if specified; *note output directory::) and write TEXT to it. +By default, an existing file is not overwritten. + + The unstarred version of the environment 'filecontents' prefixes the +content of the created file with a header of TeX comments; see the +example below. The starred version 'filecontents*' does not include the +header. + + The possible options are: + +'force' +'overwrite' + Overwrite an existing file. + +'noheader' + Omit the header. Equivalent to using 'filecontents*'. + +'nosearch' + Only check the current directory (and the output directory, if + specified) for an existing file, not the entire search path. + + These options were added in a 2019 release of LaTeX. + + This environment can be used anywhere in the preamble, although it +often appears before the '\documentclass' command. It is commonly used +to create a '.bib' or other such data file from the main document +source, to make the source file self-contained. Similarly, it can be +used to create a custom style or class file, again making the source +self-contained. 
+ + For example, this document: + + \documentclass{article} + \begin{filecontents}{JH.sty} + \newcommand{\myname}{Jim Hef{}feron} + \end{filecontents} + \usepackage{JH} + \begin{document} + Article by \myname. + \end{document} + +produces this file 'JH.sty': + + %% LaTeX2e file `JH.sty' + %% generated by the `filecontents' environment + %% from source `test' on 2015/10/12. + %% + \newcommand{\myname}{Jim Hef{}feron} + + +File: latex2e.info, Node: flushleft, Next: flushright, Prev: filecontents, Up: Environments + +8.12 'flushleft' +================ + +Synopsis: + + \begin{flushleft} + LINE1 \\ + LINE2 \\ + ... + \end{flushleft} + + An environment that creates a paragraph whose lines are flush to the +left-hand margin, and ragged right. If you have lines that are too long +then LaTeX will linebreak them in a way that avoids hyphenation and +stretching or shrinking interword spaces. To force a new line use a +double backslash, '\\'. For the declaration form see *note +\raggedright::. + + This creates a box of text that is at most 3 inches wide, with the +text flush left and ragged right. + + \noindent\begin{minipage}{3in} + \begin{flushleft} + A long sentence that will be broken by \LaTeX{} + at a convenient spot. \\ + And, a fresh line forced by the double backslash. + \end{flushleft} + \end{minipage} + +* Menu: + +* \raggedright:: Declaration form of the 'flushleft' environment. + + +File: latex2e.info, Node: \raggedright, Up: flushleft + +8.12.1 '\raggedright' +--------------------- + +Synopses: + + {\raggedright ... } + +or + + \begin{ENVIRONMENT} \raggedright + ... + \end{ENVIRONMENT} + + A declaration which causes lines to be flush to the left margin and +ragged right. It can be used inside an ENVIRONMENT such as 'quote' or +in a 'parbox'. For the environment form see *note flushleft::. + + Unlike the 'flushleft' environment, the '\raggedright' command does +not start a new paragraph; it only changes how LaTeX formats paragraph +units. To affect a paragraph unit's format, the scope of the +declaration must contain the blank line or '\end' command that ends the +paragraph unit. + + Here '\raggedright' in each second column keeps LaTeX from doing very +awkward typesetting to fit the text into the narrow column. Note that +'\raggedright' is inside the curly braces '{...}' to delimit its effect. + + \begin{tabular}{rp{2in}} + Team alpha &{\raggedright This team does all the real work.} \\ + Team beta &{\raggedright This team ensures that the water + cooler is never empty.} \\ + \end{tabular} + + +File: latex2e.info, Node: flushright, Next: itemize, Prev: flushleft, Up: Environments + +8.13 'flushright' +================= + + \begin{flushright} + LINE1 \\ + LINE2 \\ + ... + \end{flushright} + + An environment that creates a paragraph whose lines are flush to the +right-hand margin and ragged left. If you have lines that are too long +to fit the margins then LaTeX will linebreak them in a way that avoids +hyphenation and stretching or shrinking inter-word spaces. To force a +new line use a double backslash, '\\'. For the declaration form +see *note \raggedleft::. + + For an example related to this environment, see *note flushleft::, +where one just have mutatis mutandis to replace 'flushleft' by +'flushright'. + +* Menu: + +* \raggedleft:: Declaration form of the 'flushright' environment. + + +File: latex2e.info, Node: \raggedleft, Up: flushright + +8.13.1 '\raggedleft' +-------------------- + +Synopses: + + {\raggedleft ... } + +or + + \begin{ENVIRONMENT} \raggedleft + ... 
+ \end{ENVIRONMENT} + + A declaration which causes lines to be flush to the right margin and +ragged left. It can be used inside an ENVIRONMENT such as 'quote' or in +a 'parbox'. For the environment form see *note flushright::. + + Unlike the 'flushright' environment, the '\raggedleft' command does +not start a new paragraph; it only changes how LaTeX formats paragraph +units. To affect a paragraph unit's format, the scope of the +declaration must contain the blank line or '\end' command that ends the +paragraph unit. + + For an example related to this environment, see *note \raggedright::, +where one just have mutatis mutandis to replace '\raggedright' by +'\raggedleft'. + + +File: latex2e.info, Node: itemize, Next: letter, Prev: flushright, Up: Environments + +8.14 'itemize' +============== + +Synopsis: + + \begin{itemize} + \item[OPTIONAL LABEL OF FIRST ITEM] TEXT OF FIRST ITEM + \item[OPTIONAL LABEL OF SECOND ITEM] TEXT OF SECOND ITEM + ... + \end{itemize} + + Produce an "unordered list", sometimes called a bullet list. There +must be at least one '\item' within the environment; having none causes +the LaTeX error 'Something's wrong--perhaps a missing \item'. + + This gives a two-item list. + + \begin{itemize} + \item Pencil and watercolor sketch by Cassandra + \item Rice portrait + \end{itemize} + +With the default locale--without loading e.g. 'babel' package with +another language than USenglish--as a top-level list each label would +come out as a bullet, *. The format of the labeling depends on the +nesting level; see below. + + Start list items with the '\item' command (*note \item::). If you +give '\item' an optional argument by following it with square brackets, +as in '\item[OPTIONAL LABEL]', then by default OPTIONAL LABEL will +appear in bold and be flush right, so it could extend into the left +margin. For labels that are flush left see the *note description:: +environment. Following the '\item' is the text of the item, which may +be empty or contain multiple paragraphs. + + Unordered lists can be nested within one another, up to four levels +deep. They can also be nested within other paragraph-making +environments, such as 'enumerate' (*note enumerate::). + + The 'itemize' environment uses the commands '\labelitemi' through +'\labelitemiv' to produce the default label (note the convention of +lowercase roman numerals at the end of the command names that signify +the nesting level). These are the default marks at each level. + + 1. * (bullet, from '\textbullet') + 2. -- (bold en-dash, from '\normalfont\bfseries\textendash') + 3. * (asterisk, from '\textasteriskcentered') + 4. . (vertically centered dot, rendered here as a period, from + '\textperiodcentered') + + Change the labels with '\renewcommand'. For instance, this makes the +first level use diamonds. + + \renewcommand{\labelitemi}{$\diamond$} + + The distance between the left margin of the enclosing environment and +the left margin of the 'itemize' list is determined by the parameters +'\leftmargini' through '\leftmarginvi'. (This also uses the convention +of using lowercase roman numerals a the end of the command name to +denote the nesting level.) The defaults are: '2.5em' in level 1 ('2em' +in two-column mode), '2.2em' in level 2, '1.87em' in level 3, and +'1.7em' in level 4, with smaller values for more deeply nested levels. + + For other major LaTeX labeled list environments, see *note +description:: and *note enumerate::. 
The 'itemize', 'enumerate', and
+'description' environments use the same list layout parameters.  For a
+description, including the default values, and for information about
+customizing list layout, see *note list::.  The package 'enumitem' is
+useful for customizing lists.
+
+   This example greatly reduces the margin space for outermost itemized
+lists.
+
+     \setlength{\leftmargini}{1.25em} % default 2.5em
+
+   Especially for lists with short items, it may be desirable to elide
+space between items.  Here is an example defining an 'itemize*'
+environment with no extra spacing between items, or between paragraphs
+within a single item ('\parskip' is not list-specific, *note \parindent
+& \parskip::):
+
+     \newenvironment{itemize*}%
+       {\begin{itemize}%
+         \setlength{\itemsep}{0pt}%
+         \setlength{\parsep}{0pt}%
+         \setlength{\parskip}{0pt}}%
+       {\end{itemize}}
+
+
+File: latex2e.info, Node: letter, Next: list, Prev: itemize, Up: Environments
+
+8.15 'letter' environment: writing letters
+==========================================
+
+This environment is used for creating letters.  *Note Letters::.
+
+
+File: latex2e.info, Node: list, Next: math, Prev: letter, Up: Environments
+
+8.16 'list'
+===========
+
+Synopsis:
+
+     \begin{list}{LABELING}{SPACING}
+       \item[OPTIONAL LABEL OF FIRST ITEM] TEXT OF FIRST ITEM
+       \item[OPTIONAL LABEL OF SECOND ITEM] TEXT OF SECOND ITEM
+       ...
+     \end{list}
+
+   An environment for constructing lists.
+
+   Note that this environment does not typically appear in the document
+body.  Most lists created by LaTeX authors are the ones that come
+standard: the 'description', 'enumerate', and 'itemize' environments
+(*note description::, *note enumerate::, and *note itemize::).
+
+   Instead, the 'list' environment is most often used in macros.  For
+example, many standard LaTeX environments that do not immediately appear
+to be lists are in fact constructed using 'list', including 'quotation',
+'quote', and 'center' (*note quotation & quote::, *note center::).
+
+   This uses the 'list' environment to define a new custom environment.
+
+     \newcounter{namedlistcounter}  % number the items
+     \newenvironment{named}
+       {\begin{list}
+          {Item~\Roman{namedlistcounter}.} % labeling
+          {\usecounter{namedlistcounter}   % set counter
+           \setlength{\leftmargin}{3.5em}} % set spacing
+       }
+       {\end{list}}
+
+     \begin{named}
+       \item Shows as ``Item~I.''
+       \item[Special label.] Shows as ``Special label.''
+       \item Shows as ``Item~II.''
+     \end{named}
+
+   The mandatory first argument LABELING specifies the default labeling
+of list items.  It can contain text and LaTeX commands, as above where
+it contains both 'Item' and '\Roman{...}'.  LaTeX forms the label by
+putting the LABELING argument in a box of width '\labelwidth'.  If the
+label is wider than that, the additional material extends to the right.
+When making an instance of a 'list' you can override the default
+labeling by giving '\item' an optional argument by including square
+brackets and the text, as in the above '\item[Special label.]'; *note
+\item::.
+
+   The mandatory second argument SPACING has a list of commands.  This
+list can be empty.  A command that can go in here is
+'\usecounter{COUNTERNAME}' (*note \usecounter::).  Use this to tell
+LaTeX to number the items using the given counter.  The counter will be
+reset to zero each time LaTeX enters the environment, and the counter is
+incremented by one each time LaTeX encounters an '\item' that does not
+have an optional argument.
+ + Another command that can go in SPACING is '\makelabel', which +constructs the label box. By default it puts the contents flush right. +Its only argument is the label, which it typesets in LR mode (*note +Modes::). One example of changing its definition is that to the above +'named' example, before the definition of the environment add +'\newcommand{\namedmakelabel}[1]{\textsc{#1}}', and between the +'\setlength' command and the parenthesis that closes the SPACING +argument also add '\let\makelabel\namedmakelabel'. Then the labels will +be typeset in small caps. Similarly, changing the second code line to +'\let\makelabel\fbox' puts the labels inside a framed box. Another +example of the '\makelabel' command is below, in the definition of the +'redlabel' environment. + + Also often in SPACING are commands to redefine the spacing for the +list. Below are the spacing parameters with their default values. +(Default values for derived environments such as 'itemize' can be +different than the values shown here.) See also the figure that follows +the list. Each is a length (*note Lengths::). The vertical spaces are +normally rubber lengths, with 'plus' and 'minus' components, to give TeX +flexibility in setting the page. Change each with a command such as +'\setlength{itemsep}{2pt plus1pt minus1pt}'. For some effects these +lengths should be zero or negative. + +'\itemindent' + Extra horizontal space indentation, beyond 'leftmargin', of the + first line each item. Its default value is '0pt'. + +'\itemsep' + Vertical space between items, beyond the '\parsep'. The defaults + for the first three levels in LaTeX's 'article', 'book', and + 'report' classes at 10 point size are: '4pt plus2pt minus1pt', + '\parsep' (that is, '2pt plus1pt minus1pt'), and '\topsep' (that + is, '2pt plus1pt minus1pt'). The defaults at 11 point are: '4.5pt + plus2pt minus1pt', '\parsep' (that is, '2pt plus1pt minus1pt'), and + '\topsep' (that is, '2pt plus1pt minus1pt'). The defaults at 12 + point are: '5pt plus2.5pt minus1pt', '\parsep' (that is, '2.5pt + plus1pt minus1pt'), and '\topsep' (that is, '2.5pt plus1pt + minus1pt'). + +'\labelsep' + Horizontal space between the label and text of an item. The + default for LaTeX's 'article', 'book', and 'report' classes is + '0.5em'. + +'\labelwidth' + Horizontal width. The box containing the label is nominally this + wide. If '\makelabel' returns text that is wider than this then + the first line of the item will be indented to make room for this + extra material. If '\makelabel' returns text of width less than or + equal to '\labelwidth' then LaTeX's default is that the label is + typeset flush right in a box of this width. + + The left edge of the label box is + '\leftmargin'+'\itemindent'-'\labelsep'-'\labelwidth' from the left + margin of the enclosing environment. + + The default for LaTeX's 'article', 'book', and 'report' classes at + the top level is '\leftmargini'-'\labelsep', (which is '2em' in one + column mode and '1.5em' in two column mode). At the second level + it is '\leftmarginii'-'\labelsep', and at the third level it is + '\leftmarginiii'-'\labelsep'. These definitions make the label's + left edge coincide with the left margin of the enclosing + environment. + +'\leftmargin' + Horizontal space between the left margin of the enclosing + environment (or the left margin of the page if this is a top-level + list), and the left margin of this list. It must be non-negative. 
+ + In the standard LaTeX document classes, a top-level list has this + set to the value of '\leftmargini', while a list that is nested + inside a top-level list has this margin set to '\leftmarginii'. + More deeply nested lists get the values of '\leftmarginiii' through + '\leftmarginvi'. (Nesting greater than level five generates the + error message 'Too deeply nested'.) + + The defaults for the first three levels in LaTeX's 'article', + 'book', and 'report' classes are: '\leftmargini' is '2.5em' (in two + column mode, '2em'), '\leftmarginii' is '2.2em', and + '\leftmarginiii' is '1.87em'. + +'\listparindent' + Horizontal space of additional line indentation, beyond + '\leftmargin', for second and subsequent paragraphs within a list + item. A negative value makes this an "outdent". Its default value + is '0pt'. + +'\parsep' + Vertical space between paragraphs within an item. The defaults for + the first three levels in LaTeX's 'article', 'book', and 'report' + classes at 10 point size are: '4pt plus2pt minus1pt', '2pt plus1pt + minus1pt', and '0pt'. The defaults at 11 point size are: '4.5pt + plus2pt minus1pt', '2pt plus1pt minus1pt', and '0pt'. The defaults + at 12 point size are: '5pt plus2.5pt minus1pt', '2.5pt plus1pt + minus1pt', and '0pt'. + +'\partopsep' + Vertical space added, beyond '\topsep'+'\parskip', to the top and + bottom of the entire environment if the list instance is preceded + by a blank line. (A blank line in the LaTeX source before the list + changes spacing at both the top and bottom of the list; whether the + line following the list is blank does not matter.) + + The defaults for the first three levels in LaTeX's 'article', + 'book', and 'report' classes at 10 point size are: '2pt plus1 + minus1pt', '2pt plus1pt minus1pt', and '1pt plus0pt minus1pt'. The + defaults at 11 point are: '3pt plus1pt minus1pt', '3pt plus1pt + minus1pt', and '1pt plus0pt minus1pt'). The defaults at 12 point + are: '3pt plus2pt minus3pt', '3pt plus2pt minus2pt', and '1pt + plus0pt minus1pt'. + +'\rightmargin' + Horizontal space between the right margin of the list and the right + margin of the enclosing environment. Its default value is '0pt'. + It must be non-negative. + +'\topsep' + Vertical space added to both the top and bottom of the list, in + addition to '\parskip' (*note \parindent & \parskip::). The + defaults for the first three levels in LaTeX's 'article', 'book', + and 'report' classes at 10 point size are: '8pt plus2pt minus4pt', + '4pt plus2pt minus1pt', and '2pt plus1pt minus1pt'. The defaults + at 11 point are: '9pt plus3pt minus5pt', '4.5pt plus2pt minus1pt', + and '2pt plus1pt minus1pt'. The defaults at 12 point are: '10pt + plus4pt minus6pt', '5pt plus2.5pt minus1pt', and '2.5pt plus1pt + minus1pt'. + + This shows the horizontal and vertical distances. + +[image src="latex2e-figures/list.png" text="....................................................................... +..............Surrounding environment, preceding material.............. - + | v0 + |---h0--|-h1-| | + ..Label.. .....First item..................................... - + ............................................................ + ............................................................ - + | v1 + ........First item, second paragraph .................. - + |-h2-| | v2 + | + ..Label.. .....Second item.................................... - + ............................................................ + ............................................................ 
- +|-h3--|-h4-| |-h5-| |v3 + | +..............Surrounding environment, following material.............. - +......................................................................."] + + + The lengths shown are listed below. The key relationship is that the +right edge of the bracket for H1 equals the right edge of the bracket +for H4, so that the left edge of the label box is at H3+H4-(H0+H1). + +V0 + '\topsep' + '\parskip' if the list environment does not start a new + paragraph, and '\topsep'+'\parskip'+'\partopsep' if it does + +V1 + '\parsep' + +V2 + '\itemsep'+'\parsep' + +V3 + Same as V0. (This space is affected by whether a blank line + appears in the source above the environment; whether a blank line + appears in the source below the environment does not matter.) + +H0 + '\labelwidth' + +H1 + '\labelsep' + +H2 + '\listparindent' + +H3 + '\leftmargin' + +H4 + '\itemindent' + +H5 + '\rightmargin' + + The list's left and right margins, shown above as H3 and H5, are with +respect to the ones provided by the surrounding environment, or with +respect to the page margins for a top-level list. The line width used +for typesetting the list items is '\linewidth' (*note Page layout +parameters::). For instance, set the list's left margin to be one +quarter of the distance between the left and right margins of the +enclosing environment with '\setlength{\leftmargin}{0.25\linewidth}'. + + Page breaking in a list structure is controlled by the three +parameters below. For each, the LaTeX default is '-\@lowpenalty', that +is, '-51'. Because it is negative, it somewhat encourages a page break +at each spot. Change it with, e.g., '\@beginparpenalty=9999'; a value +of 10000 prohibits a page break. + +'\@beginparpenalty' + The page breaking penalty for breaking before the list (default + '-51'). + +'\@itempenalty' + The page breaking penalty for breaking before a list item (default + '-51'). + +'\@endparpenalty' + The page breaking penalty for breaking after a list (default + '-51'). + + The package 'enumitem' is useful for customizing lists. + + This example has the labels in red. They are numbered, and the left +edge of the label lines up with the left edge of the item text. *Note +\usecounter::. + + \usepackage{color} + \newcounter{cnt} + \newcommand{\makeredlabel}[1]{\textcolor{red}{#1.}} + \newenvironment{redlabel} + {\begin{list} + {\arabic{cnt}} + {\usecounter{cnt} + \setlength{\labelwidth}{0em} + \setlength{\labelsep}{0.5em} + \setlength{\leftmargin}{1.5em} + \setlength{\itemindent}{0.5em} % equals \labelwidth+\labelsep + \let\makelabel=\makeredlabel + } + } + {\end{list}} + +* Menu: + +* \item:: An entry in a list. +* trivlist:: A restricted form of 'list'. + + +File: latex2e.info, Node: \item, Next: trivlist, Up: list + +8.16.1 '\item': An entry in a list +---------------------------------- + +Synopsis: + + \item text of item + +or + + \item[OPTIONAL-LABEL] text of item + + An entry in a list. The entries are prefixed by a label, whose +default depends on the list type. + + Because the optional label is surrounded by square brackets '[...]', +if you have an item whose text starts with [, you have to hide the +bracket inside curly braces, as in: '\item {[} is an open square +bracket'; otherwise, LaTeX will think it marks the start of an optional +label. + + Similarly, if the item does have the optional label and you need a +close square bracket inside that label, you must hide it in the same +way: '\item[Close square bracket, {]}]'. *Note LaTeX command syntax::. 
+ + In this example the enumerate list has two items that use the default +label and one that uses the optional label. + + \begin{enumerate} + \item Moe + \item[sometimes] Shemp + \item Larry + \end{enumerate} + + The first item is labelled '1.', the second item is labelled +'sometimes', and the third item is labelled '2.'. Because of the +optional label in the second item, the third item is not labelled '3.'. + + +File: latex2e.info, Node: trivlist, Prev: \item, Up: list + +8.16.2 'trivlist': A restricted form of 'list' +---------------------------------------------- + +Synopsis: + + \begin{trivlist} + ... + \end{trivlist} + + A restricted version of the list environment, in which margins are +not indented and an '\item' without an optional argument produces no +text. It is most often used in macros, to define an environment where +the '\item' command is part of the environment's definition. For +instance, the 'center' environment is defined essentially like this: + + \newenvironment{center} + {\begin{trivlist}\centering\item\relax} + {\end{trivlist}} + + Using 'trivlist' in this way allows the macro to inherit some common +code: combining vertical space of two adjacent environments; detecting +whether the text following the environment should be considered a new +paragraph or a continuation of the previous one; adjusting the left and +right margins for possible nested list environments. + + Specifically, 'trivlist' uses the current values of the list +parameters (*note list::), except that '\parsep' is set to the value of +'\parskip', and '\leftmargin', '\labelwidth', and '\itemindent' are set +to zero. + + This example outputs the items as two paragraphs, except that (by +default) they have no paragraph indent and are vertically separated. + + \begin{trivlist} + \item The \textit{Surprise} is not old; no one would call her old. + \item She has a bluff bow, lovely lines. + \end{trivlist} + + +File: latex2e.info, Node: math, Next: minipage, Prev: list, Up: Environments + +8.17 'math' +=========== + +Synopsis: + + \begin{math} + MATH + \end{math} + + The 'math' environment inserts given MATH material within the running +text. '\(...\)' and '$...$' are synonyms. *Note Math formulas::. + + +File: latex2e.info, Node: minipage, Next: picture, Prev: math, Up: Environments + +8.18 'minipage' +=============== + +Synopses: + + \begin{minipage}{WIDTH} + CONTENTS + \end{minipage} + +or + + \begin{minipage}[POSITION][HEIGHT][INNER-POS]{WIDTH} + CONTENTS + \end{minipage} + + Put CONTENTS into a box that is WIDTH wide. This is like a small +version of a page; it can contain its own footnotes, itemized lists, +etc. (There are some restrictions, including that it cannot have +floats.) This box will not be broken across pages. So 'minipage' is +similar to '\parbox' (*note \parbox::) but here you can have paragraphs. + + This example will be 3 inches wide, and has two paragraphs. + + \begin{minipage}{3in} + Stephen Kleene was a founder of the Theory of Computation. + + He was a student of Church, wrote three influential texts, + was President of the Association for Symbolic Logic, + and won the National Medal of Science. + \end{minipage} + +See below for a discussion of the paragraph indent inside a 'minipage'. + + The required argument WIDTH is a rigid length (*note Lengths::). It +gives the width of the box into which CONTENTS are typeset. + + There are three optional arguments, POSITION, HEIGHT, and INNER-POS. +You need not include all three. 
For example, get the default POSITION +and set the HEIGHT with '\begin{minipage}[c][2.54cm]{\columnwidth} +CONTENTS \end{minipage}'. (Get the natural height with an empty +argument, '[]'.) + + The optional argument POSITION governs how the 'minipage' vertically +aligns with the surrounding material. + +'c' + (synonym 'm') Default. Positions the 'minipage' so its vertical + center lines up with the center of the adjacent text line. + +'t' + Align the baseline of the top line in the 'minipage' with the + baseline of the surrounding text (plain TeX's '\vtop'). + +'b' + Align the baseline of the bottom line in the 'minipage' with the + baseline of the surrounding text (plain TeX's '\vbox'). + + To see the effects of these, contrast running this + + ---\begin{minipage}[c]{0.25in} + first\\ second\\ third + \end{minipage} + +with the results of changing 'c' to 'b' or 't'. + + The optional argument HEIGHT is a rigid length (*note Lengths::). It +sets the height of the 'minipage'. You can enter any value larger than, +or equal to, or smaller than the 'minipage''s natural height and LaTeX +will not give an error or warning. You can also set it to a height of +zero or a negative value. + + The final optional argument INNER-POS controls the placement of +CONTENTS inside the box. These are the possible values are (the default +is the value of POSITION). + +'t' + Place CONTENTS at the top of the box. + +'c' + Place it in the vertical center. + +'b' + Place it at the box bottom. + +'s' + Stretch CONTENTS out vertically; it must contain vertically + stretchable space. + + The INNER-POS argument makes sense when the HEIGHT option is set to a +value larger than the 'minipage''s natural height. To see the effect of +the options, run this example with the various choices in place of 'b'. + + Text before + \begin{center} + ---\begin{minipage}[c][3in][b]{0.25\textwidth} + first\\ second\\ third + \end{minipage} + \end{center} + Text after + + By default paragraphs are not indented in a 'minipage'. Change that +with a command such as '\setlength{\parindent}{1pc}' at the start of +CONTENTS. + + Footnotes in a 'minipage' environment are handled in a way that is +particularly useful for putting footnotes in figures or tables. A +'\footnote' or '\footnotetext' command puts the footnote at the bottom +of the minipage instead of at the bottom of the page, and it uses the +'\mpfootnote' counter instead of the ordinary 'footnote' counter (*note +Counters::). + + This puts the footnote at the bottom of the table, not the bottom of +the page. + + \begin{center} % center the minipage on the line + \begin{minipage}{2.5in} + \begin{center} % center the table inside the minipage + \begin{tabular}{ll} + \textsc{Monarch} &\textsc{Reign} \\ \hline + Elizabeth II &63 years\footnote{to date} \\ + Victoria &63 years \\ + George III &59 years + \end{tabular} + \end{center} + \end{minipage} + \end{center} + + If you nest minipages then there is an oddness when using footnotes. +Footnotes appear at the bottom of the text ended by the next +'\end{minipage}' which may not be their logical place. + + This puts a table containing data side by side with a map graphic. +They are vertically centered. + + % siunitx to have the S column specifier, + % which aligns numbers on their decimal point. + \usepackage{siunitx} + \newcommand*{\vcenteredhbox}[1]{\begin{tabular}{@{}c@{}}#1\end{tabular}} + ... 
+ \begin{center} + \vcenteredhbox{\includegraphics[width=0.3\textwidth]{nyc.png}} + \hspace{0.1\textwidth} + \begin{minipage}{0.5\textwidth} + \begin{tabular}{r|S} + % \multicolumn to remove vertical bar between column headers + \multicolumn{1}{r}{Borough} & + % braces to prevent siunitx from misinterpreting the + % period as a decimal separator + {Pop. (million)} \\ \hline + The Bronx &1.5 \\ + Brooklyn &2.6 \\ + Manhattan &1.6 \\ + Queens &2.3 \\ + Staten Island &0.5 + \end{tabular} + \end{minipage} + \end{center} + + +File: latex2e.info, Node: picture, Next: quotation & quote, Prev: minipage, Up: Environments + +8.19 'picture' +============== + +Synopses: + \begin{picture}(WIDTH,HEIGHT) + PICTURE COMMAND + \end{picture} + +or + + \begin{picture}(WIDTH,HEIGHT)(XOFFSET,YOFFSET) + PICTURE COMMAND + \end{picture} + +Where there may be any number of PICTURE COMMAND's. + + An environment to create simple pictures containing lines, arrows, +boxes, circles, and text. This environment is not obsolete, but new +documents typically use much more powerful graphics creation systems, +such as TikZ, PSTricks, MetaPost, or Asymptote. None of these are +covered in this document; see CTAN. + + To start, here's an example showing the parallelogram law for adding +vectors. + + \setlength{\unitlength}{1cm} + \begin{picture}(6,6) % picture box will be 6cm wide by 6cm tall + \put(0,0){\vector(2,1){4}} % for every 2 over this vector goes 1 up + \put(2,1){\makebox(0,0)[l]{\ first leg}} + \put(4,2){\vector(1,2){2}} + \put(5,4){\makebox(0,0)[l]{\ second leg}} + \put(0,0){\vector(1,1){6}} + \put(3,3){\makebox(0,0)[r]{sum\ }} + \end{picture} + + The 'picture' environment has one required argument, a pair of +positive real numbers (WIDTH,HEIGHT). Multiply these by the value +'\unitlength' to get the nominal size of the output, i.e. the space that +LaTeX reserves on the output page. This nominal size need not be how +large the picture really is; LaTeX will draw things from the picture +outside the picture's box. + + This environment also has an optional argument (XOFFSET,YOFFSET). It +is used to shift the origin. Unlike most optional arguments, this one +is not contained in square brackets. As with the required argument, it +consists of a pair of two real numbers, but these may also be negative +or null. Multiply these by '\unitlength' to get the coordinates of the +point at the lower-left corner of the picture. + + For example, if '\unitlength' has been set to '1mm', the command + + \begin{picture}(100,200)(10,20) + +produces a box of width 100 millimeters and height 200 millimeters. The +picture's origin is the point (10mm,20mm) and so the lower-left corner +is there, and the upper-right corner is at (110mm,220mm). When you +first draw a picture you typically omit the optional argument, leaving +the origin at the lower-left corner. If you then want to modify your +picture by shifting everything, you can just add the appropriate +optional argument. + + Each PICTURE COMMAND tells LaTeX where to put something by providing +its position. A "position" is a pair such as '(2.4,-5)' giving the x- +and y-coordinates. A "coordinate" is a not a length, it is a real +number (it may have a decimal point or a minus sign). It specifies a +length in multiples of the unit length '\unitlength', so if +'\unitlength' has been set to '1cm', then the coordinate '2.54' +specifies a length of 2.54 centimeters. + + LaTeX's default for '\unitlength' is '1pt'. It is a rigid length +(*note Lengths::). 
Change it with the '\setlength' command (*note +\setlength::). Make this change only outside of a 'picture' +environment. + + The 'picture' environment supports using standard arithmetic +expressions as well as numbers. + + Coordinates are given with respect to an origin, which is by default +at the lower-left corner of the picture. Note that when a position +appears as an argument, as with '\put(1,2){...}', it is not enclosed in +braces since the parentheses serve to delimit the argument. Also, +unlike in some computer graphics systems, larger y-coordinates are +further up the page, for example, y = 1 is _above_ y = 0. + + There are four ways to put things in a picture: '\put', '\multiput', +'\qbezier', and '\graphpaper'. The most often used is '\put'. This + + \put(11.3,-0.3){...} + +places the object with its reference point at coordinates (11.3,-0.3). +The reference points for various objects will be described below. The +'\put' command creates an "LR box" (*note Modes::). Anything that can +go in an '\mbox' (*note \mbox & \makebox::) can go in the text argument +of the '\put' command. The reference point will be the lower left +corner of the box. In this picture + + \setlength{\unitlength}{1cm} + ...\begin{picture}(1,1) + \put(0,0){\line(1,0){1}} + \put(0,0){\line(1,1){1}} + \end{picture} + +the three dots are just slightly left of the point of the angle formed +by the two lines. (Also, '\line(1,1){1}' does not call for a line of +length one; rather the line has a change in the x coordinate of 1.) + + The '\multiput', 'qbezier', and 'graphpaper' commands are described +below. + + You can also use this environment to place arbitrary material at an +exact location. For example: + + \usepackage{color,graphicx} % in preamble + ... + \begin{center} + \setlength{\unitlength}{\textwidth} + \begin{picture}(1,1) % leave space, \textwidth wide and tall + \put(0,0){\includegraphics[width=\textwidth]{desertedisland.jpg}} + \put(0.25,0.35){\textcolor{red}{X Treasure here}} + \end{picture} + \end{center} + +The red X will be precisely a quarter of the '\textwidth' from the left +margin, and '0.35\textwidth' up from the bottom of the picture. Another +example of this usage is to put similar code in the page header to get +repeat material on each of a document's pages. + +* Menu: + +* \put:: Place an object at a specified place. +* \multiput:: Draw multiple instances of an object. +* \qbezier:: Draw a quadratic Bézier curve. +* \graphpaper:: Draw graph paper. +* \line:: Draw a straight line. +* \linethickness:: Set thickness of horizontal and vertical lines. +* \thinlines:: The default line thickness. +* \thicklines:: A heavier line thickness. +* \circle:: Draw a circle. +* \oval:: Draw an oval. +* \shortstack:: Make a stack of objects. +* \vector:: Draw a line with an arrow. +* \makebox (picture):: Draw a box of the specified size. +* \framebox (picture):: Draw a box with a frame around it. +* \frame:: Draw a frame around an object. +* \dashbox:: Draw a dashed box. + + +File: latex2e.info, Node: \put, Next: \multiput, Up: picture + +8.19.1 '\put' +------------- + +Synopsis: + + \put(XCOORD,YCOORD){CONTENT} + + Place CONTENT at the coordinate (XCOORD,YCOORD). See the discussion +of coordinates and '\unitlength' in *note picture::. The CONTENT is +processed in LR mode (*note Modes::) so it cannot contain line breaks. + + This includes the text into the 'picture'. 
+ + \put(4.5,2.5){Apply the \textit{unpoke} move} + + The reference point, the location (4.5,2.5), is the lower left of the +text, at the bottom left of the 'A'. + + +File: latex2e.info, Node: \multiput, Next: \qbezier, Prev: \put, Up: picture + +8.19.2 '\multiput' +------------------ + +Synopsis: + + \multiput(X,Y)(DELTA_X,DELTA_Y){NUM-COPIES}{OBJ} + + Copy OBJ a total of NUM-COPIES times, with an increment of +DELTA_X,DELTA_Y. The OBJ first appears at position (x,y), then at +(x+\delta_x,y+\delta_y), and so on. + + This draws a simple grid with every fifth line in bold (see also +*note \graphpaper::). + + \begin{picture}(10,10) + \linethickness{0.05mm} + \multiput(0,0)(1,0){10}{\line(0,1){10}} + \multiput(0,0)(0,1){10}{\line(1,0){10}} + \linethickness{0.5mm} + \multiput(0,0)(5,0){3}{\line(0,1){10}} + \multiput(0,0)(0,5){3}{\line(1,0){10}} + \end{picture} + + +File: latex2e.info, Node: \qbezier, Next: \graphpaper, Prev: \multiput, Up: picture + +8.19.3 '\qbezier' +----------------- + +Synopsis: + + \qbezier(X1,Y1)(X2,Y2)(X3,Y3) + \qbezier[NUM](X1,Y1)(X2,Y2)(X3,Y3) + + Draw a quadratic Bezier curve whose control points are given by the +three required arguments '(X1,Y1)', '(X2,Y2)', and '(X3,Y3)'. That is, +the curve runs from (X1,Y1) to (X3,Y3), is quadratic, and is such that +the tangent line at (X1,Y1) passes through (X2,Y2), as does the tangent +line at (X3,Y3). + + This draws a curve from the coordinate (1,1) to (1,0). + + \qbezier(1,1)(1.25,0.75)(1,0) + +The curve's tangent line at (1,1) contains (1.25,0.75), as does the +curve's tangent line at (1,0). + + The optional argument NUM gives the number of calculated intermediate +points. The default is to draw a smooth curve whose maximum number of +points is '\qbeziermax' (change this value with '\renewcommand'). + + This draws a rectangle with a wavy top, using '\qbezier' for that +curve. + + \begin{picture}(8,4) + \put(0,0){\vector(1,0){8}} % x axis + \put(0,0){\vector(0,1){4}} % y axis + \put(2,0){\line(0,1){3}} % left side + \put(4,0){\line(0,1){3.5}} % right side + \qbezier(2,3)(2.5,2.9)(3,3.25) + \qbezier(3,3.25)(3.5,3.6)(4,3.5) + \thicklines % below here, lines are twice as thick + \put(2,3){\line(4,1){2}} + \put(4.5,2.5){\framebox{Trapezoidal Rule}} + \end{picture} + + +File: latex2e.info, Node: \graphpaper, Next: \line, Prev: \qbezier, Up: picture + +8.19.4 '\graphpaper' +-------------------- + +Synopsis: + + \graphpaper(X_INIT,Y_INIT)(X_DIMEN,Y_DIMEN) + \graphpaper[SPACING](X_INIT,Y_INIT)(X_DIMEN,Y_DIMEN) + + Draw a coordinate grid. Requires the 'graphpap' package. The grid's +origin is '(X_INIT,Y_INIT)'. Grid lines come every SPACING units (the +default is 10). The grid extends X_DIMEN units to the right and Y_DIMEN +units up. All arguments must be positive integers. + + This make a grid with seven vertical lines and eleven horizontal +lines. + + \usepackage{graphpap} % in preamble + ... + \begin{picture}(6,20) % in document body + \graphpaper[2](0,0)(12,20) + \end{picture} + +The lines are numbered every ten units. + + +File: latex2e.info, Node: \line, Next: \linethickness, Prev: \graphpaper, Up: picture + +8.19.5 '\line' +-------------- + +Synopsis: + + \line(X_RUN,Y_RISE){TRAVEL} + + Draw a line. It slopes such that it vertically rises Y_RISE for +every horizontal X_RUN. The TRAVEL is the total horizontal change--it +is not the length of the vector, it is the change in x. In the special +case of vertical lines, where (X_RUN,Y_RISE)=(0,1), the TRAVEL gives the +change in y. 
+ + This draws a line starting at coordinates (1,3). + + \put(1,3){\line(2,5){4}} + +For every over 2, this line will go up 5. Because TRAVEL specifies that +this goes over 4, it must go up 10. Thus its endpoint is +(1,3)+(4,10)=(5,13). In particular, note that TRAVEL=4 is not the +length of the line, it is the change in x. + + The arguments X_RUN and Y_RISE are integers that can be positive, +negative, or zero. (If both are 0 then LaTeX treats the second as 1.) +With '\put(X_INIT,Y_INIT){\line(X_RUN,Y_RISE){TRAVEL}}', if X_RUN is +negative then the line's ending point has a first coordinate that is +less than X_INIT. If Y_RISE is negative then the line's ending point +has a second coordinate that is less than Y_INIT. + + If TRAVEL is negative then you get 'LaTeX Error: Bad \line or \vector +argument.' + + Standard LaTeX can only draw lines with a limited range of slopes +because these lines are made by putting together line segments from +pre-made fonts. The two numbers X_RUN and Y_RISE must have integer +values from -6 through 6. Also, they must be relatively prime, so that +(X_RUN,Y_RISE) can be (2,1) but not (4,2) (if you choose the latter then +instead of lines you get sequences of arrowheads; the solution is to +switch to the former). To get lines of arbitrary slope and plenty of +other shapes in a system like 'picture', see the package 'pict2e' +(). Another solution is to use a +full-featured graphics system such as TikZ, PSTricks, MetaPost, or +Asymptote. + + +File: latex2e.info, Node: \linethickness, Next: \thinlines, Prev: \line, Up: picture + +8.19.6 '\linethickness' +----------------------- + +Synopsis: + + \linethickness{DIM} + + Declares the thickness of subsequent horizontal and vertical lines in +a picture to be DIM, which must be a positive length (*note Lengths::). +It differs from '\thinlines' and '\thicklines' in that it does not +affect the thickness of slanted lines, circles, or ovals (*note +\oval::). + + +File: latex2e.info, Node: \thinlines, Next: \thicklines, Prev: \linethickness, Up: picture + +8.19.7 '\thinlines' +------------------- + +Declaration to set the thickness of subsequent lines, circles, and ovals +in a picture environment to be 0.4pt. This is the default thickness, so +this command is unnecessary unless the thickness has been changed with +either *note \linethickness:: or *note \thicklines::. + + +File: latex2e.info, Node: \thicklines, Next: \circle, Prev: \thinlines, Up: picture + +8.19.8 '\thicklines' +-------------------- + +Declaration to set the thickness of subsequent lines, circles, and ovals +in a picture environment to be 0.8pt. See also *note \linethickness:: +and *note \thinlines::. This command is illustrated in the Trapezoidal +Rule example of *note \qbezier::. + + +File: latex2e.info, Node: \circle, Next: \oval, Prev: \thicklines, Up: picture + +8.19.9 '\circle' +---------------- + +Synopsis: + + \circle{DIAMETER} + \circle*{DIAMETER} + + Produces a circle with a diameter as close as possible to the +specified one. The '*' form produces a filled-in circle. + + This draws a circle of radius 6, centered at '(5,7)'. + + \put(5,7){\circle{6}} + + The available radii for '\circle' are, in points, the even numbers +from 2 to 20, inclusive. For '\circle*' they are all the integers from +1 to 15. 
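+
+   For instance, this puts an open circle and a filled disk side by
+side inside a 'picture'; the argument in each case is a nominal
+diameter in terms of '\unitlength', and LaTeX substitutes the closest
+size that it has available.
+
+     \setlength{\unitlength}{1cm}
+     \begin{picture}(4,2)
+       \put(1,1){\circle{1}}      % open circle, nominal diameter 1cm
+       \put(3,1){\circle*{0.5}}   % filled disk, nominal diameter 0.5cm
+     \end{picture}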
+
+
+File: latex2e.info, Node: \oval, Next: \shortstack, Prev: \circle, Up: picture
+
+8.19.10 '\oval'
+---------------
+
+Synopsis:
+
+     \oval(WIDTH,HEIGHT)
+     \oval(WIDTH,HEIGHT)[PORTION]
+
+   Produce a rectangle with rounded corners, hereinafter referred to as
+an "oval".  The optional argument PORTION allows you to produce only
+half or a quarter of the oval.  For half an oval take PORTION to be one
+of these.
+
+'t'
+     top half
+'b'
+     bottom half
+'r'
+     right half
+'l'
+     left half
+
+   Produce only one quarter of the oval by setting PORTION to 'tr',
+'br', 'bl', or 'tl'.
+
+   This draws the top half of an oval that is 3 wide and 7 tall.
+
+     \put(5,7){\oval(3,7)[t]}
+
+The (5,7) is the center of the entire oval, not just the center of the
+top half.
+
+   These shapes are not ellipses.  They are rectangles whose corners are
+made with quarter circles.  These circles have a maximum radius of 20pt
+(*note \circle:: for the sizes).  Thus large ovals are just frames with
+a small amount of corner rounding.
+
+
+File: latex2e.info, Node: \shortstack, Next: \vector, Prev: \oval, Up: picture
+
+8.19.11 '\shortstack'
+---------------------
+
+Synopsis:
+
+     \shortstack[POSITION]{LINE 1 \\ ... }
+
+   Produce a vertical stack of objects.
+
+   This labels the y axis by writing the word 'y' above the word 'axis'.
+
+     \setlength{\unitlength}{1cm}
+     \begin{picture}(5,2.5)(-0.75,0)
+        \put(0,0){\vector(1,0){4}}   % x axis
+        \put(0,0){\vector(0,1){2}}   % y
+        \put(-0.2,2){\makebox(0,0)[r]{\shortstack[r]{$y$\\ axis}}}
+     \end{picture}
+
+For a short stack, the reference point is the lower left of the stack.
+In the above example the '\makebox' (*note \mbox & \makebox::) puts the
+stack flush right in a zero width box so in total the short stack sits
+slightly to the left of the y axis.
+
+   The valid positions are:
+
+'r'
+     Make objects flush right
+'l'
+     Make objects flush left
+'c'
+     Center objects (default)
+
+   Separate objects into lines with '\\'.  These stacks are short in
+that, unlike in a 'tabular' or 'array' environment, here the rows are
+not spaced out to be of even baseline skips.  Thus, in
+'\shortstack{X\\o\\o\\X}' the first and last rows are taller than the
+middle two, and therefore the baseline skip between the two middle rows
+is smaller than that between the third and last row.  You can adjust row
+heights and depths either by putting in the usual interline spacing with
+'\shortstack{X\\ \strut o\\o\\X}' (*note \strut::), or explicitly, via
+a zero-width box '\shortstack{X \\ \rule{0pt}{12pt} o\\o\\X}', or by
+using '\\''s optional argument '\shortstack{X\\[2pt] o\\o\\X}'.
+
+   The '\shortstack' command is also available outside the 'picture'
+environment.
+
+
+File: latex2e.info, Node: \vector, Next: \makebox (picture), Prev: \shortstack, Up: picture
+
+8.19.12 '\vector'
+-----------------
+
+Synopsis:
+
+     \vector(X_RUN,Y_RISE){TRAVEL}
+
+   Draw a line ending in an arrow.  The slope of that line is that it
+vertically rises Y_RISE for every horizontal X_RUN.  The TRAVEL is the
+total horizontal change--it is not the length of the vector, it is the
+change in x.  In the special case of vertical vectors, if
+(X_RUN,Y_RISE)=(0,1), then TRAVEL gives the change in y.
+
+   For an example see *note picture::.
+
+   For elaboration on X_RUN and Y_RISE see *note \line::.  As there, the
+values of X_RUN and Y_RISE are limited.  For '\vector' you must choose
+integers between -4 and 4, inclusive.  Also, the two you choose must be
+relatively prime.
Thus, '\vector(2,1){4}' is acceptable but +'\vector(4,2){4}' is not (if you use the latter then you get a sequence +of arrowheads). + + +File: latex2e.info, Node: \makebox (picture), Next: \framebox (picture), Prev: \vector, Up: picture + +8.19.13 '\makebox' (picture) +---------------------------- + +Synopsis: + + \makebox(REC-WIDTH,REC-HEIGHT){TEXT} + \makebox(REC-WIDTH,REC-HEIGHT)[POSITION]{TEXT} + + Make a box to hold TEXT. This command fits with the 'picture' +environment, although you can use it outside of there, because REC-WIDTH +and REC-HEIGHT are numbers specifying distances in terms of the +'\unitlength' (*note picture::). This command is similar to the normal +'\makebox' command (*note \mbox & \makebox::) except that here you must +specify the width and height. This command is fragile (*note +\protect::). + + This makes a box of width 3.5 times '\unitlength' and height 4 times +'\unitlength'. + + \put(1,2){\makebox(3.5,4){...}} + + The optional argument 'POSITION' specifies where in the box the TEXT +appears. The default is to center it, both horizontally and vertically. +To place it somewhere else, use a string with one or two of these +letters. + +'t' + Put TEXT at the top of the box. + +'b' + Put TEXT at the bottom. + +'l' + Put TEXT on the left. + +'r' + Put TEXT on the right. + + +File: latex2e.info, Node: \framebox (picture), Next: \frame, Prev: \makebox (picture), Up: picture + +8.19.14 '\framebox' (picture) +----------------------------- + +Synopsis: + + \framebox(REC-WIDTH,REC-HEIGHT){TEXT} + \framebox(REC-WIDTH,REC-HEIGHT)[POSITION]{TEXT} + + This is the same as *note \makebox (picture):: except that it puts a +frame around the outside of the box that it creates. The reference +point is the bottom left corner of the frame. This command fits with +the 'picture' environment, although you can use it outside of there, +because lengths are numbers specifying the distance in terms of the +'\unitlength' (*note picture::). This command is fragile (*note +\protect::). + + This example creates a frame 2.5 inches by 3 inches and puts the text +in the center. + + \setlength{\unitlength}{1in} + \framebox(2.5,3){test text} + + The required arguments are that the rectangle has overall width +REC-WIDTH units and height REC-HEIGHT units. + + The optional argument POSITION specifies the position of TEXT; see +*note \makebox (picture):: for the values that it can take. + + The rule has thickness '\fboxrule' and there is a blank space +'\fboxsep' between the frame and the contents of the box. + + For this command, you must specify REC-WIDTH and REC-HEIGHT. If you want +to just put a frame around some contents whose dimension is determined +in some other way then either use '\fbox' (*note \fbox & \framebox::) or +'\frame' (*note \frame::). + + +File: latex2e.info, Node: \frame, Next: \dashbox, Prev: \framebox (picture), Up: picture + +8.19.15 '\frame' +---------------- + +Synopsis: + + \frame{CONTENTS} + + Puts a rectangular frame around CONTENTS. The reference point is the +bottom left corner of the frame. In contrast to '\framebox' (*note +\framebox (picture)::), this command puts no extra space between +the frame and the object. It is fragile (*note \protect::). + + +File: latex2e.info, Node: \dashbox, Prev: \frame, Up: picture + +8.19.16 '\dashbox' +------------------ + +Synopsis: + + \dashbox{DASH-LEN}(RECT-WIDTH,RECT-HEIGHT){TEXT} + \dashbox{DASH-LEN}(RECT-WIDTH,RECT-HEIGHT)[POSITION]{TEXT} + + Create a dashed rectangle around TEXT.
This command fits with the +'picture' environment, although you can use it outside of there, because +lengths are numbers specifying the distance in terms of the +'\unitlength' (*note picture::). + + The required arguments are: dashes are DASH-LEN units long, with the +same length gap, and the rectangle has overall width RECT-WIDTH units +and height RECT-HEIGHT units. + + The optional argument POSITION specifies the position of TEXT; see +*note \makebox (picture):: for the values that it can take. + + This shows that you can use non-integer value for DASH-LEN. + + \put(0,0){\dashbox{0.1}(5,0.5){My hovercraft is full of eels.}} + +Each dash will be '0.1\unitlength' long, the box's width is +'5\unitlength' and its height is '0.5\unitlength'. + + As in that example, a dashed box looks best when RECT-WIDTH and +RECT-HEIGHT are multiples of the DASH-LEN. + + +File: latex2e.info, Node: quotation & quote, Next: tabbing, Prev: picture, Up: Environments + +8.20 'quotation' & 'quote' +========================== + +Synopsis: + + \begin{quotation} + TEXT + \end{quotation} + +or + + \begin{quote} + TEXT + \end{quote} + + Include a quotation. Both environments indent margins on both sides +by '\leftmargin' and the text is right-justified. + + They differ in how they treat paragraphs. In the 'quotation' +environment, paragraphs are indented by 1.5em and the space between +paragraphs is small, '0pt plus 1pt'. In the 'quote' environment, +paragraphs are not indented and there is vertical space between +paragraphs (it is the rubber length '\parsep'). + + \begin{quotation} \small\it + Four score and seven years ago + ... shall not perish from the earth. + \hspace{1em plus 1fill}---Abraham Lincoln + \end{quotation} + + +File: latex2e.info, Node: tabbing, Next: table, Prev: quotation & quote, Up: Environments + +8.21 'tabbing' +============== + +Synopsis: + + \begin{tabbing} + ROW1COL1 \= ROW1COL2 ... \\ + ROW2COL1 \> ROW2COL2 ... \\ + ... + \end{tabbing} + + Align text in columns, by setting tab stops and tabbing to them much +as was done on a typewriter. This is less often used than the +environments 'tabular' (*note tabular::) or 'array' (*note array::) +because in those the width of each column need not be constant and need +not be known in advance. + + This example has a first line where the tab stops are set to explicit +widths, ended by a '\kill' command (which is described below): + + \begin{tabbing} + \hspace{1.2in}\=\hspace{1in}\=\kill + Ship \>Guns \>Year \\ + \textit{Sophie} \>14 \>1800 \\ + \textit{Polychrest} \>24 \>1803 \\ + \textit{Lively} \>38 \>1804 \\ + \textit{Surprise} \>28 \>1805 \\ + \end{tabbing} + + Both the 'tabbing' environment and the more widely-used 'tabular' +environment put text in columns. The most important distinction is that +in 'tabular' the width of columns is determined automatically by LaTeX, +while in 'tabbing' the user sets the tab stops. Another distinction is +that 'tabular' generates a box, but 'tabbing' can be broken across +pages. Finally, while 'tabular' can be used in any mode, 'tabbing' can +be used only in paragraph mode and it always starts a new paragraph, +without indentation. + + Moreover, as shown in the example above, there is no need to use the +starred form of the '\hspace' command at the beginning of a tabbed row. +The right margin of the 'tabbing' environment is the end of line, so +that the width of the environment is '\linewidth'. + + The 'tabbing' environment contains a sequence of "tabbed rows". 
The +first tabbed row begins immediately after '\begin{tabbing}' and each row +ends with '\\' or '\kill'. The last row may omit the '\\' and end with +just '\end{tabbing}'. + + At any point the 'tabbing' environment has a "current tab stop +pattern", a sequence of N > 0 tab stops, numbered 0, 1, etc. These +create N corresponding columns. Tab stop 0 is always the left margin, +defined by the enclosing environment. Tab stop number I is set if it is +assigned a horizontal position on the page. Tab stop number I can only +be set if all the stops 0, ..., i-1 have already been set; normally +later stops are to the right of earlier ones. + + By default any text typeset in a 'tabbing' environment is typeset +ragged right and left-aligned on the current tab stop. Typesetting is +done in LR mode (*note Modes::). + + The following commands can be used inside a 'tabbing' environment. +They are all fragile (*note \protect::). + +'\\ (tabbing)' + End a tabbed line and typeset it. + +'\= (tabbing)' + Sets a tab stop at the current position. + +'\> (tabbing)' + Advances to the next tab stop. + +'\<' + Put following text to the left of the local margin (without + changing the margin). Can only be used at the start of the line. + +'\+' + Moves the left margin of the next and all the following commands + one tab stop to the right, beginning tabbed line if necessary. + +'\-' + Moves the left margin of the next and all the following commands + one tab stop to the left, beginning tabbed line if necessary. + +'\' (tabbing)' + Moves everything that you have typed so far in the current column, + i.e., everything from the most recent '\>', '\<', '\'', '\\', or + '\kill' command, to the previous column and aligned to the right, + flush against the current column's tab stop. + +'\` (tabbing)' + Allows you to put text flush right against any tab stop, including + tab stop 0. However, it can't move text to the right of the last + column because there's no tab stop there. The '\`' command moves + all the text that follows it, up to the '\\' or '\end{tabbing}' + command that ends the line, to the right margin of the 'tabbing' + environment. There must be no '\>' or '\'' command between the + '\`' and the '\\' or '\end{tabbing}' command that ends the line. + +'\a (tabbing)' + In a 'tabbing' environment, the commands '\=', '\'' and '\`' do not + produce accents as usual (*note Accents::). Instead, use the + commands '\a=', '\a'' and '\a`'. + +'\kill' + Sets tab stops without producing text. Works just like '\\' except + that it throws away the current line instead of producing output + for it. Any '\=', '\+' or '\-' commands in that line remain in + effect. + +'\poptabs' + Restores the tab stop positions saved by the last '\pushtabs'. + +'\pushtabs' + Saves all current tab stop positions. Useful for temporarily + changing tab stop positions in the middle of a 'tabbing' + environment. + +'\tabbingsep' + Distance of the text moved by '\'' to left of current tab stop. + + This example typesets a Pascal function: + + \begin{tabbing} + function \= fact(n : integer) : integer;\\ + \> begin \= \+ \\ + \> if \= n > 1 then \+ \\ + fact := n * fact(n-1) \- \\ + else \+ \\ + fact := 1; \-\- \\ + end;\\ + \end{tabbing} + +The output looks like this. + + function fact(n : integer) : integer; + begin + if n > 1 then + fact := n * fact(n-1); + else + fact := 1; + end; + +This example is just for illustration of the environment. 
To actually +typeset computer code in typewriter like this, a verbatim environment +(*note verbatim::) would normally be best. For pretty-printed code, +there are quite a few packages, including 'algorithm2e', 'fancyvrb', +'listings', and 'minted'. + + +File: latex2e.info, Node: table, Next: tabular, Prev: tabbing, Up: Environments + +8.22 'table' +============ + +Synopsis: + + \begin{table}[PLACEMENT] + TABLE BODY + \caption[LOFTITLE]{TITLE} % optional + \label{LABEL} % also optional + \end{table} + + A class of floats (*note Floats::). They cannot be split across +pages and so they are not typeset in sequence with the normal text but +instead are floated to a convenient place, such as the top of a +following page. + + This example 'table' environment contains a 'tabular' + + \begin{table} + \centering\small + \begin{tabular}{ll} + \multicolumn{1}{c}{\textit{Author}} + &\multicolumn{1}{c}{\textit{Piece}} \\ \hline + Bach &Cello Suite Number 1 \\ + Beethoven &Cello Sonata Number 3 \\ + Brahms &Cello Sonata Number 1 + \end{tabular} + \caption{Top cello pieces} + \label{tab:cello} + \end{table} + +but you can put many different kinds of content in a 'table': the TABLE +BODY may contain text, LaTeX commands, graphics, etc. It is typeset in +a 'parbox' of width '\textwidth'. + + For the possible values of PLACEMENT and their effect on the float +placement algorithm, see *note Floats::. + + The label is optional; it is used for cross references (*note Cross +references::). The '\caption' command is also optional. It specifies +caption text TITLE for the table (*note \caption::). By default it is +numbered. If its optional LOTTITLE is present then that text is used in +the list of tables instead of TITLE (*note Table of contents etc.::). + + In this example the table and caption will float to the bottom of a +page, unless it is pushed to a float page at the end. + + \begin{table}[b] + \centering + \begin{tabular}{r|p{2in}} \hline + One &The loneliest number \\ + Two &Can be as sad as one. + It's the loneliest number since the number one. + \end{tabular} + \caption{Cardinal virtues} + \label{tab:CardinalVirtues} + \end{table} + + +File: latex2e.info, Node: tabular, Next: thebibliography, Prev: table, Up: Environments + +8.23 'tabular' +============== + +Synopsis: + + \begin{tabular}[POS]{COLS} + COLUMN 1 ENTRY &COLUMN 2 ENTRY ... &COLUMN N ENTRY \\ + ... + \end{tabular} + +or + + \begin{tabular*}{WIDTH}[POS]{COLS} + COLUMN 1 ENTRY &COLUMN 2 ENTRY ... &COLUMN N ENTRY \\ + ... + \end{tabular*} + + Produce a table, a box consisting of a sequence of horizontal rows. +Each row consists of items that are aligned vertically in columns. This +illustrates many of the features. + + \begin{tabular}{l|l} + \textit{Player name} &\textit{Career home runs} \\ + \hline + Hank Aaron &755 \\ + Babe Ruth &714 + \end{tabular} + +The output will have two left-aligned columns with a vertical bar +between them. This is specified in 'tabular''s argument '{l|l}'. Put +the entries into different columns by separating them with an ampersand, +'&'. The end of each row is marked with a double backslash, '\\'. Put +a horizontal rule below a row, after a double backslash, with '\hline'. +After the last row the '\\' is optional, unless an '\hline' command +follows to put a rule below the table. + + The required and optional arguments to 'tabular' consist of: + +POS + Optional. Specifies the table's vertical position. The default is + to align the table so its vertical center matches the baseline of + the surrounding text. 
There are two other possible alignments: 't' + aligns the table so its top row matches the baseline of the + surrounding text, and 'b' aligns on the bottom row. + + This only has an effect if there is other text. In the common case + of a 'tabular' alone in a 'center' environment this option makes no + difference. + +COLS + Required. Specifies the formatting of columns. It consists of a + sequence of the following specifiers, corresponding to the types of + column and intercolumn material. + + 'l' + A column of left-aligned items. + + 'r' + A column of right-aligned items. + + 'c' + A column of centered items. + + '|' + A vertical line the full height and depth of the environment. + + '@{TEXT OR SPACE}' + Insert TEXT OR SPACE at this location in every row. The TEXT + OR SPACE material is typeset in LR mode. This text is fragile + (*note \protect::). + + If between two columns there is no @-expression then LaTeX's + 'book', 'article', and 'report' classes will put on either + side of each column a space of length '\tabcolsep', which by + default is 6pt. That is, by default adjacent columns are + separated by 12pt (so '\tabcolsep' is misleadingly named since + it is only half of the separation between tabular columns). + In addition, a space of 6pt also comes before the first column + and after the final column, unless you put a '@{...}' or '|' + there. + + If you override the default and use an @-expression then LaTeX + does not insert '\tabcolsep' so you must insert any desired + space yourself, as in '@{\hspace{1em}}'. + + An empty expression '@{}' will eliminate the space. In + particular, sometimes you want to eliminate the space before + the first column or after the last one, as in the example + below where the tabular lines need to lie on the left margin. + + \begin{flushleft} + \begin{tabular}{@{}l} + ... + \end{tabular} + \end{flushleft} + + The next example shows text, a decimal point between the + columns, arranged so the numbers in the table are aligned on + it. + + \begin{tabular}{r@{$.$}l} + $3$ &$14$ \\ + $9$ &$80665$ + \end{tabular} + + An '\extracolsep{WD}' command in an @-expression causes an + extra space of width WD to appear to the left of all + subsequent columns, until countermanded by another + '\extracolsep'. Unlike ordinary intercolumn space, this extra + space is not suppressed by an @-expression. An '\extracolsep' + command can be used only in an @-expression in the 'cols' + argument. Below, LaTeX inserts the right amount of + intercolumn space to make the entire table 4 inches wide. + + \begin{tabular*}{4in}{l@{\extracolsep{\fill}}l} + Seven times down, eight times up \ldots + &such is life! + \end{tabular*} + + To insert commands that are automatically executed before a + given column, load the 'array' package and use the '>{...}' + specifier. + + 'p{WD}' + Each item in the column is typeset in a parbox of width WD, as + if it were the argument of a '\parbox[t]{wd}{...}' command. + + A line break double backslash '\\' may not appear in the item, + except inside an environment like 'minipage', 'array', or + 'tabular', or inside an explicit '\parbox', or in the scope of + a '\centering', '\raggedright', or '\raggedleft' declaration + (when used in a 'p'-column element these declarations must + appear inside braces, as with '{\centering .. \\ ..}'). + Otherwise LaTeX will misinterpret the double backslash as + ending the tabular row. Instead, to get a line break in there + use '\newline' (*note \newline::). 
+ + '*{NUM}{COLS}' + Equivalent to NUM copies of COLS, where NUM is a positive + integer and COLS is a list of specifiers. Thus the specifier + '\begin{tabular}{|*{3}{l|r}|}' is equivalent to the specifier + '\begin{tabular}{|l|rl|rl|r|}'. Note that COLS may contain + another '*'-expression. + +WIDTH + Required for 'tabular*', not allowed for 'tabular'. Specifies the + width of the 'tabular*' environment. The space between columns + should be rubber, as with '@{\extracolsep{\fill}}', to allow the + table to stretch or shrink to make the specified width, or else you + are likely to get the 'Underfull \hbox (badness 10000) in alignment + ...' warning. + + Parameters that control formatting: + +'\arrayrulewidth' + A length that is the thickness of the rule created by '|', + '\hline', and '\vline' in the 'tabular' and 'array' environments. + The default is '.4pt'. Change it as in + '\setlength{\arrayrulewidth}{0.8pt}'. + +'\arraystretch' + A factor by which the spacing between rows in the 'tabular' and + 'array' environments is multiplied. The default is '1', for no + scaling. Change it as '\renewcommand{\arraystretch}{1.2}'. + +'\doublerulesep' + A length that is the distance between the vertical rules produced + by the '||' specifier. The default is '2pt'. + +'\tabcolsep' + A length that is half of the space between columns. The default is + '6pt'. Change it with '\setlength'. + + The following commands can be used inside the body of a 'tabular' +environment, the first two inside an entry and the second two between +lines: + +* Menu: + +* \multicolumn:: Make an item spanning several columns. +* \vline:: Draw a vertical line. +* \cline:: Draw a horizontal line spanning some columns. +* \hline:: Draw a horizontal line spanning all columns. + + +File: latex2e.info, Node: \multicolumn, Next: \vline, Up: tabular + +8.23.1 '\multicolumn' +--------------------- + +Synopsis: + + \multicolumn{NUMCOLS}{COLS}{TEXT} + + Make an 'array' or 'tabular' entry that spans several columns. The +first argument NUMCOLS gives the number of columns to span. The second +argument COLS specifies the formatting of the entry, with 'c' for +centered, 'l' for flush left, or 'r' for flush right. The third +argument TEXT gives the contents of that entry. + + In this example, in the first row, the second and third columns are +spanned by the single heading 'Name'. + + \begin{tabular}{lccl} + \textit{ID} &\multicolumn{2}{c}{\textit{Name}} &\textit{Age} \\ + \hline + 978-0-393-03701-2 &O'Brian &Patrick &55 \\ + ... + \end{tabular} + + What counts as a column is: the column format specifier for the +'array' or 'tabular' environment is broken into parts, where each part +(except the first) begins with 'l', 'c', 'r', or 'p'. So from +'\begin{tabular}{|r|ccp{1.5in}|}' the parts are '|r|', 'c', 'c', +and 'p{1.5in}|'. + + The COLS argument overrides the 'array' or 'tabular' environment's +intercolumn area default adjoining this multicolumn entry. To affect +that area, this argument can contain vertical bars '|' indicating the +placement of vertical rules, and '@{...}' expressions. Thus if COLS is +'|c|' then this multicolumn entry will be centered and a vertical rule +will come in the intercolumn area before it and after it. This table +details the exact behavior. 
+ + \begin{tabular}{|cc|c|c|} + \multicolumn{1}{r}{w} % entry one + &\multicolumn{1}{|r|}{x} % entry two + &\multicolumn{1}{|r}{y} % entry three + &z % entry four + \end{tabular} + +Before the first entry the output will not have a vertical rule because +the '\multicolumn' has the COLS specifier 'r' with no initial vertical +bar. Between entry one and entry two there will be a vertical rule; +although the first COLS does not have an ending vertical bar, the second +COLS does have a starting one. Between entry two and entry three there +is a single vertical rule; despite that the COLS in both of the +surrounding 'multicolumn''s call for a vertical rule, you only get one +rule. Between entry three and entry four there is no vertical rule; the +default calls for one but the COLS in the entry three '\multicolumn' +leaves it out, and that takes precedence. Finally, following entry four +there is a vertical rule because of the default. + + The number of spanned columns NUMCOLS can be 1. Besides giving the +ability to change the horizontal alignment, this also is useful to +override for one row the 'tabular' definition's default intercolumn area +specification, including the placement of vertical rules. + + In the example below, in the 'tabular' definition the first column is +specified to default to left justified but in the first row the entry is +centered with '\multicolumn{1}{c}{\textsc{Period}}'. Also in the first +row, the second and third columns are spanned by a single entry with +'\multicolumn{2}{c}{\textsc{Span}}', overriding the specification to +center those two columns on the page range en-dash. + + \begin{tabular}{l|r@{--}l} + \multicolumn{1}{c}{\textsc{Period}} + &\multicolumn{2}{c}{\textsc{Span}} \\ \hline + Baroque &1600 &1760 \\ + Classical &1730 &1820 \\ + Romantic &1780 &1910 \\ + Impressionistic &1875 &1925 + \end{tabular} + +Although the 'tabular' specification by default puts a vertical rule +between the first and second columns, no such vertical rule appears in +the first row here. That's because there is no vertical bar in the COLS +part of the first row's first '\multicolumn' command. + + +File: latex2e.info, Node: \vline, Next: \cline, Prev: \multicolumn, Up: tabular + +8.23.2 '\vline' +--------------- + +Draw a vertical line in a 'tabular' or 'array' environment extending the +full height and depth of an entry's row. Can also be used in an +@-expression, although its synonym vertical bar '|' is more common. +This command is rarely used in the body of a table; typically a table's +vertical lines are specified in 'tabular''s COLS argument and overridden +as needed with '\multicolumn' (*note tabular::). + + The example below illustrates some pitfalls. In the first row's +second entry the '\hfill' moves the '\vline' to the left edge of the +cell. But that is different than putting it halfway between the two +columns, so between the first and second columns there are two vertical +rules, with the one from the '{c|cc}' specifier coming before the one +produced by the '\vline\hfill'. In contrast, the first row's third +entry shows the usual way to put a vertical bar between two columns. In +the second row, the 'ghi' is the widest entry in its column so in the +'\vline\hfill' the '\hfill' has no effect and the vertical line in that +entry appears immediately next to the 'g', with no whitespace. 
+ + \begin{tabular}{c|cc} + x &\vline\hfill y &\multicolumn{1}{|r}{z} \\ % row 1 + abc &def &\vline\hfill ghi % row 2 + \end{tabular} + + +File: latex2e.info, Node: \cline, Next: \hline, Prev: \vline, Up: tabular + +8.23.3 '\cline' +--------------- + +Synopsis: + + \cline{I-J} + + In an 'array' or 'tabular' environment, draw a horizontal rule +beginning in column I and ending in column J. The dash, '-', must +appear in the mandatory argument. To span a single column use the +number twice, as with '\cline{2-2}'. + + This example puts two horizontal lines between the first and second +rows, one line in the first column only, and the other spanning the +third and fourth columns. The two lines are side-by-side, at the same +height. + + \begin{tabular}{llrr} + a &b &c &d \\ \cline{1-1} \cline{3-4} + e &f &g &h + \end{tabular} + + +File: latex2e.info, Node: \hline, Prev: \cline, Up: tabular + +8.23.4 '\hline' +--------------- + +Draw a horizontal line the width of the enclosing 'tabular' or 'array' +environment. It's most commonly used to draw a line at the top, bottom, +and between the rows of a table. + + In this example the top of the table has two horizontal rules, one +above the other, that span both columns. The bottom of the table has a +single rule spanning both columns. Because of the '\hline', the +'tabular' second row's line ending double backslash '\\' is required. + + \begin{tabular}{ll} \hline\hline + Baseball &Red Sox \\ + Basketball &Celtics \\ \hline + \end{tabular} + + +File: latex2e.info, Node: thebibliography, Next: theorem, Prev: tabular, Up: Environments + +8.24 'thebibliography' +====================== + +Synopsis: + + \begin{thebibliography}{WIDEST-LABEL} + \bibitem[LABEL]{CITE_KEY} + ... + \end{thebibliography} + + Produce a bibliography or reference list. There are two ways to +produce bibliographic lists. This environment is suitable when you have +only a few references and can maintain the list by hand. *Note Using +BibTeX::, for a more sophisticated approach. + + This shows the environment with two entries. + + This work is based on \cite{latexdps}. + Together they are \cite{latexdps, texbook}. + ... + \begin{thebibliography}{9} + \bibitem{latexdps} + Leslie Lamport. + \textit{\LaTeX{}: a document preparation system}. + Addison-Wesley, Reading, Massachusetts, 1993. + \bibitem{texbook} + Donald Ervin Knuth. + \textit{The \TeX book}. + Addison-Wesley, Reading, Massachusetts, 1983. + \end{thebibliography} + +This styles the first reference as '[1] Leslie ...', and so that '... +based on \cite{latexdps}' produces the matching '... based on [1]'. The +second '\cite' produces '[1, 2]'. You must compile the document twice +to resolve these references. + + The mandatory argument WIDEST-LABEL is text that, when typeset, is as +wide as the widest item label produced by the '\bibitem' commands. The +tradition is to use '9' for bibliographies with less than 10 references, +'99' for ones with less than 100, etc. + + The bibliographic list is headed by a title such as 'Bibliography'. +To change it there are two cases. In the 'book' and 'report' classes, +where the top level sectioning is '\chapter' and the default title is +'Bibliography', that title is in the macro '\bibname'. For 'article', +where the class's top level sectioning is '\section' and the default is +'References', the title is in macro '\refname'. Change it by redefining +the command, as with '\renewcommand{\refname}{Cited references}', after +'\begin{document}'. 
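+
+   For example, here is a sketch (the class, chapter title, and heading
+text are arbitrary choices) of a 'report' class document that renames
+the chapter-level heading by redefining '\bibname'.
+
+     \documentclass{report}
+     \begin{document}
+     \renewcommand{\bibname}{List of sources}
+     \chapter{Conclusions}
+     This work is based on \cite{texbook}.
+     \begin{thebibliography}{9}
+     \bibitem{texbook}
+       Donald Ervin Knuth.
+       \textit{The \TeX book}.
+       Addison-Wesley, Reading, Massachusetts, 1983.
+     \end{thebibliography}
+     \end{document}
+
+In an 'article' class document the same approach applies to '\refname'.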
+ + Language support packages such as 'babel' will automatically redefine +'\refname' or '\bibname' to fit the selected language. + + *Note list::, for the list layout control parameters. + +* Menu: + +* \bibitem:: Specify a bibliography item. +* \cite:: Refer to a bibliography item. +* \nocite:: Include an item in the bibliography. +* Using BibTeX:: Automatic generation of bibliographies. + + +File: latex2e.info, Node: \bibitem, Next: \cite, Up: thebibliography + +8.24.1 '\bibitem' +----------------- + +Synopsis: + + \bibitem{CITE_KEY} + +or + + \bibitem[LABEL]{CITE_KEY} + + Generate an entry labeled by default by a number generated using the +'enumi' counter. The "citation key" CITE_KEY can be any string of +letters, numbers, and punctuation symbols (but not comma). + + *Note thebibliography::, for an example. + + When provided, the optional LABEL becomes the entry label and the +'enumi' counter is not incremented. With this + + \begin{thebibliography} + \bibitem[Lamport 1993]{latexdps} + Leslie Lamport. + \textit{\LaTeX{}: a document preparation system}. + Addison-Wesley, Reading, Massachusetts, 1993. + \bibitem{texbook} + Donald Ervin Knuth. + \textit{The \TeX book}. + Addison-Wesley, Reading, Massachusetts, 1983. + \end{thebibliography} + +the first entry will be styled as '[Lamport 1993] Leslie ...' (The +amount of horizontal space that LaTeX leaves for the label depends on +the WIDEST-LABEL argument of the 'thebibliography' environment; see +*note thebibliography::.) Similarly, '... based on \cite{latexdps}' +will produce '... based on [Lamport 1994]'. + + If you mix '\bibitem' entries having a LABEL with those that do not +then LaTeX will number the unlabelled ones sequentially. In the example +above the 'texbook' entry will appear as '[1] Donald ...', despite that +it is the second entry. + + If you use the same CITE_KEY twice then you get 'LaTeX Warning: There +were multiply-defined labels'. + + Under the hood, LaTeX remembers the CITE_KEY and LABEL information +because '\bibitem' writes it to the auxiliary file 'JOBNAME.aux' (*note +Jobname::). For instance, the above example causes the two +'\bibcite{latexdps}{Lamport, 1993}' and '\bibcite{texbook}{1}' to appear +in that file. The '.aux' file is read by the '\begin{document}' command +and then the information is available for '\cite' commands. This +explains why you need to run LaTeX twice to resolve references: once to +write it out and once to read it in. + + Because of this two-pass algorithm, when you add a '\bibitem' or +change its CITE_KEY you may get 'LaTeX Warning: Label(s) may have +changed. Rerun to get cross-references right'. Fix it by recompiling. + + +File: latex2e.info, Node: \cite, Next: \nocite, Prev: \bibitem, Up: thebibliography + +8.24.2 '\cite' +-------------- + +Synopsis: + + \cite{KEYS} + +or + + \cite[SUBCITE]{KEYS} + + Generate as output a citation to the references associated with KEYS. +The mandatory KEYS is a citation key, or a comma-separated list of +citation keys (*note \bibitem::). + + This + + The ultimate source is \cite{texbook}. + ... + \begin{thebibliography} + \bibitem{texbook} + Donald Ervin Knuth. + \textit{The \TeX book}. + Addison-Wesley, Reading, Massachusetts, 1983. + \end{thebibliography} + +produces output like '... source is [1]'. You can change the appearance +of the citation and of the reference by using bibliography styles if you +generate automatically the 'thebibliography' environment. More +information in *note Using BibTeX::. 
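+
+   As a sketch of that automatic route (the database name 'refs' is a
+placeholder), the document names a style and a '.bib' file, and the
+reference list is then produced by running BibTeX between LaTeX runs.
+
+     \bibliographystyle{plain}  % how entries and citations look
+     \bibliography{refs}        % reads refs.bib; the list appears here
+
+     % typical command sequence:
+     %   pdflatex document
+     %   bibtex   document
+     %   pdflatex document
+     %   pdflatex document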
+ + The optional argument SUBCITE is appended to the citation. For +example, 'See 14.3 in \cite[p.~314]{texbook}' might produce 'See 14.3 in +[1, p. 314]'. + + In addition to what appears in the output, '\cite' writes information +to the auxiliary file 'JOBNAME.aux' (*note Jobname::). For instance, +'\cite{latexdps}' writes '\citation{latexdps}' to that file. This +information is used by BibTeX to include in your reference list only +those works that you have actually cited; see *note \nocite:: also. + + If KEYS is not in your bibliography information then you get 'LaTeX +Warning: There were undefined references', and in the output the +citation shows as a boldface question mark between square brackets. +There are two possible causes. If you have mistyped something, as in +'\cite{texbok}' then you need to correct the spelling. On the other +hand, if you have just added or modified the bibliographic information +and so changed the '.aux' file (*note \bibitem::) then the fix may be to +run LaTeX again. + + +File: latex2e.info, Node: \nocite, Next: Using BibTeX, Prev: \cite, Up: thebibliography + +8.24.3 '\nocite' +---------------- + +Synopsis: + + \nocite{KEYS} + + Produces no output but writes KEYS to the auxiliary file +'JOBNAME.aux' (*note Jobname::). + + The mandatory argument KEYS is a comma-separated list of one or more +citation keys (*note \bibitem::). This information is used by BibTeX to +include these works in your reference list even though you have not +explicitly cited them (*note \cite::). + + +File: latex2e.info, Node: Using BibTeX, Prev: \nocite, Up: thebibliography + +8.24.4 Using BibTeX +------------------- + +As described in 'thebibliography' (*note thebibliography::), a +sophisticated approach to managing bibliographies is provided by the +BibTeX program. This is only an introduction; see the full +documentation on CTAN (*note CTAN::). + + With BibTeX, you don't use the 'thebibliography' environment directly +(*note thebibliography::). Instead, include these lines: + + \bibliographystyle{BIBSTYLE} + \bibliography{BIBFILE1, BIBFILE2, ...} + +The BIBSTYLE refers to a file 'BIBSTYLE.bst', which defines how your +citations will look. The standard BIBSTYLE's distributed with BibTeX +are: + +'alpha' + Labels are formed from name of author and year of publication. The + bibliographic items are sorted alphabetically. +'plain' + Labels are integers. Sort the bibliographic items alphabetically. +'unsrt' + Like 'plain', but entries are in order of citation. +'abbrv' + Like 'plain', but more compact labels. + +Many, many other BibTeX style files exist, tailored to the demands of +various publications. See the CTAN topic +. + + The '\bibliography' command is what actually produces the +bibliography. Its argument is a comma-separated list, referring to +files named 'BIBFILE1.bib', 'BIBFILE2.bib', ... These contain your +database in BibTeX format. This shows a typical couple of entries in +that format. + + @book{texbook, + title = {The {{\TeX}}book}, + author = {D.E. Knuth}, + isbn = {0201134489}, + series = {Computers \& typesetting}, + year = {1983}, + publisher = {Addison-Wesley} + } + @book{sexbook, + author = {W.H. Masters and V.E. Johnson}, + title = {Human Sexual Response}, + year = {1966}, + publisher = {Bantam Books} + } + + Only the bibliographic entries referred to via '\cite' and '\nocite' +will be listed in the document's bibliography. 
Thus you can keep all +your sources together in one file, or a small number of files, and rely +on BibTeX to include in this document only those that you used. + + With BibTeX, the KEYS argument to '\nocite' can also be the single +character '*'. This means to implicitly cite all entries from all given +bibliographies. + +* Menu: + +* BibTeX error messages:: + + +File: latex2e.info, Node: BibTeX error messages, Up: Using BibTeX + +8.24.4.1 BibTeX error messages +.............................. + +If you forget to use '\bibliography' or '\bibliographystyle' in your +document (or, less likely, any '\cite' or '\nocite' command), BibTeX +will issue an error message. Because BibTeX can be used with any +program, not just LaTeX, the error messages refer to the internal +commands read by BibTeX (from the '.aux' file), rather than the +user-level commands described above. + + Here is a table showing internal commands mentioned in the BibTeX +errors, and the corresponding user-level commands. + +'\bibdata' + '\bibliography' + +'\bibstyle' + '\bibliographystyle' + +'\citation' + '\cite', '\nocite' + + For example, if your document has no '\bibliographystyle' command, +BibTeX complains as follows: + + I found no \bibstyle command---while reading file DOCUMENT.aux + + +File: latex2e.info, Node: theorem, Next: titlepage, Prev: thebibliography, Up: Environments + +8.25 'theorem' +============== + +Synopsis: + + \begin{theorem} + THEOREM BODY + \end{theorem} + + Produces 'Theorem N' in boldface followed by THEOREM BODY in italics. +The numbering possibilities for N are described under '\newtheorem' +(*note \newtheorem::). + + \newtheorem{lem}{Lemma} % in preamble + \newtheorem{thm}{Theorem} + ... + \begin{lem} % in document body + text of lemma + \end{lem} + + The next result follows immediately. + \begin{thm}[Gauss] % put `Gauss' in parens after theorem head + text of theorem + \end{thm} + + Most new documents use the packages 'amsthm' and 'amsmath' from the +American Mathematical Society. Among other things these packages +include a large number of options for theorem environments, such as +styling options. + + +File: latex2e.info, Node: titlepage, Next: verbatim, Prev: theorem, Up: Environments + +8.26 'titlepage' +================ + +Synopsis: + + \begin{titlepage} + ... text and spacing ... + \end{titlepage} + + Create a title page, a page with no printed page number or heading +and with succeeding pages numbered starting with page one. + + In this example all formatting, including vertical spacing, is left +to the author. + + \begin{titlepage} + \vspace*{\stretch{1}} + \begin{center} + {\huge\bfseries Thesis \\[1ex] + title} \\[6.5ex] + {\large\bfseries Author name} \\ + \vspace{4ex} + Thesis submitted to \\[5pt] + \textit{University name} \\[2cm] + in partial fulfilment for the award of the degree of \\[2cm] + \textsc{\Large Doctor of Philosophy} \\[2ex] + \textsc{\large Mathematics} \\[12ex] + \vfill + Department of Mathematics \\ + Address \\ + \vfill + \today + \end{center} + \vspace{\stretch{2}} + \end{titlepage} + + To instead produce a standard title page without a 'titlepage' +environment, use '\maketitle' (*note \maketitle::). + + +File: latex2e.info, Node: verbatim, Next: verse, Prev: titlepage, Up: Environments + +8.27 'verbatim' +=============== + +Synopsis: + + \begin{verbatim} + LITERAL-TEXT + \end{verbatim} + + A paragraph-making environment in which LaTeX produces as output +exactly what you type as input. 
For instance inside LITERAL-TEXT the +backslash '\' character does not start commands, it produces a printed +'\', and carriage returns and blanks are taken literally. The output +appears in a monospaced typewriter-like font ('\tt'). + + \begin{verbatim} + Symbol swearing: %&$#?!. + \end{verbatim} + + The only restriction on 'literal-text' is that it cannot include the +string '\end{verbatim}'. + + You cannot use the verbatim environment in the argument to macros, +for instance in the argument to a '\section'. This is not the same as +commands being fragile (*note \protect::); rather, it simply cannot work, +because the 'verbatim' environment changes the catcode regime before +processing its contents and restores it immediately afterward, whereas +with a macro argument the content of the argument has already been +converted to a token list under the catcode regime in effect when the +macro was called. However, the 'cprotect' package can help +with this. + + One common use of verbatim input is to typeset computer code. There +are packages that improve on the 'verbatim' environment. For +instance, one improvement is to allow the verbatim inclusion of external +files, or parts of those files. Such packages include 'listings' and +'minted'. + + A package that provides many more options for verbatim environments +is 'fancyvrb'. Another is 'verbatimbox'. + + For a list of all the relevant packages, see CTAN (*note CTAN::). + +* Menu: + +* \verb:: The macro form of the 'verbatim' environment. + + +File: latex2e.info, Node: \verb, Up: verbatim + +8.27.1 '\verb' +-------------- + +Synopsis: + + \verb CHAR LITERAL-TEXT CHAR + \verb* CHAR LITERAL-TEXT CHAR + + Typeset LITERAL-TEXT as it is input, including special characters and +spaces, using the typewriter ('\tt') font. + + This example shows two different invocations of '\verb'. + + This is \verb!literally! the biggest pumpkin ever. + And this is the best squash, \verb+literally!+ + +The first '\verb' has its LITERAL-TEXT surrounded with exclamation +points, '!'. The second instead uses plus, '+', because the exclamation +point is part of 'literal-text'. + + The single-character delimiter CHAR surrounds LITERAL-TEXT--it must +be the same character before and after. No spaces come between '\verb' +or '\verb*' and CHAR, or between CHAR and LITERAL-TEXT, or between +LITERAL-TEXT and the second occurrence of CHAR (the synopsis shows a +space only to distinguish one component from the other). The delimiter +must not appear in LITERAL-TEXT. The LITERAL-TEXT cannot include a line +break. + + The '*'-form differs only in that spaces are printed with a visible +space character. + + The output from this will include a visible space on both sides of +the word 'with': + + The command's first argument is \verb*!filename with extension! and ... + + For typesetting Internet addresses, URLs, the package 'url' is a +better option than the '\verb' command, since it allows line breaks. + + For computer code there are many packages with advantages over +'\verb'. One is 'listings', another is 'minted'. + + You cannot use '\verb' in the argument to a macro, for instance in +the argument to a '\section'.
It is not a question of '\verb' being +fragile (*note \protect::), instead it just cannot work, as the '\verb' +command changes the catcode regime before reading its argument, and +restore it immediately afterward, nevertheless with a macro argument the +content of the argument has already be converted to a token list along +the catcode regime in effect when the macro was called. However, the +'cprotect' package can help with this. + + +File: latex2e.info, Node: verse, Prev: verbatim, Up: Environments + +8.28 'verse' +============ + +Synopsis: + + \begin{verse} + LINE1 \\ + LINE2 \\ + ... + \end{verse} + + An environment for poetry. + + Here are two lines from Shakespeare's Romeo and Juliet. + + Then plainly know my heart's dear love is set \\ + On the fair daughter of rich Capulet. + + Separate the lines of each stanza with '\\', and use one or more +blank lines to separate the stanzas. + + \begin{verse} + \makebox[\linewidth][c]{\textit{Shut Not Your Doors} ---Walt Whitman} + \\[1\baselineskip] + Shut not your doors to me proud libraries, \\ + For that which was lacking on all your well-fill'd shelves, \\ + \qquad yet needed most, I bring, \\ + Forth from the war emerging, a book I have made, \\ + The words of my book nothing, the drift of it every thing, \\ + A book separate, not link'd with the rest nor felt by the intellect, \\ + But you ye untold latencies will thrill to every page. + \end{verse} + +The output has margins indented on the left and the right, paragraphs +are not indented, and the text is not right-justified. + + +File: latex2e.info, Node: Line breaking, Next: Page breaking, Prev: Environments, Up: Top + +9 Line breaking +*************** + +The first thing LaTeX does when processing ordinary text is to translate +your input file into a sequence of glyphs and spaces. To produce a +printed document, this sequence must be broken into lines (and these +lines must be broken into pages). + + LaTeX usually does the line (and page) breaking in the text body for +you but in some environments you manually force line breaks. + + A common workflow is to get a final version of the document content +before taking a final pass through and considering line breaks (and page +breaks). This differs from word processing, where you are formatting +text as you input it. Putting these off until the end prevents a lot of +fiddling with breaks that will change anyway. + +* Menu: + +* \\:: Start a new line. +* \obeycr & \restorecr:: Make each input line start a new output line. +* \newline:: Break the line +* \- (hyphenation):: Insert explicit hyphenation. +* \discretionary:: Explicit control of the hyphen character. +* \fussy & \sloppy:: Be more or less particular with line breaking. +* \hyphenation:: Tell LaTeX how to hyphenate a word. +* \linebreak & \nolinebreak:: Forcing & avoiding line breaks. + + +File: latex2e.info, Node: \\, Next: \obeycr & \restorecr, Up: Line breaking + +9.1 '\\' +======== + +Synopsis, one of: + + \\ + \\[MORESPACE] + +or one of: + + \\* + \\*[MORESPACE] + + End the current line. The optional argument MORESPACE specifies +extra vertical space to be inserted before the next line. This is a +rubber length (*note Lengths::) and can be negative. The text before +the line break is set at its normal length, that is, it is not stretched +to fill out the line width. This command is fragile (*note \protect::). + + \title{My story: \\[0.25in] + a tale of woe} + +The starred form, '\\*', tells LaTeX not to start a new page between the +two lines, by issuing a '\nobreak'. 
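+
+   For instance, this sketch (the wording is arbitrary) keeps the two
+lines of a centered heading on the same page and opens a little extra
+space between them.
+
+     \begin{center}
+       {\Large Annual Report}\\*[0.5ex]
+       {\large for the Preceding Year}
+     \end{center}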
+ + Explicit line breaks in the main text body are unusual in LaTeX. In +particular, don't start new paragraphs with '\\'. Instead leave a blank +line between the two paragraphs. And don't put in a sequence of '\\''s +to make vertical space. Instead use '\vspace{LENGTH}', or +'\leavevmode\vspace{LENGTH}', or '\vspace*{LENGTH}' if you want the +space to not be thrown out at the top of a new page (*note \vspace::). + + The '\\' command is mostly used outside of the main flow of text such +as in a 'tabular' or 'array' environment or in an equation environment. + + The '\\' command is a synonym for '\newline' (*note \newline::) under +ordinary circumstances (an example of an exception is the 'p{...}' +column in a 'tabular' environment; *note tabular::). + + The '\\' command is a macro, and its definition changes by context so +that its definition in normal text, a 'center' environment, a +'flushleft' environment, and a 'tabular' are all different. In normal +text when it forces a linebreak it is essentially a shorthand for +'\newline'. It does not end horizontal mode or end the paragraph, it +just inserts some glue and penalties so that when the paragraph does end +a linebreak will occur at that point, with the short line padded with +white space. + + You get 'LaTeX Error: There's no line here to end' if you use '\\' to +ask for a new line, rather than to end the current line. An example is +if you have '\begin{document}\\' or, more likely, something like this. + + \begin{center} + \begin{minipage}{0.5\textwidth} + \\ + In that vertical space put your mark. + \end{minipage} + \end{center} + +Fix it by replacing the double backslash with something like +'\vspace{\baselineskip}'. + + +File: latex2e.info, Node: \obeycr & \restorecr, Next: \newline, Prev: \\, Up: Line breaking + +9.2 '\obeycr' & '\restorecr' +============================ + +The '\obeycr' command makes a return in the input file ('^^M', +internally) the same as '\\', followed by '\relax'. So each new line in +the input will also be a new line in the output. The '\restorecr' +command restores normal line-breaking behavior. + + This is not the way to show verbatim text or computer code. Use +'verbatim' (*note verbatim::) instead. + + With LaTeX's usual defaults, this + + aaa + bbb + + \obeycr + ccc + ddd + eee + + \restorecr + fff + ggg + + hhh + iii + +produces output like this. + + aaa bbb + ccc + ddd + eee + + fff ggg + hhh iii + +The indents are paragraph indents. + + +File: latex2e.info, Node: \newline, Next: \- (hyphenation), Prev: \obeycr & \restorecr, Up: Line breaking + +9.3 '\newline' +============== + +In ordinary text, this ends a line in a way that does not right-justify +it, so the text before the end of line is not stretched. That is, in +paragraph mode (*note Modes::), the '\newline' command is equivalent to +double-backslash (*note \\::). This command is fragile (*note +\protect::). + + However, the two commands are different inside a 'tabular' or 'array' +environment. In a column with a specifier producing a paragraph box +such as typically 'p{...}', '\newline' will insert a line end inside of +the column; that is, it does not break the entire tabular row. To break +the entire row use '\\' or its equivalent '\tabularnewline'. + + This will print 'Name:' and 'Address:' as two lines in a single cell +of the table. + + \begin{tabular}{p{1in}@{\hspace{2in}}p{1in}} + Name: \newline Address: &Date: \\ \hline + \end{tabular} + +The 'Date:' will be baseline-aligned with 'Name:'. 
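+
+   As a further sketch (the column widths and text are arbitrary), here
+'\newline' breaks a line inside the first cell only, while
+'\tabularnewline' ends the whole row.
+
+     \begin{tabular}{p{1in}p{1in}}
+       one line \newline another line &still the first row \tabularnewline
+       second row, left               &second row, right
+     \end{tabular}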
+ + +File: latex2e.info, Node: \- (hyphenation), Next: \discretionary, Prev: \newline, Up: Line breaking + +9.4 '\-' (discretionary hyphen) +=============================== + +Tell LaTeX that it may hyphenate the word at that point. When you +insert '\-' commands in a word, the word will only be hyphenated at +those points and not at any of the other hyphenation points that LaTeX +might otherwise have chosen. This command is robust (*note \protect::). + + LaTeX is good at hyphenating and usually finds most of the correct +hyphenation points, while almost never using an incorrect one. The '\-' +command is for exceptional cases. + + For example, LaTeX does not ordinarily hyphenate words containing a +hyphen. Below, the long and hyphenated word means LaTeX has to put in +unacceptably large spaces to set the narrow column. + + \begin{tabular}{rp{1.75in}} + Isaac Asimov &The strain of + anti-intellectualism + % an\-ti-in\-tel\-lec\-tu\-al\-ism + has been a constant thread winding its way through our + political and cultural life, nurtured by + the false notion that democracy means that + `my ignorance is just as good as your knowledge'. + \end{tabular} + +Commenting out the third line and uncommenting the fourth makes a much +better fit. + + The '\-' command only allows LaTeX to break there, it does not +require that it break there. You can force a split with something like +'Hef-\linebreak feron'. Of course, if you later change the text then +this forced break may look very odd, so this approach requires care. + + +File: latex2e.info, Node: \discretionary, Next: \fussy & \sloppy, Prev: \- (hyphenation), Up: Line breaking + +9.5 '\discretionary' (generalized hyphenation point) +==================================================== + +Synopsis: + + \discretionary{PRE-BREAK}{POST-BREAK}{NO-BREAK} + + Handle word changes around hyphens. This command is not often used +in LaTeX documents. + + If a line break occurs at the point where '\discretionary' appears +then TeX puts PRE-BREAK at the end of the current line and puts +POST-BREAK at the start of the next line. If there is no line break +here then TeX puts NO-BREAK. + + In 'difficult' the three letters 'ffi' form a ligature. But TeX can +nonetheless break between the two 'f''s with this. + + di\discretionary{f-}{fi}{ffi}cult + + Note that users do not have to do this. It is typically handled +automatically by TeX's hyphenation algorithm. + + +File: latex2e.info, Node: \fussy & \sloppy, Next: \hyphenation, Prev: \discretionary, Up: Line breaking + +9.6 '\fussy' & '\sloppy' +======================== + +Declarations to make TeX more picky or less picky about line breaking. +Declaring '\fussy' usually avoids too much space between words, at the +cost of an occasional overfull box. Conversely, '\sloppy' avoids +overfull boxes while suffering loose interword spacing. + + The default is '\fussy'. Line breaking in a paragraph is controlled +by whichever declaration is current at the end of the paragraph, i.e., +at the blank line or '\par' or displayed equation ending that paragraph. +So to affect the line breaks, include that paragraph-ending material in +the scope of the command. + +* Menu: + +* sloppypar:: Environment version of \sloppy command. + + +File: latex2e.info, Node: sloppypar, Up: \fussy & \sloppy + +9.6.1 'sloppypar' +----------------- + +Synopsis: + + \begin{sloppypar} + ... paragraphs ... + \end{sloppypar} + + Typeset the paragraphs with '\sloppy' in effect (*note \fussy & +\sloppy::). 
Use this to locally adjust line breaking, to avoid +'Overfull box' or 'Underfull box' errors. + + The example is simple. + + \begin{sloppypar} + Her plan for the morning thus settled, she sat quietly down to her + book after breakfast, resolving to remain in the same place and the + same employment till the clock struck one; and from habitude very + little incommoded by the remarks and ejaculations of Mrs.\ Allen, + whose vacancy of mind and incapacity for thinking were such, that + as she never talked a great deal, so she could never be entirely + silent; and, therefore, while she sat at her work, if she lost her + needle or broke her thread, if she heard a carriage in the street, + or saw a speck upon her gown, she must observe it aloud, whether + there were anyone at leisure to answer her or not. + \end{sloppypar} + + +File: latex2e.info, Node: \hyphenation, Next: \linebreak & \nolinebreak, Prev: \fussy & \sloppy, Up: Line breaking + +9.7 '\hyphenation' +================== + +Synopsis: + + \hyphenation{WORD1 ...} + + Declares allowed hyphenation points within the words in the list. +The words in that list are separated by spaces. Show permitted points +for hyphenation with a dash character, '-'. + + Here is an example: + + \hyphenation{hat-er il-lit-e-ra-ti tru-th-i-ness} + + Use lowercase letters. TeX will only hyphenate if the word matches +exactly, no inflections are tried. Multiple '\hyphenation' commands +accumulate. + + +File: latex2e.info, Node: \linebreak & \nolinebreak, Prev: \hyphenation, Up: Line breaking + +9.8 '\linebreak' & '\nolinebreak' +================================= + +Synopses, one of: + + \linebreak + \linebreak[ZERO-TO-FOUR] + +or one of these. + + \nolinebreak + \nolinebreak[ZERO-TO-FOUR] + + Encourage or discourage a line break. The optional ZERO-TO-FOUR is +an integer lying between 0 and 4 that allows you to soften the +instruction. The default is 4, so that without the optional argument +these commands entirely force or prevent the break. But for instance, +'\nolinebreak[1]' is a suggestion that another place may be better. The +higher the number, the more insistent the request. Both commands are +fragile (*note \protect::). + + Here we tell LaTeX that a good place to put a linebreak is after the +standard legal text. + + \boilerplatelegal{} \linebreak[2] + We especially encourage applications from members of traditionally + underrepresented groups. + + When you issue '\linebreak', the spaces in the line are stretched out +so that the break point reaches the right margin. *Note \\:: and *note +\newline::, to have the spaces not stretched out. + + +File: latex2e.info, Node: Page breaking, Next: Footnotes, Prev: Line breaking, Up: Top + +10 Page breaking +**************** + +Ordinarily LaTeX automatically takes care of breaking output into pages +with its usual aplomb. But if you are writing commands, or tweaking the +final version of a document, then you may need to understand how to +influence its actions. + + LaTeX's algorithm for splitting a document into pages is more complex +than just waiting until there is enough material to fill a page and +outputting the result. Instead, LaTeX typesets more material than would +fit on the page and then chooses a break that is optimal in some way (it +has the smallest badness). 
An example of the advantage of this approach +is that if the page has some vertical space that can be stretched or +shrunk, such as with rubber lengths between paragraphs, then LaTeX can +use that to avoid widow lines (where a new page starts with the last +line of a paragraph; LaTeX can squeeze the extra line onto the first +page) and orphans (where the first line of paragraph is at the end of a +page; LaTeX can stretch the material of the first page so the extra line +falls on the second page). Another example is where LaTeX uses +available vertical shrinkage to fit on a page not just the header for a +new section but also the first two lines of that section. + + But LaTeX does not optimize over the entire document's set of page +breaks. So it can happen that the first page break is great but the +second one is lousy; to break the current page LaTeX doesn't look as far +ahead as the next page break. So occasionally you may want to influence +page breaks while preparing a final version of a document. + + *Note Layout::, for more material that is relevant to page breaking. + +* Menu: + +* \clearpage & \cleardoublepage:: Start a new page; eject floats. +* \newpage:: Start a new page. +* \enlargethispage:: Enlarge the current page a bit. +* \pagebreak & \nopagebreak:: Forcing & avoiding page breaks. + + +File: latex2e.info, Node: \clearpage & \cleardoublepage, Next: \newpage, Up: Page breaking + +10.1 '\clearpage' & '\cleardoublepage' +====================================== + +Synopsis: + + \clearpage + +or + + \cleardoublepage + + End the current page and output all of the pending floating figures +and tables (*note Floats::). If there are too many floats to fit on the +page then LaTeX will put in extra pages containing only floats. In +two-sided printing, '\cleardoublepage' also makes the next page of +content a right-hand page, an odd-numbered page, if necessary inserting +a blank page. The '\clearpage' command is robust while +'\cleardoublepage' is fragile (*note \protect::). + + LaTeX's page breaks are optimized so ordinarily you only use this +command in a document body to polish the final version, or inside +commands. + + The '\cleardoublepage' command will put in a blank page, but it will +have the running headers and footers. To get a really blank page, use +this command. + + \let\origdoublepage\cleardoublepage + \newcommand{\clearemptydoublepage}{% + \clearpage + {\pagestyle{empty}\origdoublepage}% + } + +If you want LaTeX's standard '\chapter' command to do this then add the +line '\let\cleardoublepage\clearemptydoublepage'. + + The command '\newpage' (*note \newpage::) also ends the current page, +but without clearing pending floats. And, if LaTeX is in two-column +mode then '\newpage' ends the current column while '\clearpage' and +'\cleardoublepage' end the current page. + + +File: latex2e.info, Node: \newpage, Next: \enlargethispage, Prev: \clearpage & \cleardoublepage, Up: Page breaking + +10.2 '\newpage' +=============== + +Synopsis: + + \newpage + + End the current page. This command is robust (*note \protect::). + + LaTeX's page breaks are optimized so ordinarily you only use this +command in a document body to polish the final version, or inside +commands. + + While the commands '\clearpage' and '\cleardoublepage' also end the +current page, in addition they clear pending floats (*note \clearpage & +\cleardoublepage::). 
And, if LaTeX is in two-column mode then
+'\clearpage' and '\cleardoublepage' end the current page, possibly
+leaving an empty column, while '\newpage' only ends the current column.
+
+   In contrast with '\pagebreak' (*note \pagebreak & \nopagebreak::),
+the '\newpage' command will cause the new page to start right where
+requested. This
+
+     Four score and seven years ago our fathers brought forth on this
+     continent,
+     \newpage
+     \noindent a new nation, conceived in Liberty, and dedicated to the
+     proposition that all men are created equal.
+
+makes a new page start after 'continent', and the cut-off line is not
+right justified. In addition, '\newpage' does not vertically stretch
+out the page, as '\pagebreak' does.
+
+
+File: latex2e.info, Node: \enlargethispage, Next: \pagebreak & \nopagebreak, Prev: \newpage, Up: Page breaking
+
+10.3 '\enlargethispage'
+=======================
+
+Synopsis, one of:
+
+     \enlargethispage{SIZE}
+     \enlargethispage*{SIZE}
+
+   Enlarge the '\textheight' for the current page. The required
+argument SIZE must be a rigid length (*note Lengths::). It may be
+positive or negative. This command is fragile (*note \protect::).
+
+   A common strategy is to wait until you have the final text of a
+document, and then pass through it tweaking line and page breaks. This
+command allows you some page size leeway.
+
+   This will allow one extra line on the current page.
+
+     \enlargethispage{\baselineskip}
+
+   The starred form '\enlargethispage*' tries to squeeze the material
+together on the page as much as possible, for the common use case of
+getting one more line on the page. This is often used together with an
+explicit '\pagebreak'.
+
+
+File: latex2e.info, Node: \pagebreak & \nopagebreak, Prev: \enlargethispage, Up: Page breaking
+
+10.4 '\pagebreak' & '\nopagebreak'
+==================================
+
+Synopses:
+
+     \pagebreak
+     \pagebreak[ZERO-TO-FOUR]
+
+or
+
+     \nopagebreak
+     \nopagebreak[ZERO-TO-FOUR]
+
+   Encourage or discourage a page break. The optional ZERO-TO-FOUR is
+an integer that allows you to soften the request. The default is 4, so
+that without the optional argument these commands entirely force or
+prevent the break. But for instance '\nopagebreak[1]' suggests to LaTeX
+that another spot might be preferable. The higher the number, the more
+insistent the request. Both commands are fragile (*note \protect::).
+
+   LaTeX's page endings are optimized so ordinarily you only use this
+command in a document body to polish the final version, or inside
+commands.
+
+   If you use these inside a paragraph, they apply to the point
+following the line in which they appear. So this
+
+     Four score and seven years ago our fathers brought forth on this
+     continent,
+     \pagebreak
+     a new nation, conceived in Liberty, and dedicated to the proposition
+     that all men are created equal.
+
+does not give a page break at 'continent', but instead at 'nation',
+since that is where LaTeX breaks that line. In addition, with
+'\pagebreak' the vertical space on the page is stretched out where
+possible so that it extends to the normal bottom margin. This can look
+strange, and if '\flushbottom' is in effect this can cause you to get
+'Underfull \vbox (badness 10000) has occurred while \output is active'.
+*Note \newpage::, for a command that does not have these effects.
+
+   (There is an obsolete declaration '\samepage', which tries to only
+allow a break between two paragraphs. There is a related environment
+'samepage', also obsolete. Neither of these works reliably.
For more on +keeping material on the same page, see the FAQ entry +.) + + +File: latex2e.info, Node: Footnotes, Next: Definitions, Prev: Page breaking, Up: Top + +11 Footnotes +************ + +Place a footnote at the bottom of the current page, as here. + + Noël Coward quipped that having to read a footnote is like having + to go downstairs to answer the door, while in the midst of making + love.\footnote{% + I wouldn't know, I don't read footnotes.} + + You can put multiple footnotes on a page. If the footnote text +becomes too long then it will flow to the next page. + + You can also produce footnotes by combining the '\footnotemark' and +the '\footnotetext' commands, which is useful in special circumstances. + + To make bibliographic references come out as footnotes you need to +include a bibliographic style with that behavior (*note Using BibTeX::). + +* Menu: + +* \footnote:: Insert a footnote. +* \footnotemark:: Insert footnote mark only. +* \footnotetext:: Insert footnote text only. +* Footnotes in section headings:: Chapter or section titles. +* Footnotes in a table:: Table footnotes. +* Footnotes of footnotes:: Multiple classes of footnotes. + + +File: latex2e.info, Node: \footnote, Next: \footnotemark, Up: Footnotes + +11.1 '\footnote' +================ + +Synopsis, one of: + + \footnote{TEXT} + \footnote[NUMBER]{TEXT} + + Place a footnote TEXT at the bottom of the current page. + + There are over a thousand footnotes in Gibbon's + \textit{Decline and Fall of the Roman Empire}.\footnote{% + After reading an early version with endnotes David Hume complained, + ``One is also plagued with his Notes, according to the present Method + of printing the Book'' and suggested that they ``only to be printed + at the Margin or the Bottom of the Page.''} + + The optional argument NUMBER allows you to specify the number of the +footnote. If you use this then LaTeX does not increment the 'footnote' +counter. + + By default, LaTeX uses arabic numbers as footnote markers. Change +this with something like +'\renewcommand{\thefootnote}{\fnsymbol{footnote}}', which uses a +sequence of symbols (*note \alph \Alph \arabic \roman \Roman +\fnsymbol::). To make this change global put that in the preamble. If +you make the change local then you may want to reset the counter with +'\setcounter{footnote}{0}'. + + LaTeX determines the spacing of footnotes with two parameters. + +'\footnoterule' + Produces the rule separating the main text on a page from the + page's footnotes. Default dimensions in the standard document + classes (except 'slides', where it does not appear) is: vertical + thickness of '0.4pt', and horizontal size of '0.4\columnwidth' + long. Change the rule with something like this. + + \renewcommand{\footnoterule}{% Kerns avoid vertical space + \kern -3pt % This -3 is negative + \hrule width \textwidth height 1pt % of the sum of this 1 + \kern 2pt} % and this 2 + +'\footnotesep' + The height of the strut placed at the beginning of the footnote + (*note \strut::). By default, this is set to the normal strut for + '\footnotesize' fonts (*note Font sizes::), therefore there is no + extra space between footnotes. This is '6.65pt' for '10pt', + '7.7pt' for '11pt', and '8.4pt' for '12pt'. Change it as with + '\setlength{\footnotesep}{11pt}'. + + The '\footnote' command is fragile (*note \protect::). 
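+
+   For instance, the following sketch (the sentence and the footnote
+text are invented for illustration) switches to symbolic marks for one
+passage only, by making the change inside a group, and then resets the
+counter afterward, since counter changes are global.
+
+     {% symbolic footnote marks, for this group only
+       \renewcommand{\thefootnote}{\fnsymbol{footnote}}%
+       All hands were lost.\footnote{Off Cape Horn.}
+     }
+     \setcounter{footnote}{0}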
+ + LaTeX's default puts many restrictions on where you can use a +'\footnote'; for instance, you cannot use it in an argument to a +sectioning command such as '\chapter' (it can only be used in outer +paragraph mode; *note Modes::). There are some workarounds; see +following sections. + + In a 'minipage' environment the '\footnote' command uses the +'mpfootnote' counter instead of the 'footnote' counter, so they are +numbered independently. They are shown at the bottom of the +environment, not at the bottom of the page. And by default they are +shown alphabetically. *Note minipage:: and *note Footnotes in a +table::. + + +File: latex2e.info, Node: \footnotemark, Next: \footnotetext, Prev: \footnote, Up: Footnotes + +11.2 '\footnotemark' +==================== + +Synopsis, one of: + + \footnotemark + \footnotemark[NUMBER] + + Put the current footnote mark in the text. To specify associated +text for the footnote see *note \footnotetext::. The optional argument +NUMBER causes the command to use that number to determine the footnote +mark. This command can be used in inner paragraph mode (*note Modes::). + + If you use '\footnotemark' without the optional argument then it +increments the 'footnote' counter, but if you use the optional NUMBER +then it does not. The next example produces several consecutive +footnote markers referring to the same footnote. + + The first theorem\footnote{Due to Gauss.} + and the second theorem\footnotemark[\value{footnote}] + and the third theorem.\footnotemark[\value{footnote}] + + If there are intervening footnotes then you must remember the value +of the number of the common mark. This example gives the same +institutional affiliation to both the first and third authors ('\thanks' +is a version of '\footnote'), by-hand giving the number of the footnote. + + \title{A Treatise on the Binomial Theorem} + \author{J Moriarty\thanks{University of Leeds} + \and A C Doyle\thanks{Durham University} + \and S Holmes\footnotemark[1]} + \begin{document} + \maketitle + + This uses a counter to remember the footnote number. The third +sentence is followed by the same footnote marker as the first. + + \newcounter{footnoteValueSaver} + All babies are illogical.\footnote{% + Lewis Carroll.}\setcounter{footnoteValueSaver}{\value{footnote}} + Nobody is despised who can manage a crocodile.\footnote{% + Captain Hook.} + Illogical persons are despised.\footnotemark[\value{footnoteValueSaver}] + Therefore, anyone who can manage a crocodile is not a baby. + + This example accomplishes the same by using the package 'cleveref'. + + \usepackage{cleveref}[2012/02/15] % in preamble + \crefformat{footnote}{#2\footnotemark[#1]#3} + ... + The theorem is from Evers.\footnote{\label{fn:TE}Tinker, Evers, 1994.} + The corollary is from Chance.\footnote{Evers, Chance, 1990.} + But the key lemma is from Tinker.\cref{fn:TE} + + It will work with the package 'hyperref'. + + +File: latex2e.info, Node: \footnotetext, Next: Footnotes in section headings, Prev: \footnotemark, Up: Footnotes + +11.3 '\footnotetext' +==================== + +Synopsis, one of: + + \footnotetext{TEXT} + \footnotetext[NUMBER]{TEXT} + + Place TEXT at the bottom of the page as a footnote. It pairs with +'\footnotemark' (*note \footnotemark::) and can come anywhere after that +command, but must appear in outer paragraph mode (*note Modes::). The +optional argument NUMBER changes the number of the footnote mark. + + *Note \footnotemark:: and *note Footnotes in a table::, for usage +examples. 
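+
+   As a minimal sketch (the table entries are invented), the mark can
+go where '\footnote' itself is not allowed, with the text supplied just
+after the enclosing environment.
+
+     \begin{tabular}{ll}
+       Emma   &studies the map\footnotemark \\
+       Henry  &keeps the log
+     \end{tabular}
+     \footnotetext{Drawn from memory.}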
+ + +File: latex2e.info, Node: Footnotes in section headings, Next: Footnotes in a table, Prev: \footnotetext, Up: Footnotes + +11.4 Footnotes in section headings +================================== + +Putting a footnote in a section heading, as in: + + \section{Full sets\protect\footnote{This material due to ...}} + +causes the footnote to appear at the bottom of the page where the +section starts, as usual, but also at the bottom of the table of +contents, where it is not likely to be desired. The simplest way to +have it not appear on the table of contents is to use the optional +argument to '\section' + + \section[Please]{Please\footnote{% + Don't footnote in chapter and section headers!}} + +No '\protect' is needed in front of '\footnote' here because what gets +moved to the table of contents is the optional argument. + + +File: latex2e.info, Node: Footnotes in a table, Next: Footnotes of footnotes, Prev: Footnotes in section headings, Up: Footnotes + +11.5 Footnotes in a table +========================= + +Inside a 'tabular' or 'array' environment the '\footnote' command does +not work; there is a footnote mark in the table cell but the footnote +text does not appear. The solution is to use a 'minipage' environment +as here (*note minipage::). + + \begin{center} + \begin{minipage}{\textwidth} \centering + \begin{tabular}{l|l} + \textsc{Ship} &\textsc{Book} \\ \hline + \textit{HMS Sophie} &Master and Commander \\ + \textit{HMS Polychrest} &Post Captain \\ + \textit{HMS Lively} &Post Captain \\ + \textit{HMS Surprise} &A number of books\footnote{% + Starting with \textit{HMS Surprise}.} + \end{tabular} + \end{minipage} + \end{center} + + Inside a 'minipage', footnote marks are lowercase letters. Change +that with something like +'\renewcommand{\thempfootnote}{\arabic{mpfootnote}}' (*note \alph \Alph +\arabic \roman \Roman \fnsymbol::). + + The footnotes in the prior example appear at the bottom of the +'minipage'. To have them appear at the bottom of the main page, as part +of the regular footnote sequence, use the '\footnotemark' and +'\footnotetext' pair and make a new counter. + + \newcounter{mpFootnoteValueSaver} + \begin{center} + \begin{minipage}{\textwidth} + \setcounter{mpFootnoteValueSaver}{\value{footnote}} \centering + \begin{tabular}{l|l} + \textsc{Woman} &\textsc{Relationship} \\ \hline + Mona &Attached\footnotemark \\ + Diana Villiers &Eventual wife \\ + Christine Hatherleigh Wood &Fiance\footnotemark + \end{tabular} + \end{minipage}% percent sign keeps footnote text close to minipage + \stepcounter{mpFootnoteValueSaver}% + \footnotetext[\value{mpFootnoteValueSaver}]{% + Little is known other than her death.}% + \stepcounter{mpFootnoteValueSaver}% + \footnotetext[\value{mpFootnoteValueSaver}]{% + Relationship is unresolved in XXI.} + \end{center} + + For a floating 'table' environment (*note table::), use the +'tablefootnote' package. + + \usepackage{tablefootnote} % in preamble + ... + \begin{table} + \centering + \begin{tabular}{l|l} + \textsc{Date} &\textsc{Campaign} \\ \hline + 1862 &Fort Donelson \\ + 1863 &Vicksburg \\ + 1865 &Army of Northern Virginia\tablefootnote{% + Ending the war.} + \end{tabular} + \caption{Forces captured by US Grant} + \end{table} + +The footnote appears at the page bottom and is numbered in sequence with +other footnotes. 
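+
+   For instance, this sketch (the table content is invented) makes the
+marks inside a 'minipage' come out as arabic numbers rather than
+lowercase letters; the redefinition is local to the 'minipage'.
+
+     \begin{minipage}{\textwidth}
+       \renewcommand{\thempfootnote}{\arabic{mpfootnote}}
+       \begin{tabular}{l|l}
+         \textsc{Ship}        &\textsc{Fate} \\ \hline
+         \textit{HMS Leopard} &Wrecked\footnote{On Desolation Island.}
+       \end{tabular}
+     \end{minipage}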
+
+
+File: latex2e.info, Node: Footnotes of footnotes, Prev: Footnotes in a table, Up: Footnotes
+
+11.6 Footnotes of footnotes
+===========================
+
+Particularly in the humanities, authors can have multiple classes of
+footnotes, including having footnotes of footnotes. The package
+'bigfoot' extends LaTeX's default footnote mechanism in many ways,
+including allowing these two, as in this example.
+
+     \usepackage{bigfoot}  % in preamble
+     \DeclareNewFootnote{Default}
+     \DeclareNewFootnote{from}[alph]   % create class \footnotefrom{}
+      ...
+     The third theorem is a partial converse of the
+     second.\footnotefrom{%
+       Noted in Wilson.\footnote{Second edition only.}}
+
+
+File: latex2e.info, Node: Definitions, Next: Counters, Prev: Footnotes, Up: Top
+
+12 Definitions
+**************
+
+LaTeX has support for making new commands of many different kinds.
+
+* Menu:
+
+* \newcommand & \renewcommand:: (Re)define a new command.
+* \providecommand:: Define a new command, if name not used.
+* \makeatletter & \makeatother:: Change the status of the at-sign character.
+* \@ifstar:: Define your own commands with *-variants.
+* \newcounter:: Define a new counter.
+* \newlength:: Define a new length.
+* \newsavebox:: Define a new box.
+* \newenvironment & \renewenvironment:: Define a new environment.
+* \newtheorem:: Define a new theorem-like environment.
+* \newfont:: Define a new font name.
+* \protect:: Using tricky commands.
+* \ignorespaces & \ignorespacesafterend:: Discard extra spaces.
+* xspace package:: Space after a macro, conditionally.
+
+
+File: latex2e.info, Node: \newcommand & \renewcommand, Next: \providecommand, Up: Definitions
+
+12.1 '\newcommand' & '\renewcommand'
+====================================
+
+Synopses, one of:
+
+     \newcommand{\CMD}{DEFN}
+     \newcommand{\CMD}[NARGS]{DEFN}
+     \newcommand{\CMD}[NARGS][OPTARGDEFAULT]{DEFN}
+     \newcommand*{\CMD}{DEFN}
+     \newcommand*{\CMD}[NARGS]{DEFN}
+     \newcommand*{\CMD}[NARGS][OPTARGDEFAULT]{DEFN}
+
+or one of these.
+
+     \renewcommand{\CMD}{DEFN}
+     \renewcommand{\CMD}[NARGS]{DEFN}
+     \renewcommand{\CMD}[NARGS][OPTARGDEFAULT]{DEFN}
+     \renewcommand*{\CMD}{DEFN}
+     \renewcommand*{\CMD}[NARGS]{DEFN}
+     \renewcommand*{\CMD}[NARGS][OPTARGDEFAULT]{DEFN}
+
+   Define or redefine a command (see also the discussion of
+'\DeclareRobustCommand' in *note Class and package commands::).
+
+   The starred form of these two forbids the arguments from containing
+multiple paragraphs of text (in plain TeX terms, the commands are not
+'\long'). With the default form, arguments can be multiple paragraphs.
+
+   These are the parameters:
+
+CMD
+
+     Required; '\CMD' is the command name. It must begin with a
+     backslash, '\', and must not begin with the four character string
+     '\end'. For '\newcommand', it must not be already defined. For
+     '\renewcommand', this name must already be defined.
+
+NARGS
+     Optional; an integer from 0 to 9, specifying the number of
+     arguments that the command takes, including any optional argument.
+     Omitting this argument is the same as specifying 0, meaning that
+     the command has no arguments. If you redefine a command, the new
+     version can have a different number of arguments than the old
+     version.
+
+OPTARGDEFAULT
+     Optional; if this argument is present then the first argument of
+     '\CMD' is optional, with default value OPTARGDEFAULT (which may be
+     the empty string). If OPTARGDEFAULT is not present then '\CMD'
+     does not take an optional argument.
+ + That is, if '\CMD' is used with square brackets, as in + '\CMD[OPTVAL]{...}...', then within DEFN the parameter '#1' is set + to the value of OPTVAL. On the other hand, if '\CMD' is called + without the square brackets then within DEFN the parameter '#1' is + set to the value of OPTARGDEFAULT. In either case, the required + arguments start with '#2'. + + Omitting '[OPTARGDEFAULT]' is different from having the square + brackets with no contents, as in '[]'. The former sets '#1' to the + value of OPTARGDEFAULT; the latter sets '#1' to the empty string. + +DEFN + Required; the text to be substituted for every occurrence of + '\CMD'. The parameters '#1', '#2', ... '#NARGS' are replaced by + the values that you supply when you call the command (or by the + default value if there is an optional argument and you don't + exercise the option). + + TeX ignores blanks in the source following a control word (*note +Control sequences::), as in '\cmd '. If you actually want a space +there, one solution is to type '{}' after the command ('\cmd{} ', and +another solution is to use an explicit control space ('\cmd\ '). + + A simple example of defining a new command: '\newcommand{\RS}{Robin +Smith}' results in '\RS' being replaced by the longer text. Redefining +an existing command is similar: '\renewcommand{\qedsymbol}{{\small +QED}}'. + + If you try to define a command and the name has already been used +then you get something like 'LaTeX Error: Command \fred already defined. +Or name \end... illegal, see p.192 of the manual'. If you try to +redefine a command and the name has not yet been used then you get +something like 'LaTeX Error: \hank undefined'. + + Here the first definition creates a command with no arguments, and +the second, one with one required argument. + + \newcommand{\student}{Ms~O'Leary} + \newcommand{\defref}[1]{Definition~\ref{#1}} + +Use the first as in 'I highly recommend \student{} to you'. The second +has a variable argument, so that '\defref{def:basis}' expands to +'Definition~\ref{def:basis}', which ultimately expands to something like +'Definition~3.14'. + + Similarly, but with two required arguments: +'\newcommand{\nbym}[2]{$#1 \times #2$}' is invoked as '\nbym{2}{k}'. + + This example has an optional argument. + + \newcommand{\salutation}[1][Sir or Madam]{Dear #1:} + +Then '\salutation' gives 'Dear Sir or Madam:' while '\salutation[John]' +gives 'Dear John:'. And '\salutation[]' gives 'Dear :'. + + This example has an optional argument and two required arguments. + + \newcommand{\lawyers}[3][company]{#2, #3, and~#1} + I employ \lawyers[Howe]{Dewey}{Cheatem}. + +The output is 'I employ Dewey, Cheatem, and Howe'. The optional +argument, the 'Howe', is associated with '#1', while 'Dewey' and +'Cheatem' are associated with '#2' and '#3'. Because of the optional +argument, '\lawyers{Dewey}{Cheatem}' will give the output 'I employ +Dewey, Cheatem, and company'. + + The braces around DEFN do not define a group, that is, they do not +delimit the scope of the result of expanding DEFN. For example, with +'\newcommand{\shipname}[1]{\it #1}', in this sentence, + + The \shipname{Monitor} met the \shipname{Merrimac}. + +the words 'met the' would incorrectly be in italics. The solution is to +put another pair of braces inside the definition: +'\newcommand{\shipname}[1]{{\it #1}}'. + +* Menu: + +* Control sequences:: Control sequence, control word and control symbol. 
+ + +File: latex2e.info, Node: Control sequences, Up: \newcommand & \renewcommand + +12.1.1 Control sequence, control word and control symbol +-------------------------------------------------------- + +When reading input TeX converts the sequences of read characters into a +sequence of "tokens". When TeX sees a backslash '\', it will handle the +following characters in a special way in order to make a "control +sequence" token. + + The control sequences fall into two categories: + + * "control word", when the control sequence is gathered from a '\' + followed by at least one ASCII letter, followed by at least one + blank. The sequence of at least one ASCII letter is called the + "control sequence name". + * "control symbol", when the control sequence is gathered from a '\' + followed by one non-letter character. + + Blanks after a control word are ignored and do not produce any +whitespace in the output (*note \newcommand & \renewcommand:: and *note +\(SPACE)::). + + Just as the '\relax' command does nothing, the following will print +'Hello!'--if you use the Emacs info viewer, turn on the whitespace-mode +minor mode to see the trailing spaces: + + Hel\relax + lo! + +This is because blanks after '\relax', including the newline are +ignored, and blanks at the beginning of a line are also ignored (*note +Leading blanks::). + + +File: latex2e.info, Node: \providecommand, Next: \makeatletter & \makeatother, Prev: \newcommand & \renewcommand, Up: Definitions + +12.2 '\providecommand' +====================== + +Synopses, one of: + + \providecommand{\CMD}{DEFN} + \providecommand{\CMD}[NARGS]{DEFN} + \providecommand{\CMD}[NARGS][OPTARGDEFAULT]{DEFN} + \providecommand*{\CMD}{DEFN} + \providecommand*{\CMD}[NARGS]{DEFN} + \providecommand*{\CMD}[NARGS][OPTARGDEFAULT]{DEFN} + + Defines a command, as long as no command of this name already exists. +If no command of this name already exists then this has the same effect +as '\newcommand'. If a command of this name already exists then this +definition does nothing. This is particularly useful in a file that may +be loaded more than once, such as a style file. *Note \newcommand & +\renewcommand::, for the description of the arguments. + + This example + + \providecommand{\myaffiliation}{Saint Michael's College} + \providecommand{\myaffiliation}{Lyc\'ee Henri IV} + From \myaffiliation. + +outputs 'From Saint Michael's College'. Unlike '\newcommand', the +repeated use of '\providecommand' does not give an error. + + +File: latex2e.info, Node: \makeatletter & \makeatother, Next: \@ifstar, Prev: \providecommand, Up: Definitions + +12.3 '\makeatletter' & '\makeatother' +===================================== + +Synopsis: + + \makeatletter + ... definition of commands with @ in their name .. + \makeatother + + Use this pair when you redefine LaTeX commands that are named with an +at-sign character ''@''. The '\makeatletter' declaration makes the +at-sign character have the category code of a letter, code 11. The +'\makeatother' declaration sets the category code of the at-sign to +code 12, its default value. + + As TeX reads characters, it assigns each one a category code, or +"catcode". For instance, it assigns the backslash character ''\'' the +catcode 0. Command names consist of a category 0 character, ordinarily +backslash, followed by letters, category 11 characters (except that a +command name can also consist of a category 0 character followed by a +single non-letter symbol). 
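+
+   For instance, in this sketch (the command names are made up for
+illustration), the at-sign is treated as a letter between the two
+declarations, so '\my@default' is a single command name there; the
+later use of '\greet' works even outside the pair because its
+definition was read while the at-sign was still a letter.
+
+     \makeatletter
+     \newcommand{\my@default}{world}   % '@' acts as a letter here
+     \newcommand{\greet}{Hello, \my@default!}
+     \makeatother
+
+     \greet  % prints 'Hello, world!'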
+ + LaTeX's source code has the convention that some commands use '@' in +their name. These commands are mainly intended for package or class +writers. The convention prevents authors who are just using a package +or class from accidentally replacing such a command with one of their +own, because by default the at-sign has catcode 12. + + Use the pair '\makeatletter' and '\makeatother' inside a '.tex' file, +typically in the preamble, when you are defining or redefining commands +named with '@', by having them surround your definition. Don't use +these inside '.sty' or '.cls' files since the '\usepackage' and +'\documentclass' commands already arrange that the at-sign has the +character code of a letter, catcode 11. + + For a comprehensive list of macros with an at-sign in their names see +. + + In this example the class file has a command '\thesis@universityname' +that the user wants to change. These three lines should go in the +preamble, before the '\begin{document}'. + + \makeatletter + \renewcommand{\thesis@universityname}{Saint Michael's College} + \makeatother + + +File: latex2e.info, Node: \@ifstar, Next: \newcounter, Prev: \makeatletter & \makeatother, Up: Definitions + +12.4 '\@ifstar' +=============== + +Synopsis: + + \newcommand{\mycmd}{\@ifstar{\mycmd@star}{\mycmd@nostar}} + \newcommand{\mycmd@nostar}[NOSTAR-NUM-ARGS]{NOSTAR-BODY} + \newcommand{\mycmd@star}[STAR-NUM-ARGS]{STAR-BODY} + + Many standard LaTeX environments or commands have a variant with the +same name but ending with a star character '*', an asterisk. Examples +are the 'table' and 'table*' environments and the '\section' and +'\section*' commands. + + When defining environments, following this pattern is straightforward +because '\newenvironment' and '\renewenvironment' allow the environment +name to contain a star. So you just have to write +'\newenvironment{MYENV}' or '\newenvironment{MYENV*}' and continue the +definition as usual. For commands the situation is more complex as the +star not being a letter cannot be part of the command name. As in the +synopsis above, there will be a user-called command, given above as +'\mycmd', which peeks ahead to see if it is followed by a star. For +instance, LaTeX does not really have a '\section*' command; instead, the +'\section' command peeks ahead. This command does not accept arguments +but instead expands to one of two commands that do accept arguments. In +the synopsis these two are '\mycmd@nostar' and '\mycmd@star'. They +could take the same number of arguments or a different number, or no +arguments at all. As always, in a LaTeX document a command using an +at-sign '@' in its name must be enclosed inside a '\makeatletter ... +\makeatother' block (*note \makeatletter & \makeatother::). + + This example of '\@ifstar' defines the command '\ciel' and a variant +'\ciel*'. Both have one required argument. A call to '\ciel{blue}' +will return "not starry blue sky" while '\ciel*{night}' will return +"starry night sky". + + \makeatletter + \newcommand*{\ciel@unstarred}[1]{not starry #1 sky} + \newcommand*{\ciel@starred}[1]{starry #1 sky} + \newcommand*{\ciel}{\@ifstar{\ciel@starred}{\ciel@unstarred}} + \makeatother + + In the next example, the starred variant takes a different number of +arguments than the unstarred one. 
With this definition, Agent 007's
+'``My name is \agentsecret*{Bond}, \agentsecret{James}{Bond}.''' is
+equivalent to entering the commands '``My name is \textsc{Bond},
+\textit{James} \textsc{Bond}.'''
+
+     \newcommand*{\agentsecret@unstarred}[2]{\textit{#1} \textsc{#2}}
+     \newcommand*{\agentsecret@starred}[1]{\textsc{#1}}
+     \newcommand*{\agentsecret}{%
+       \@ifstar{\agentsecret@starred}{\agentsecret@unstarred}}
+
+   After a command name, a star is handled similarly to an optional
+argument. (This differs from environment names in which the star is
+part of the name itself and as such could be in any position.) Thus, it
+is technically possible to put any number of spaces between the command
+and the star. Thus '\agentsecret*{Bond}' and '\agentsecret *{Bond}' are
+equivalent. However, the standard practice is not to insert any such
+spaces.
+
+   There are two alternative ways to accomplish the work of '\@ifstar'.
+(1) The 'suffix' package allows the construct
+'\newcommand\mycommand{UNSTARRED-VARIANT}' followed by
+'\WithSuffix\newcommand\mycommand*{STARRED-VARIANT}'. (2) LaTeX
+provides the 'xparse' package, which allows this code:
+
+     \NewDocumentCommand\foo{s}{\IfBooleanTF#1
+       {STARRED-VARIANT}%
+       {UNSTARRED-VARIANT}%
+     }
+
+
+File: latex2e.info, Node: \newcounter, Next: \newlength, Prev: \@ifstar, Up: Definitions
+
+12.5 '\newcounter': Allocating a counter
+========================================
+
+Synopsis, one of:
+
+     \newcounter{COUNTERNAME}
+     \newcounter{COUNTERNAME}[SUPERCOUNTER]
+
+   Globally defines a new counter named COUNTERNAME and initializes it to
+zero (*note Counters::).
+
+   The name COUNTERNAME must consist of letters only. It does not begin
+with a backslash. This name must not already be in use by another
+counter.
+
+   When you use the optional argument '[SUPERCOUNTER]' then the counter
+COUNTERNAME will be reset to zero whenever SUPERCOUNTER is incremented.
+For example, ordinarily 'subsection' is numbered within 'section' so
+that any time you increment SECTION, either with '\stepcounter' (*note
+\stepcounter::) or '\refstepcounter' (*note \refstepcounter::), then
+LaTeX will reset SUBSECTION to zero.
+
+   This example
+
+     \newcounter{asuper}  \setcounter{asuper}{1}
+     \newcounter{asub}[asuper]  \setcounter{asub}{3}   % Note `asuper'
+     The value of asuper is \arabic{asuper} and of asub is \arabic{asub}.
+     \stepcounter{asuper}
+     Now asuper is \arabic{asuper} while asub is \arabic{asub}.
+
+   produces 'The value of asuper is 1 and that of asub is 3' and 'Now
+asuper is 2 while asub is 0'.
+
+   If the counter already exists, for instance by entering 'asuper'
+twice, then you get something like 'LaTeX Error: Command \c@asuper
+already defined. Or name \end... illegal, see p.192 of the manual.'.
+
+   If you use the optional argument then the super counter must already
+exist. Entering '\newcounter{jh}[lh]' when 'lh' is not a defined
+counter will get you 'LaTeX Error: No counter 'lh' defined.'
+
+
+File: latex2e.info, Node: \newlength, Next: \newsavebox, Prev: \newcounter, Up: Definitions
+
+12.6 '\newlength'
+=================
+
+Synopsis:
+
+     \newlength{\LEN}
+
+   Allocate a new length register (*note Lengths::). The required
+argument '\LEN' has to be a control sequence (*note Control
+sequences::), and as such must begin with a backslash, '\' under normal
+circumstances. The new register holds rubber lengths such as '72.27pt'
+or '1in plus.2in minus.1in' (a LaTeX length register is what plain TeX
+calls a 'skip' register). The initial value is zero.
The control +sequence '\LEN' must not be already defined. + + An example: + + \newlength{\graphichgt} + + If you forget the backslash then you get 'Missing control sequence +inserted'. If the control sequence already exists then you get +something like 'LaTeX Error: Command \graphichgt already defined. Or +name \end... illegal, see p.192 of the manual'. + + +File: latex2e.info, Node: \newsavebox, Next: \newenvironment & \renewenvironment, Prev: \newlength, Up: Definitions + +12.7 '\newsavebox' +================== + +Synopsis: + + \newsavebox{\CMD} + + Define \CMD, the string consisting of a backslash followed by CMD, to +refer to a new bin for storing material. These bins hold material that +has been typeset, to use multiple times or to measure or manipulate +(*note Boxes::). The bin name \CMD is required, must start with a +backslash, \, and must not already be a defined command. This command +is fragile (*note \protect::). + + This allocates a bin and then puts typeset material into it. + + \newsavebox{\logobox} + \savebox{\logobox}{LoGo} + Our logo is \usebox{\logobox}. + +The output is 'Our logo is LoGo'. + + If there is an already defined bin then you get something like 'LaTeX +Error: Command \logobox already defined. Or name \end... illegal, see +p.192 of the manual'. + + The allocation of a box is global. + + +File: latex2e.info, Node: \newenvironment & \renewenvironment, Next: \newtheorem, Prev: \newsavebox, Up: Definitions + +12.8 '\newenvironment' & '\renewenvironment' +============================================ + +Synopses, one of: + + \newenvironment{ENV}{BEGDEF}{ENDDEF} + \newenvironment{ENV}[NARGS]{BEGDEF}{ENDDEF} + \newenvironment{ENV}[NARGS][OPTARGDEFAULT]{BEGDEF}{ENDDEF} + \newenvironment*{ENV}{BEGDEF}{ENDDEF} + \newenvironment*{ENV}[NARGS]{BEGDEF}{ENDDEF} + \newenvironment*{ENV}[NARGS][OPTARGDEFAULT]{BEGDEF}{ENDDEF} + +or one of these. + + \renewenvironment{ENV}{BEGDEF}{ENDDEF} + \renewenvironment{ENV}[NARGS]{BEGDEF}{ENDDEF} + \renewenvironment{ENV}[NARGS][OPTARGDEFAULT]{BEGDEF}{ENDDEF} + \renewenvironment*{ENV}{BEGDEF}{ENDDEF} + \renewenvironment*{ENV}[NARGS]{BEGDEF}{ENDDEF} + \renewenvironment*{ENV}[NARGS][OPTARGDEFAULT]{BEGDEF}{ENDDEF} + + Define or redefine the environment ENV, that is, create the construct +'\begin{ENV} ... BODY ... \end{ENV}'. + + The starred form of these commands requires that the arguments not +contain multiple paragraphs of text. However, the body of these +environments can contain multiple paragraphs. + +ENV + Required; the environment name. It consists only of letters or the + '*' character, and thus does not begin with backslash, '\'. It + must not begin with the string 'end'. For '\newenvironment', the + name ENV must not be the name of an already existing environment, + and also the command '\ENV' must be undefined. For + '\renewenvironment', ENV must be the name of an existing + environment. + +NARGS + Optional; an integer from 0 to 9 denoting the number of arguments + of that the environment takes. When you use the environment these + arguments appear after the '\begin', as in '\begin{ENV}{ARG1} ... + {ARGN}'. Omitting this is equivalent to setting it to 0; the + environment will have no arguments. When redefining an + environment, the new version can have a different number of + arguments than the old version. + +OPTARGDEFAULT + Optional; if this is present then the first argument of the defined + environment is optional, with default value OPTARGDEFAULT (which + may be the empty string). 
If this is not in the definition then + the environment does not take an optional argument. + + That is, when OPTARGDEFAULT is present in the definition of the + environment then you can start the environment with square + brackets, as in '\begin{ENV}[OPTVAL]{...} ... \end{ENV}'. In this + case, within BEGDEFN the parameter '#1' is set to the value of + OPTVAL. If you call '\begin{ENV}' without square brackets, then + within BEGDEFN the parameter '#1' is set to the value of the + default OPTARGDEFAULT. In either case, any required arguments + start with '#2'. + + Omitting '[MYVAL]' in the call is different than having the square + brackets with no contents, as in '[]'. The former results in '#1' + expanding to OPTARGDEFAULT; the latter results in '#1' expanding to + the empty string. + +BEGDEF + Required; the text expanded at every occurrence of '\begin{ENV}'. + Within BEGDEF, the parameters '#1', '#2', ... '#NARGS', are + replaced by the values that you supply when you call the + environment; see the examples below. + +ENDDEF + Required; the text expanded at every occurrence of '\end{ENV}'. + This may not contain any parameters, that is, you cannot use '#1', + '#2', etc., here (but see the final example below). + + All environments, that is to say the BEGDEF code, the environment +body, and the ENDDEF code, are processed within a group. Thus, in the +first example below, the effect of the '\small' is limited to the quote +and does not extend to material following the environment. + + If you try to define an environment and the name has already been +used then you get something like 'LaTeX Error: Command \fred already +defined. Or name \end... illegal, see p.192 of the manual'. If you try +to redefine an environment and the name has not yet been used then you +get something like 'LaTeX Error: Environment hank undefined.'. + + This example gives an environment like LaTeX's 'quotation' except +that it will be set in smaller type. + + \newenvironment{smallquote}{% + \small\begin{quotation} + }{% + \end{quotation} + } + + This has an argument, which is set in boldface at the start of a +paragraph. + + \newenvironment{point}[1]{% + \noindent\textbf{#1} + }{% + } + + This one shows the use of a optional argument; it gives a quotation +environment that cites the author. + + \newenvironment{citequote}[1][Shakespeare]{% + \begin{quotation} + \noindent\textit{#1}: + }{% + \end{quotation} + } + +The author's name is optional, and defaults to 'Shakespeare'. In the +document, use the environment like this. + + \begin{citequote}[Lincoln] + ... + \end{citequote} + + The final example shows how to save the value of an argument to use +in ENDDEF, in this case in a box (*note \sbox & \savebox::). + + \newsavebox{\quoteauthor} + \newenvironment{citequote}[1][Shakespeare]{% + \sbox\quoteauthor{#1}% + \begin{quotation} + }{% + \hspace{1em plus 1fill}---\usebox{\quoteauthor} + \end{quotation} + } + + +File: latex2e.info, Node: \newtheorem, Next: \newfont, Prev: \newenvironment & \renewenvironment, Up: Definitions + +12.9 '\newtheorem' +================== + +Synopses: + + \newtheorem{NAME}{TITLE} + \newtheorem{NAME}{TITLE}[NUMBERED_WITHIN] + \newtheorem{NAME}[NUMBERED_LIKE]{TITLE} + + Define a new theorem-like environment. You can specify one of +NUMBERED_WITHIN and NUMBERED_LIKE, or neither, but not both. + + The first form, '\newtheorem{NAME}{TITLE}', creates an environment +that will be labelled with TITLE; see the first example below. 
+ + The second form, '\newtheorem{NAME}{TITLE}[NUMBERED_WITHIN]', creates +an environment whose counter is subordinate to the existing counter +NUMBERED_WITHIN, so this counter will be reset when NUMBERED_WITHIN is +reset. See the second example below. + + The third form '\newtheorem{NAME}[NUMBERED_LIKE]{TITLE}', with +optional argument between the two required arguments, creates an +environment whose counter will share the previously defined counter +NUMBERED_LIKE. See the third example. + + This command creates a counter named NAME. In addition, unless the +optional argument NUMBERED_LIKE is used, inside of the theorem-like +environment the current '\ref' value will be that of +'\theNUMBERED_WITHIN' (*note \ref::). + + This declaration is global. It is fragile (*note \protect::). + + Arguments: + +NAME + The name of the environment. It is a string of letters. It must + not begin with a backslash, '\'. It must not be the name of an + existing environment, and the command name '\NAME' must not already + be defined. + +TITLE + The text to be printed at the beginning of the environment, before + the number. For example, 'Theorem'. + +NUMBERED_WITHIN + Optional; the name of an already defined counter, usually a + sectional unit such as 'chapter' or 'section'. When the + NUMBERED_WITHIN counter is reset then the NAME environment's + counter will also be reset. + + If this optional argument is not used then the command '\theNAME' + is set to '\arabic{NAME}'. + +NUMBERED_LIKE + Optional; the name of an already defined theorem-like environment. + The new environment will be numbered in sequence with + NUMBERED_LIKE. + + Without any optional arguments the environments are numbered +sequentially. The example below has a declaration in the preamble that +results in 'Definition 1' and 'Definition 2' in the output. + + \newtheorem{defn}{Definition} + \begin{document} + \section{...} + \begin{defn} + First def + \end{defn} + + \section{...} + \begin{defn} + Second def + \end{defn} + + This example has the same document body as the prior one. But here +'\newtheorem''s optional argument NUMBERED_WITHIN is given as 'section', +so the output is like 'Definition 1.1' and 'Definition 2.1'. + + \newtheorem{defn}{Definition}[section] + \begin{document} + \section{...} + \begin{defn} + First def + \end{defn} + + \section{...} + \begin{defn} + Second def + \end{defn} + + In the next example there are two declarations in the preamble, the +second of which calls for the new 'thm' environment to use the same +counter as 'defn'. It gives 'Definition 1.1', followed by 'Theorem 2.1' +and 'Definition 2.2'. + + \newtheorem{defn}{Definition}[section] + \newtheorem{thm}[defn]{Theorem} + \begin{document} + \section{...} + \begin{defn} + First def + \end{defn} + + \section{...} + \begin{thm} + First thm + \end{thm} + + \begin{defn} + Second def + \end{defn} + + +File: latex2e.info, Node: \newfont, Next: \protect, Prev: \newtheorem, Up: Definitions + +12.10 '\newfont' +================ + +This command is obsolete. This description is here only to help with +old documents. New documents should define fonts in families through +the New Font Selection Scheme which allows you to, for example, +associate a boldface with a roman (*note Fonts::). + + Synopsis: + + \newfont{\CMD}{FONT DESCRIPTION} + + Define a command '\CMD' that will change the current font. The +control sequence must not already be defined. It must begin with a +backslash, '\'. + + The FONT DESCRIPTION consists of a FONTNAME and an optional "at +clause". 
LaTeX will look on your system for a file named
+'FONTNAME.tfm'. The at clause can have the form either 'at DIMEN' or
+'scaled FACTOR', where a FACTOR of '1000' means no scaling. For LaTeX's
+purposes, all this does is scale all the character and other font
+dimensions relative to the font's design size, which is a value defined
+in the '.tfm' file.
+
+   This defines two equivalent fonts and typesets a few characters in
+each.
+
+     \newfont{\testfontat}{cmb10 at 11pt}
+     \newfont{\testfontscaled}{cmb10 scaled 1100}
+     \testfontat abc
+     \testfontscaled abc
+
+
+File: latex2e.info, Node: \protect, Next: \ignorespaces & \ignorespacesafterend, Prev: \newfont, Up: Definitions
+
+12.11 '\protect'
+================
+
+All LaTeX commands are either "fragile" or "robust". A fragile command
+can break when it is used in the argument to certain other commands.
+Commands that contain data that LaTeX writes to an auxiliary file and
+re-reads later are fragile. This includes material that goes into a
+table of contents, list of figures, list of tables, etc. Fragile
+commands also include line breaks, any command that has an optional
+argument, and many more. To prevent such commands from breaking, one
+solution is to precede them with the command '\protect'.
+
+   For example, when LaTeX runs the '\section{SECTION NAME}' command it
+writes the SECTION NAME text to the '.aux' auxiliary file, moving it
+there for use elsewhere in the document such as in the table of
+contents. Any argument that is internally expanded by LaTeX without
+typesetting it directly is referred to as a "moving argument". A
+command is fragile if it can expand during this process into invalid TeX
+code. Some examples of moving arguments are those that appear in the
+'\caption{...}' command (*note figure::), in the '\thanks{...}' command
+(*note \maketitle::), and in @-expressions in the 'tabular' and 'array'
+environments (*note tabular::).
+
+   If you get strange errors from commands used in moving arguments, try
+preceding them with '\protect'. Every fragile command must be protected
+with its own '\protect'.
+
+   Although usually a '\protect' command doesn't hurt, length commands
+are robust and should not be preceded by a '\protect' command. Nor can
+a '\protect' command be used in the argument to '\addtocounter' or
+'\setcounter'.
+
+   In this example the '\caption' command gives a mysterious error about
+an extra curly brace. Fix the problem by preceding each '\raisebox'
+command with '\protect'.
+
+     \begin{figure}
+       ...
+       \caption{Company headquarters of A\raisebox{1pt}{B}\raisebox{-1pt}{C}}
+     \end{figure}
+
+   In the next example the '\tableofcontents' command gives an error
+because the '\(..\)' in the section title expands to illegal TeX in the
+'.toc' file. You can solve this by changing '\(..\)' to
+'\protect\(..\protect\)'.
+
+     \begin{document}
+     \tableofcontents
+     ...
+     \section{Einstein's \( e=mc^2 \)}
+     ...
+
+
+File: latex2e.info, Node: \ignorespaces & \ignorespacesafterend, Next: xspace package, Prev: \protect, Up: Definitions
+
+12.12 '\ignorespaces & \ignorespacesafterend'
+=============================================
+
+Synopsis:
+
+     \ignorespaces
+
+or
+
+     \ignorespacesafterend
+
+   Both commands cause LaTeX to ignore blanks (that is, characters of
+catcode 10 such as space or tabulation) after the end of the command up
+to the first box or non-blank character. The first is a command from
+plain TeX, and the second is LaTeX-specific.
+
+   The '\ignorespaces' is often used when defining commands via
+'\newcommand', or '\newenvironment', or '\def'. The example below
+illustrates. It allows a user to show the points values for quiz
+questions in the margin but it is inconvenient because, as shown in the
+'enumerate' list, users must not put any space between the command and
+the question text.
+
+     \newcommand{\points}[1]{\makebox[0pt]{\makebox[10em][l]{#1~pts}}}
+     \begin{enumerate}
+       \item\points{10}no extra space output here
+       \item\points{15} extra space between the number and the `extra'
+     \end{enumerate}
+
+The solution is to change to this.
+
+     \newcommand{\points}[1]{%
+       \makebox[0pt]{\makebox[10em][l]{#1~pts}}\ignorespaces}
+
+   A second example shows blanks being removed from the front of text.
+The commands below allow a user to uniformly attach a title to names.
+But, as given, if a title accidentally starts with a space then
+'\fullname' will reproduce that.
+
+     \newcommand{\honorific}[1]{\def\honorific{#1}}   % remember title
+     \newcommand{\fullname}[1]{\honorific~#1}         % put title before name
+
+     \begin{tabular}{|l|}
+     \honorific{Mr/Ms}  \fullname{Jones} \\   % no extra space here
+     \honorific{ Mr/Ms} \fullname{Jones}      % extra space before title
+     \end{tabular}
+
+To fix this, change to
+'\newcommand{\fullname}[1]{\ignorespaces\honorific~#1}'.
+
+   The '\ignorespaces' is also often used in a '\newenvironment' at the
+end of the BEGIN clause, as in '\newenvironment{ENV NAME}{...
+\ignorespaces}{...}'.
+
+   To strip blanks off the end of an environment use
+'\ignorespacesafterend'. An example is that this will show a much
+larger vertical space between the first and second environments than
+between the second and third.
+
+     \newenvironment{eq}{\begin{equation}}{\end{equation}}
+     \begin{eq}
+     e=mc^2
+     \end{eq}
+     \begin{equation}
+     F=ma
+     \end{equation}
+     \begin{equation}
+     E=IR
+     \end{equation}
+
+   Putting a comment character '%' immediately after the '\end{eq}' will
+make the vertical space disappear, but that is inconvenient. The
+solution is to change to
+'\newenvironment{eq}{\begin{equation}}{\end{equation}\ignorespacesafterend}'.
+
+
+File: latex2e.info, Node: xspace package, Prev: \ignorespaces & \ignorespacesafterend, Up: Definitions
+
+12.13 xspace package
+====================
+
+Synopsis:
+
+     \usepackage{xspace}
+       ...
+     \newcommand{...}{...\xspace}
+
+   The '\xspace' macro, when used at the end of a command definition,
+adds a space unless the command is followed by certain punctuation
+characters.
+
+   After a command control sequence that is a control word (*note
+Control sequences::, as opposed to control symbols such as '\$'), TeX
+gobbles blank characters. Thus, in the first sentence below, the output
+has 'Vermont' placed snugly against the period, without any intervening
+space.
+
+     \newcommand{\VT}{Vermont}
+     Our college is in \VT .
+     The \VT{} summers are nice.
+
+   But because of the gobbling, the second sentence needs the empty
+curly braces or else there would be no space separating 'Vermont' from
+'summers'. (Many authors instead use a backslash-space '\ ' for this.
+*Note \(SPACE)::.)
+
+   The 'xspace' package provides '\xspace'. It is for writing commands
+which are designed to be used mainly in text. It must be placed at the
+very end of the definition of these commands. It inserts a space after
+that command unless what immediately follows is in a list of exceptions.
+In this example, the empty braces are not needed.
+
+     \newcommand{\VT}{Vermont\xspace}
+     Our college is in \VT .
+ \VT summers are nice. + + The default exception list contains the characters ',.'/?;:!~-)', the +open curly brace and the backslash-space command discussed above, and +the commands '\footnote' or '\footnotemark'. Add to that list as with +'\xspaceaddexceptions{\myfni \myfnii}' which adds '\myfni' and '\myfnii' +to the list, and remove from that list as with +'\xspaceremoveexception{!}'. + + A comment: many experts prefer not to use '\xspace'. Putting it in a +definition means that the command will usually get the spacing right. +But it isn't easy to predict when to enter empty braces because +'\xspace' will get it wrong, such as when it is followed by another +command, and so '\xspace' can make editing material harder and more +error-prone than instead always inserting the empty braces. + + +File: latex2e.info, Node: Counters, Next: Lengths, Prev: Definitions, Up: Top + +13 Counters +*********** + +Everything LaTeX numbers for you has a counter associated with it. The +name of the counter is often the same as the name of the environment or +command associated with the number, except that the counter's name has +no backslash '\'. Thus, associated with the '\chapter' command is the +'chapter' counter that keeps track of the chapter number. + + Below is a list of the counters used in LaTeX's standard document +classes to control numbering. + + part paragraph figure enumi + chapter subparagraph table enumii + section page footnote enumiii + subsection equation mpfootnote enumiv + subsubsection + + The 'mpfootnote' counter is used by the '\footnote' command inside of +a minipage (*note minipage::). The counters 'enumi' through 'enumiv' +are used in the 'enumerate' environment, for up to four levels of +nesting (*note enumerate::). + + Counters can have any integer value but they are typically positive. + + New counters are created with '\newcounter'. *Note \newcounter::. + +* Menu: + +* \alph \Alph \arabic \roman \Roman \fnsymbol:: Print value of a counter. +* \usecounter:: Use a specified counter in a list environment. +* \value:: Use the value of a counter in an expression. +* \setcounter:: Set the value of a counter. +* \addtocounter:: Add a quantity to a counter. +* \refstepcounter:: Add to a counter. +* \stepcounter:: Add to a counter, resetting subsidiary counters. +* \day & \month & \year:: Numeric date values. + + +File: latex2e.info, Node: \alph \Alph \arabic \roman \Roman \fnsymbol, Next: \usecounter, Up: Counters + +13.1 '\alph \Alph \arabic \roman \Roman \fnsymbol': Printing counters +===================================================================== + +Print the value of a counter, in a specified style. For instance, if +the counter COUNTER has the value 1 then a '\alph{COUNTER}' in your +source will result in a lowercase letter a appearing in the output. + + All of these commands take a single counter as an argument, for +instance, '\alph{enumi}'. Note that the counter name does not start +with a backslash. + +'\alph{COUNTER}' + Print the value of COUNTER in lowercase letters: 'a', 'b', ... If + the counter's value is less than 1 or more than 26 then you get + 'LaTeX Error: Counter too large.' + +'\Alph{COUNTER}' + Print in uppercase letters: 'A', 'B', ... If the counter's value + is less than 1 or more than 26 then you get 'LaTeX Error: Counter + too large.' + +'\arabic{COUNTER}' + Print in Arabic numbers such as '5' or '-2'. + +'\roman{COUNTER}' + Print in lowercase roman numerals: 'i', 'ii', ... 
If the counter's + value is less than 1 then you get no warning or error but LaTeX + does not print anything in the output. + +'\Roman{COUNTER}' + Print in uppercase roman numerals: 'I', 'II', ... If the counter's + value is less than 1 then you get no warning or error but LaTeX + does not print anything in the output. + +'\fnsymbol{COUNTER}' + Prints the value of COUNTER using a sequence of nine symbols that + are traditionally used for labeling footnotes. The value of + COUNTER should be between 1 and 9, inclusive. If the counter's + value is less than 0 or more than 9 then you get 'LaTeX Error: + Counter too large', while if it is 0 then you get no error or + warning but LaTeX does not output anything. + + Here are the symbols: + + Number Name Command Symbol + ----------------------------------------------------------------------------- + 1 asterisk '\ast' * + 2 dagger '\dagger' † + 3 ddagger '\ddagger' ‡ + 4 section-sign '\S' § + 5 paragraph-sign '\P' ¶ + 6 double-vert '\parallel' ‖ + 7 double-asterisk '\ast\ast' ** + 8 double-dagger '\dagger\dagger' †† + 9 double-ddagger '\ddagger\ddagger' ‡‡ + + +File: latex2e.info, Node: \usecounter, Next: \value, Prev: \alph \Alph \arabic \roman \Roman \fnsymbol, Up: Counters + +13.2 '\usecounter' +================== + +Synopsis: + + \usecounter{COUNTER} + + Used in the second argument of the 'list' environment (*note list::), +this declares that list items will be numbered by COUNTER. It +initializes COUNTER to zero, and arranges that when '\item' is called +without its optional argument then COUNTER is incremented by +'\refstepcounter', making its value be the current 'ref' value (*note +\ref::). This command is fragile (*note \protect::). + + Put in the document preamble, this example makes a new list +environment enumerated with TESTCOUNTER: + + \newcounter{testcounter} + \newenvironment{test}{% + \begin{list}{}{% + \usecounter{testcounter} + } + }{% + \end{list} + } + + +File: latex2e.info, Node: \value, Next: \setcounter, Prev: \usecounter, Up: Counters + +13.3 '\value' +============= + +Synopsis: + + \value{COUNTER} + + Expands to the value of the counter COUNTER. (Note that the name of +a counter does not begin with a backslash.) + + This example outputs 'Test counter is 6. Other counter is 5.'. + + \newcounter{test} \setcounter{test}{5} + \newcounter{other} \setcounter{other}{\value{test}} + \addtocounter{test}{1} + + Test counter is \arabic{test}. + Other counter is \arabic{other}. + + The '\value' command is not used for typesetting the value of the +counter. For that, see *note \alph \Alph \arabic \roman \Roman +\fnsymbol::. + + It is often used in '\setcounter' or '\addtocounter' but '\value' can +be used anywhere that LaTeX expects a number, such as in +'\hspace{\value{foo}\parindent}'. It must not be preceded by '\protect' +(*note \protect::). + + This example inserts '\hspace{4\parindent}'. + + \setcounter{myctr}{3} \addtocounter{myctr}{1} + \hspace{\value{myctr}\parindent} + + +File: latex2e.info, Node: \setcounter, Next: \addtocounter, Prev: \value, Up: Counters + +13.4 '\setcounter' +================== + +Synopsis: + + \setcounter{COUNTER}{VALUE} + + Globally set the counter COUNTER to have the value of the VALUE +argument, which must be an integer. Thus, you can set a counter's value +as '\setcounter{section}{5}'. Note that the counter name does not start +with a backslash. + + In this example if the counter 'theorem' has value 12 then the second +line will print 'XII'. 
+
+     \setcounter{exercise}{\value{theorem}}
+     Here it is in Roman: \Roman{exercise}.
+
+
+File: latex2e.info, Node: \addtocounter, Next: \refstepcounter, Prev: \setcounter, Up: Counters
+
+13.5 '\addtocounter'
+====================
+
+Synopsis:
+
+     \addtocounter{COUNTER}{VALUE}
+
+   Globally increment COUNTER by the amount specified by the VALUE
+argument, which may be negative.
+
+   In this example the section value appears as 'VII'.
+
+     \setcounter{section}{5}
+     \addtocounter{section}{2}
+     Here it is in Roman: \Roman{section}.
+
+
+File: latex2e.info, Node: \refstepcounter, Next: \stepcounter, Prev: \addtocounter, Up: Counters
+
+13.6 '\refstepcounter'
+======================
+
+Synopsis:
+
+     \refstepcounter{COUNTER}
+
+   Globally increments the value of COUNTER by one and resets all
+counters numbered within it, as does '\stepcounter' (*note
+\stepcounter::). (For the definition of "counters numbered within",
+*note \newcounter::.)
+
+   The difference is that this command also defines the current '\ref'
+value to be the result of '\thecounter'.
+
+   While the counter value is set globally, the '\ref' value is set
+locally, i.e., inside the current group.
+
+
+File: latex2e.info, Node: \stepcounter, Next: \day & \month & \year, Prev: \refstepcounter, Up: Counters
+
+13.7 '\stepcounter'
+===================
+
+Synopsis:
+
+     \stepcounter{COUNTER}
+
+   Globally adds one to COUNTER and resets all counters numbered within
+it. (For the definition of "counters numbered within", *note
+\newcounter::.)
+
+   This command differs from '\refstepcounter' in that this one does not
+influence references; that is, it does not define the current '\ref'
+value to be the result of '\thecounter' (*note \refstepcounter::).
+
+
+File: latex2e.info, Node: \day & \month & \year, Prev: \stepcounter, Up: Counters
+
+13.8 '\day' & '\month' & '\year'
+================================
+
+LaTeX defines the counter '\day' for the day of the month (nominally
+with value between 1 and 31), '\month' for the month of the year
+(nominally with value between 1 and 12), and '\year' for the year. When
+TeX starts up, they are set from the current values on the system. The
+related command '\today' produces a string representing the current day
+(*note \today::).
+
+   The counters are not updated as the job progresses, so in principle
+they could be incorrect by the end. In addition, TeX does no sanity
+check:
+
+     \day=-2 \month=13 \year=-4 \today
+
+gives no error or warning and results in the output '-2, -4' (the bogus
+month value produces no output).
+
+   *Note Command line input::, to force the date to a given value from
+the command line.
+
+
+File: latex2e.info, Node: Lengths, Next: Making paragraphs, Prev: Counters, Up: Top
+
+14 Lengths
+**********
+
+A "length" is a measure of distance. Many LaTeX commands take a length
+as an argument.
+
+   Lengths come in two types. A "rigid length" such as '10pt' does not
+contain a 'plus' or 'minus' component. (Plain TeX calls this a
+"dimen".) A "rubber length" (what plain TeX calls a "skip" or "glue")
+such as with '1cm plus0.05cm minus0.01cm' can contain either or both of
+those components. In that rubber length, the '1cm' is the "natural
+length" while the other two, the 'plus' and 'minus' components, allow
+TeX to stretch or shrink the length to optimize placement.
+
+   The illustrations below use these two commands.
+ + % make a black bar 10pt tall and #1 wide + \newcommand{\blackbar}[1]{\rule{#1}{10pt}} + + % Make a box around #2 that is #1 wide (excluding the border) + \newcommand{\showhbox}[2]{% + \fboxsep=0pt\fbox{\hbox to #1{#2}}} + +This example uses those commands to show a black bar 100 points long +between 'XXX' and 'YYY'. This length is rigid. + + XXX\showhbox{100pt}{\blackbar{100pt}}YYY + + As for rubber lengths, shrinking is simpler one: with '1cm minus +0.05cm', the natural length is 1cm but TeX can shrink it down as far as +0.95cm. Beyond that, TeX refuses to shrink any more. Thus, below the +first one works fine, producing a space of 98 points between the two +bars. + + XXX\showhbox{300pt}{% + \blackbar{101pt}\hspace{100pt minus 2pt}\blackbar{101pt}}YYY + + XXX\showhbox{300pt}{% + \blackbar{105pt}\hspace{100pt minus 1pt}\blackbar{105pt}}YYY + +But the second one gets a warning like 'Overfull \hbox (1.0pt too wide) +detected at line 17'. In the output the first 'Y' is overwritten by the +end of the black bar, because the box's material is wider than the 300pt +allocated, as TeX has refused to shrink the total to less than +309 points. + + Stretching is like shrinking except that if TeX is asked to stretch +beyond the given amount, it will do it. Here the first line is fine, +producing a space of 110 points between the bars. + + XXX\showhbox{300pt}{% + \blackbar{95pt}\hspace{100pt plus 10pt}\blackbar{95pt}}YYY + + XXX\showhbox{300pt}{% + \blackbar{95pt}\hspace{100pt plus 1pt}\blackbar{95pt}}YYY + +In the second line TeX needs a stretch of 10 points and only 1 point was +specified. TeX stretches the space to the required length but it gives +you a warning like 'Underfull \hbox (badness 10000) detected at line +22'. (We won't discuss badness.) + + You can put both stretch and shrink in the same length, as in '1ex +plus 0.05ex minus 0.02ex'. + + If TeX is setting two or more rubber lengths then it allocates the +stretch or shrink in proportion. + + XXX\showhbox{300pt}{% + \blackbar{100pt}% left + \hspace{0pt plus 50pt}\blackbar{80pt}\hspace{0pt plus 10pt}% middle + \blackbar{100pt}}YYY % right + +The left and right bars take up 100 points, so the middle needs another +100. The middle bar is 80 points so the two '\hspace''s must stretch +20 points. Because the two are 'plus 50pt' and 'plus 10pt', TeX gets +5/6 of the stretch from the first space and 1/6 from the second. + + The 'plus' or 'minus' component of a rubber length can contain a +"fill" component, as in '1in plus2fill'. This gives the length infinite +stretchability or shrinkability so that TeX could set it to any +distance. Here the two figures will be equal-spaced across the page. + + \begin{minipage}{\linewidth} + \hspace{0pt plus 1fill}\includegraphics{godel.png}% + \hspace{0pt plus 1fill}\includegraphics{einstein.png}% + \hspace{0pt plus 1fill} + \end{minipage} + + TeX has three levels of infinity for glue components: 'fil', 'fill', +and 'filll'. The later ones are more infinite than the earlier ones. +Ordinarily document authors only use the middle one (*note \hfill:: and +*note \vfill::). + + Multiplying a rubber length by a number turns it into a rigid length, +so that after '\setlength{\ylength}{1in plus 0.2in}' and +'\setlength{\zlength}{3\ylength}' then the value of '\zlength' is '3in'. + +* Menu: + +* Units of length:: The units that LaTeX knows. +* \setlength:: Set the value of a length. +* \addtolength:: Add a quantity to a length. +* \settodepth:: Set a length to the depth of something. 
+* \settoheight:: Set a length to the height of something. +* \settowidth:: Set a length to the width of something. +* \stretch:: Add infinite stretchability. +* Expressions:: Compute with lengths and integers. + + +File: latex2e.info, Node: Units of length, Next: \setlength, Up: Lengths + +14.1 Units of length +==================== + +TeX and LaTeX know about these units both inside and outside of math +mode. + +'pt' + Point, 1/72.27 inch. The (approximate) conversion to metric units + is 1point = .35146mm = .035146cm. + +'pc' + Pica, 12 pt + +'in' + Inch, 72.27 pt + +'bp' + Big point, 1/72 inch. This length is the definition of a point in + PostScript and many desktop publishing systems. + +'mm' + Millimeter, 2.845 pt + +'cm' + Centimeter, 10 mm + +'dd' + Didot point, 1.07 pt + +'cc' + Cicero, 12 dd + +'sp' + Scaled point, 1/65536 pt + + Three other units are defined according to the current font, rather +than being an absolute dimension. + +'ex' + The x-height of the current font "ex", traditionally the height of + the lowercase letter x, is often used for vertical lengths. + +'em' + Similarly "em", traditionally the width of the capital letter M, is + often used for horizontal lengths. This is also often the size of + the current font, e.g., a nominal 10pt font will have 1em = 10pt. + LaTeX has several commands to produce horizontal spaces based on + the em (*note \enspace & \quad & \qquad::). + +'mu' + Finally, in math mode, many definitions are expressed in terms of + the math unit "mu", defined by 1em = 18mu, where the em is taken + from the current math symbols family. *Note Spacing in math + mode::. + + Using these units can help make a definition work better across font +changes. For example, a definition of the vertical space between list +items given as '\setlength{\itemsep}{1ex plus 0.05ex minus 0.01ex}' is +more likely to still be reasonable if the font is changed than a +definition given in points. + + +File: latex2e.info, Node: \setlength, Next: \addtolength, Prev: Units of length, Up: Lengths + +14.2 '\setlength' +================= + +Synopsis: + + \setlength{\LEN}{AMOUNT} + + Set the length \LEN to AMOUNT. The length name '\LEN' has to be a +control sequence (*note Control sequences::), and as such must begin +with a backslash, '\' under normal circumstances. The AMOUNT can be a +rubber length (*note Lengths::). It can be positive, negative or zero, +and can be in any units that LaTeX understands (*note Units of +length::). + + Below, with LaTeX's defaults the first paragraph will be indented +while the second will not. + + I told the doctor I broke my leg in two places. + + \setlength{\parindent}{0em} + He said stop going to those places. + + If you did not declare \LEN with '\newlength', for example if you +mistype it as in +'\newlength{\specparindent}\setlength{\sepcparindent}{...}', then you +get an error like 'Undefined control sequence. \sepcindent'. +If you omit the backslash at the start of the length name then you get +an error like 'Missing number, treated as zero.'. + + +File: latex2e.info, Node: \addtolength, Next: \settodepth, Prev: \setlength, Up: Lengths + +14.3 '\addtolength' +=================== + +Synopsis: + + \addtolength{\LEN}{AMOUNT} + + Increment the length \LEN by AMOUNT. The length name '\LEN' has to +be a control sequence (*note Control sequences::), and as such must +begin with a backslash, '\' under normal circumstances. The AMOUNT is a +rubber length (*note Lengths::). 
It can be positive, negative or zero, +and can be in any units that LaTeX understands (*note Units of +length::). + + Below, if '\parskip' starts with the value '0pt plus 1pt' + + Doctor: how is the boy who swallowed the silver dollar? + \addtolength{\parskip}{1pt} + + Nurse: no change. + +then it has the value '1pt plus 1pt' for the second paragraph. + + If you did not declare \LEN with '\newlength', for example if you +mistype it as in +'\newlength{\specparindent}\addtolength{\sepcparindent}{...}', then you +get an error like 'Undefined control sequence. \sepcindent'. +If the AMOUNT uses some length that has not been declared, for instance +if for example you mistype the above as +'\addtolength{\specparindent}{0.6\praindent}', then you get something +like 'Undefined control sequence. \praindent'. If you leave +off the backslash at the start of \LEN, as in +'\addtolength{parindent}{1pt}', then you get something like 'You can't +use `the letter p' after \advance'. + + +File: latex2e.info, Node: \settodepth, Next: \settoheight, Prev: \addtolength, Up: Lengths + +14.4 '\settodepth' +================== + +Synopsis: + + \settodepth{\LEN}{TEXT} + + Set the length \LEN to the depth of box that LaTeX gets on +typesetting the TEXT argument. The length name '\LEN' has to be a +control sequence (*note Control sequences::), and as such must begin +with a backslash, '\' under normal circumstances. + + This will print how low the character descenders go. + + \newlength{\alphabetdepth} + \settodepth{\alphabetdepth}{abcdefghijklmnopqrstuvwxyz} + \the\alphabetdepth + + If you did not declare \LEN with '\newlength', if for example you +mistype the above as '\settodepth{\aplhabetdepth}{abc...}', then you get +something like 'Undefined control sequence. \aplhabetdepth'. +If you leave the backslash out of \LEN, as in +'\settodepth{alphabetdepth}{...}' then you get something like 'Missing +number, treated as zero. \setbox'. + + +File: latex2e.info, Node: \settoheight, Next: \settowidth, Prev: \settodepth, Up: Lengths + +14.5 '\settoheight' +=================== + +Synopsis: + + \settoheight{\LEN}{text} + + Sets the length \LEN to the height of box that LaTeX gets on +typesetting the 'text' argument. The length name '\LEN' has to be a +control sequence (*note Control sequences::), and as such must begin +with a backslash, '\' under normal circumstances. + + This will print how high the characters go. + + \newlength{\alphabetheight} + \settoheight{\alphabetheight}{abcdefghijklmnopqrstuvwxyz} + \the\alphabetheight + + If no such length \LEN has been declared with '\newlength', if for +example you mistype as '\settoheight{\aplhabetheight}{abc...}', then you +get something like 'Undefined control sequence. +\alphabetheight'. If you leave the backslash out of \LEN, as in +'\settoheight{alphabetheight}{...}' then you get something like 'Missing +number, treated as zero. \setbox'. + + +File: latex2e.info, Node: \settowidth, Next: \stretch, Prev: \settoheight, Up: Lengths + +14.6 '\settowidth' +================== + +Synopsis: + + \settowidth{\LEN}{TEXT} + + Set the length \LEN to the width of the box that LaTeX gets on +typesetting the TEXT argument. The length name '\LEN' has to be a +control sequence (*note Control sequences::), and as such must begin +with a backslash, '\' under normal circumstances. + + This prints the width of the lowercase ASCII alphabet. 
+ + \newlength{\alphabetwidth} + \settowidth{\alphabetwidth}{abcdefghijklmnopqrstuvwxyz} + \the\alphabetwidth + + If no such length \LEN has been declared with '\newlength', if for +example you mistype the above as '\settowidth{\aplhabetwidth}{abc...}', +then you get something like 'Undefined control sequence. +\aplhabetwidth'. If you leave the backslash out of \LEN, as in +'\settoheight{alphabetwidth}{...}' then you get something like 'Missing +number, treated as zero. \setbox'. + + +File: latex2e.info, Node: \stretch, Next: Expressions, Prev: \settowidth, Up: Lengths + +14.7 '\stretch' +=============== + +Synopsis: + + \stretch{NUMBER} + + Produces a rubber length with zero natural length and NUMBER times +'\fill' units of stretchability (*note Lengths::). The NUMBER can be +positive or negative. This command is robust (*note \protect::). + + It works for both vertical and horizontal spacing. In this +horizontal example, LaTeX produces three tick marks, and the distance +between the first and second is half again as long as the distance +between the second and third. + + \rule{0.4pt}{1ex}\hspace{\stretch{1.5}}% + \rule{0.4pt}{1ex}\hspace{\stretch{1}}% + \rule{0.4pt}{1ex} + + In this vertical example, the 'We dedicate ...' will have three times +as much space under it as above it. + + \newenvironment{dedication}{% in document preamble + \clearpage\thispagestyle{empty}% + \vspace*{\stretch{1}} % stretchable space at top + \it + }{% + \vspace{\stretch{3}} % space at bot is 3x as at top + \clearpage + } + ... + \begin{dedication} % in document body + We dedicate this book to our wives. + \end{dedication} + + +File: latex2e.info, Node: Expressions, Prev: \stretch, Up: Lengths + +14.8 Expressions +================ + +Synopsis, one of: + + \numexpr EXPRESSION + \dimexpr EXPRESSION + \glueexpr EXPRESSION + \muglue EXPRESSION + + Any place where you may write an integer, or a TeX dimen, or TeX +glue, or muglue, you can instead write an expression to compute that +type of quantity. + + An example is that '\the\dimexpr\linewidth-4pt\relax' will produce as +output the length that is four points less than width of a line (the +only purpose of '\the' is to show the result in the document). +Analogously, '\romannumeral\numexpr6+3\relax' will produce 'ix', and +'\the\glueexpr 5pt plus 1pt * 2 \relax' will produce '10.0pt plus +2.0pt'. + + A convenience here over doing calculations by allocating registers +and then using '\advance', etc., is that the evaluation of expressions +does not involve assignments and can therefore be performed in places +where assignments are not allowed. The next example computes the width +of the '\parbox'. + + \newlength{\offset}\setlength{\offset}{2em} + \begin{center} + \parbox{\dimexpr\linewidth-\offset*3}{With malice toward none + with charity for all with firmness in the right as God gives us to see + the right let us strive on to finish the work we are in to bind up the + nation's wounds, to care for him who shall have borne the battle and + for his widow and his orphan \textasciitilde\ to do all which may + achieve and cherish a just and lasting peace among ourselves and with + all nations. ---Abraham Lincoln, Second Inaugural Address, from the + memorial} + \end{center} + + The EXPRESSION consists of one or more terms of the same type +(integer, dimension, etc.) that are added or subtracted. A term that +is a type of number, dimension, etc., consists of a factor of that type, +optionally multiplied or divided by factors. 
A factor of a type is +either a quantity of that type or a parenthesized subexpression. The +expression produces a result of the given type, so that '\numexpr' +produces an integer, '\dimexpr' produces a dimension, etc. + + In the quotation example above, changing to +'\dimexpr\linewidth-3*\offset' gives the error 'Illegal unit of measure +(pt inserted)'. This is because for '\dimexpr' and '\glueexpr', the +input consists of a dimension or glue value followed by an optional +multiplication factor, and not the other way around. Thus '\the\dimexpr +1pt*10\relax' is valid and produces '10.0pt', but '\the\dimexpr +10*1pt\relax' gives the 'Illegal unit' error. + + The expressions absorb tokens and carry out appropriate mathematics +up to a '\relax' (which will be absorbed), or up to the first non-valid +token. Thus, '\the\numexpr2+3px' will print '5px', because LaTeX reads +the '\numexpr2+3', which is made up of numbers, and then finds the +letter 'p', which cannot be part of a number. It therefore terminates +the expression and produces the '5', followed by the regular text 'px'. + + This termination behavior is useful in comparisons. In +'\ifnum\numexpr\parindent*2 < 10pt Yes\else No\fi', the less than sign +terminates the expression and the result is 'No' (in a standard LaTeX +article). + + Expressions may use the operators '+', '-', '*' and '/' along with +parentheses for subexpressions, '(...)'. In glue expressions the 'plus' +and 'minus' parts do not need parentheses to be affected by a factor. +So '\the\glueexpr 5pt plus 1pt * 2 \relax' results in '10pt plus 2pt'. + + TeX will coerce other numerical types in the same way as it does when +doing register assignment. Thus '\the\numexpr\dimexpr 1pt\relax\relax' +will result in '65536', which is '1pt' converted to scaled points (*note +'sp': units of length sp, TeX's internal unit) and then coerced into an +integer. With a '\glueexpr' here, the stretch and shrink would be +dropped. Going the other way, a '\numexpr' inside a '\dimexpr' or +'\glueexpr' will need appropriate units, as in '\the\dimexpr\numexpr 1 + +2\relax pt\relax', which produces '3.0pt'. + + The details of the arithmetic: each factor is checked to be in the +allowed range, numbers must be less than 2^{31} in absolute value, and +dimensions or glue components must be less than 2^{14} points, or 'mu', +or 'fil', etc. The arithmetic operations are performed individually, +except for a scaling operation (a multiplication immediately followed by +a division) which is done as one combined operation with a 64-bit +product as intermediate value. The result of each operation is again +checked to be in the allowed range. + + Finally, division and scaling take place with rounding (unlike TeX's +'\divide', which truncates). Thus '\the\dimexpr 5pt*(3/2)\relax' puts +'10.0pt' in the document, because it rounds '3/2' to '2', while +'\the\dimexpr 5pt*(4/3)\relax' produces '5.0pt'. + + +File: latex2e.info, Node: Making paragraphs, Next: Math formulas, Prev: Lengths, Up: Top + +15 Making paragraphs +******************** + +To start a paragraph, just type some text. To end the current +paragraph, put an empty line. This is three paragraphs, the separation +of which is made by two empty lines. + + It is a truth universally acknowledged, that a single man in possession + of a good fortune, must be in want of a wife. 
+ + However little known the feelings or views of such a man may be on his + first entering a neighbourhood, this truth is so well fixed in the minds + of the surrounding families, that he is considered the rightful property + of some one or other of their daughters. + + ``My dear Mr. Bennet,'' said his lady to him one day, + ``have you heard that Netherfield Park is let at last?'' + + A paragraph separator is made of a sequence of at least one blank +line, at least one of which is not terminated by a comment. A blank +line is a line that is empty or made only of blank characters such as +space or tab. Comments in source code are started with a '%' and span +up to the end of line. In the following example the two columns are +identical: + + \documentclass[twocolumn]{article} + \begin{document} + First paragraph. + + Second paragraph. + \newpage + First paragraph. + + % separator lines may contain blank characters. + + Second paragraph. + \end{document} + + Once LaTeX has gathered all of a paragraph's contents it divides that +content into lines in a way that is optimized over the entire paragraph +(*note Line breaking::). + + There are places where a new paragraph is not permitted. Don't put a +blank line in math mode (*note Modes::); here the blank line before the +'\end{equation}' + + \begin{equation} + 2^{|S|} > |S| + + \end{equation} + +will get you the error 'Missing $ inserted'. Similarly, the blank line +in this '\section' argument + + \section{aaa + + bbb} + +gets 'Runaway argument? {aaa ! Paragraph ended before \@sect was +complete'. + +* Menu: + +* \par:: End the current paragraph. +* \indent & \noindent:: Go into horizontal mode, possibly with an indent. +* \parindent & \parskip:: Space added before paragraphs. +* Marginal notes:: Put remarks in the margin. + + +File: latex2e.info, Node: \par, Next: \indent & \noindent, Up: Making paragraphs + +15.1 '\par' +=========== + +Synopsis (note that while reading the input TeX converts any sequence of +one or more blank lines to a '\par', *note Making paragraphs::): + + \par + + End the current paragraph. The usual way to separate paragraphs is +with a blank line but the '\par' command is entirely equivalent. This +command is robust (*note \protect::). + + This example uses '\par' rather than a blank line simply for +readability. + + \newcommand{\syllabusLegalese}{% + \whatCheatingIs\par\whatHappensWhenICatchYou} + + In LR mode the '\par' command does nothing and is ignored. In +paragraph mode, the '\par' command terminates paragraph mode, switching +LaTeX to vertical mode (*note Modes::). + + You cannot use the '\par' command in a math mode. You also cannot +use it in the argument of many commands, such as the sectioning +commands, e.g. '\section' (*note Making paragraphs:: and *note +\newcommand & \renewcommand::). + + The '\par' command is not the same as the '\paragraph' command. The +latter is, like '\section' or '\subsection', a sectioning command used +by the LaTeX document standard classes (*note \subsubsection & +\paragraph & \subparagraph::). + + The '\par' command is not the same as '\newline' or the line break +double backslash, '\\'. The difference is that '\par' ends the +paragraph, not just the line, and also triggers the addition of the +between-paragraph vertical space '\parskip' (*note \parindent & +\parskip::). 
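+
+   As a rough sketch of that difference (this fragment is only an
+illustration), the '\\' below starts a new line inside the same
+paragraph, while the '\par' ends the paragraph, so the text after it
+gets the '\parskip' glue above it and a fresh '\parindent'.
+
+     first line\\
+     second line, still in the same paragraph\par
+     Third line, starting a new paragraph.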
+ + The output from this example + + xyz + + \setlength{\parindent}{3in} + \setlength{\parskip}{5in} + \noindent test\indent test1\par test2 + +is: after 'xyz' there is a vertical skip of 5 inches and then 'test' +appears, aligned with the left margin. On the same line, there is an +empty horizontal space of 3 inches and then 'test1' appears. Finally. +there is a vertical space of 5 inches, followed by a fresh paragraph +with a paragraph indent of 3 inches, and then LaTeX puts the text +'test2'. + + +File: latex2e.info, Node: \indent & \noindent, Next: \parindent & \parskip, Prev: \par, Up: Making paragraphs + +15.2 '\indent' & '\noindent' +============================ + +Synopsis: + + \indent + +or + + \noindent + + Go into horizontal mode (*note Modes::). The '\indent' command first +outputs an empty box whose width is '\parindent'. These commands are +robust (*note \protect::). + + Ordinarily you create a new paragraph by putting in a blank line. +*Note \par::, for the difference between this command and '\par'. To +start a paragraph without an indent, or to continue an interrupted +paragraph, use '\noindent'. + + In the middle of a paragraph the '\noindent' command has no effect, +because LaTeX is already in horizontal mode there. The '\indent' +command's only effect is to output a space. + + This example starts a fresh paragraph. + + ... end of the prior paragraph. + + \noindent This paragraph is not indented. + +and this continues an interrupted paragraph. + + The data + + \begin{center} + \begin{tabular}{rl} ... \end{tabular} + \end{center} + + \noindent shows this clearly. + + To omit indentation in the entire document put +'\setlength{\parindent}{0pt}' in the preamble. If you do that, you may +want to also set the length of spaces between paragraphs, '\parskip' +(*note \parindent & \parskip::). + + Default LaTeX styles have the first paragraph after a section that is +not indented, as is traditional typesetting in English. To change that, +look on CTAN for the package 'indentfirst'. + + +File: latex2e.info, Node: \parindent & \parskip, Next: Marginal notes, Prev: \indent & \noindent, Up: Making paragraphs + +15.3 '\parindent' & '\parskip' +============================== + +Synopsis: + + \setlength{\parindent}{HORIZONTAL LEN} + \setlength{\parskip}{VERTICAL LEN} + + Both are rubber lengths (*note Lengths::). They affect the +indentation of ordinary paragraphs, not paragraphs inside minipages +(*note minipage::), and the vertical space between paragraphs, +respectively. + + For example, if this is put in the preamble: + + \setlength{\parindent}{0em} + \setlength{\parskip}{1ex} + +The document will have paragraphs that are not indented, but instead are +vertically separated by about the height of a lowercase 'x'. + + In LaTeX standard class documents, the default value for '\parindent' +in one-column documents is '15pt' when the default text size is '10pt', +'17pt' for '11pt', and '1.5em' for '12pt'. In two-column documents it +is '1em'. (These values are set before LaTeX calls '\normalfont' so +'em' is derived from the default font, Computer Modern. If you use a +different font then to set '\parindent' to 1em matching that font, put +'\AtBeginDocument{\setlength{\parindent}{1em}}' in the preamble.) + + The default value for '\parskip' in LaTeX's standard document classes +is '0pt plus1pt'. 
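+
+   As a sketch of the '\AtBeginDocument' approach just mentioned (the
+'mathpazo' package here merely stands in for whatever font package you
+load), the setting is delayed until the document's main font is in
+effect, so the '1em' is taken from that font.
+
+     \usepackage{mathpazo}  % or whichever font package you use
+     \AtBeginDocument{\setlength{\parindent}{1em}}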
+ + +File: latex2e.info, Node: Marginal notes, Prev: \parindent & \parskip, Up: Making paragraphs + +15.4 Marginal notes +=================== + +Synopsis, one of: + + \marginpar{RIGHT} + \marginpar[LEFT]{RIGHT} + + Create a note in the margin. The first line of the note will have +the same baseline as the line in the text where the '\marginpar' occurs. + + The margin that LaTeX uses for the note depends on the current layout +(*note Document class options::) and also on '\reversemarginpar' (see +below). If you are using one-sided layout (document option 'oneside') +then it goes in the right margin. If you are using two-sided layout +(document option 'twoside') then it goes in the outside margin. If you +are in two-column layout (document option 'twocolumn') then it goes in +the nearest margin. + + If you declare '\reversemarginpar' then LaTeX will place subsequent +marginal notes in the opposite margin to that given in the prior +paragraph. Revert that to the default position with '\normalmarginpar'. + + When you specify the optional argument LEFT then it is used for a +note in the left margin, while the mandatory argument RIGHT is used for +a note in the right margin. + + Normally, a note's first word will not be hyphenated. You can enable +hyphenation there by beginning LEFT or RIGHT with '\hspace{0pt}'. + + These parameters affect the formatting of the note: + +'\marginparpush' + Minimum vertical space between notes; default '7pt' for '12pt' + documents, '5pt' else. See also *note page layout parameters + marginparpush::. + +'\marginparsep' + Horizontal space between the main text and the note; default '11pt' + for '10pt' documents, '10pt' else. + +'\marginparwidth' + Width of the note itself; default for a one-sided '10pt' document + is '90pt', '83pt' for '11pt', and '68pt' for '12pt'; '17pt' more in + each case for a two-sided document. In two column mode, the + default is '48pt'. + + The standard LaTeX routine for marginal notes does not prevent notes +from falling off the bottom of the page. + + +File: latex2e.info, Node: Math formulas, Next: Modes, Prev: Making paragraphs, Up: Top + +16 Math formulas +**************** + +Produce mathematical text by putting LaTeX into math mode or display +math mode (*note Modes::). This example shows both. + + The wave equation for \( u \) is + \begin{displaymath} + \frac{\partial^2u}{\partial t^2} = c^2\nabla^2u + \end{displaymath} + where \( \nabla^2 \) is the spatial Laplacian and \( c \) is constant. + +Math mode is for inline mathematics. In the above example it is invoked +by the starting '\(' and finished by the matching ending '\)'. Display +math mode is for displayed equations and here is invoked by the +'displaymath' environment. Note that any mathematical text whatever, +including mathematical text consisting of just one character, is handled +in math mode. + + When in math mode or display math mode, LaTeX handles many aspects of +your input text differently than in other text modes. For example, + + contrast x+y with \( x+y \) + +in math mode the letters are in italics and the spacing around the plus +sign is different. + + There are three ways to make inline formulas, to put LaTeX in math +mode. + + \( MATHEMATICAL MATERIAL \) + $ MATHEMATICAL MATERIAL $ + \begin{math} MATHEMATICAL MATERIAL \end{math} + +The first form is preferred and the second is quite common, but the +third form is rarely used. You can sometimes use one and sometimes +another, as in '\(x\) and $y$'. 
You can use these in paragraph mode or +in LR mode (*note Modes::). + + To make displayed formulas, put LaTeX into display math mode with +either: + + \begin{displaymath} + MATHEMATICAL MATERIAL + \end{displaymath} + +or + + \begin{equation} + MATHEMATICAL MATERIAL + \end{equation} + +(*note displaymath::, *note equation::). The only difference is that +with the 'equation' environment, LaTeX puts a formula number alongside +the formula. The construct '\[ MATH \]' is equivalent to +'\begin{displaymath} MATH \end{displaymath}'. These environments can +only be used in paragraph mode (*note Modes::). + + The American Mathematical Society has made freely available a set of +packages that greatly expand your options for writing mathematics, +'amsmath' and 'amssymb' (also be aware of the 'mathtools' package that +is an extension to, and loads, 'amsmath'). New documents that will have +mathematical text should use these packages. Descriptions of these +packages is outside the scope of this document; see their documentation +on CTAN. + +* Menu: + +* Subscripts & superscripts:: Also known as exponents or indices. +* Math symbols:: Various mathematical squiggles. +* Math functions:: Math function names like sin and exp. +* Math accents:: Accents in math. +* Over- and Underlining:: Things over or under formulas. +* Spacing in math mode:: Thick, medium, thin, and negative spaces. +* Math styles:: Determine the size of things. +* Math miscellany:: Stuff that doesn't fit anywhere else. + + +File: latex2e.info, Node: Subscripts & superscripts, Next: Math symbols, Up: Math formulas + +16.1 Subscripts & superscripts +============================== + +Synopsis (in math mode or display math mode), one of: + + BASE^EXP + BASE^{EXP} + +or, one of: + + BASE_EXP + BASE_{EXP} + + Make EXP appear as a superscript of BASE (with the caret +character, '^') or a subscript (with underscore, '_'). + + In this example the '0''s and '1''s are subscripts while the '2''s +are superscripts. + + \( (x_0+x_1)^2 \leq (x_0)^2+(x_1)^2 \) + + To have the subscript or superscript contain more than one character, +surround the expression with curly braces, as in 'e^{-2x}'. This +example's fourth line shows curly braces used to group an expression for +the exponent. + + \begin{displaymath} + (3^3)^3=27^3=19\,683 + \qquad + 3^{(3^3)}=3^{27}=7\,625\,597\,484\,987 + \end{displaymath} + + LaTeX knows how to handle a superscript on a superscript, or a +subscript on a subscript, or supers on subs, or subs on supers. So, +expressions such as 'e^{x^2}' and 'x_{i_0}' give correct output. Note +the use in those expressions of curly braces to give the BASE a +determined EXP. If you enter '\(3^3^3\)' then you get 'Double +superscript'. + + LaTeX does the right thing when something has both a subscript and a +superscript. In this example the integral has both. They come out in +the correct place without any author intervention. + + \begin{displaymath} + \int_{x=a}^b f'(x)\,dx = f(b)-f(a) + \end{displaymath} + +Note the parentheses around 'x=a' to make the entire expression a +subscript. + + To put a superscript or subscript before a symbol, use a construct +like '{}_t K^2'. The empty curly braces '{}' give the subscript +something to attach to and keeps it from accidentally attaching to a +prior symbols. + + Using the subscript or superscript character outside of math mode or +display math mode, as in 'the expression x^2', will get you the error +'Missing $ inserted'. 
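+
+   A sketch of the usual fix (this is only an illustration) is to
+switch into math mode for the expression.  The empty-group trick above
+also works there, for instance to write an isotope with prescripts.
+
+     the expression \( x^2 \); the isotope \( {}^{235}_{92}\mathrm{U} \)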
+ + A common reason to want subscripts outside of a mathematics mode is +to typeset chemical formulas. There are packages for that, such as +'mhchem'; see CTAN. + + +File: latex2e.info, Node: Math symbols, Next: Math functions, Prev: Subscripts & superscripts, Up: Math formulas + +16.2 Math symbols +================= + +LaTeX provides almost any mathematical or technical symbol that anyone +uses. For example, if you include '$\pi$' in your source, you will get +the pi symbol π. See the "Comprehensive LaTeX Symbol List" package at +. + + Here is a list of commonly-used symbols. It is by no means +exhaustive. Each symbol is described with a short phrase, and its +symbol class, which determines the spacing around it, is given in +parenthesis. Unless said otherwise, the commands for these symbols can +be used only in math mode. To redefine a command so that it can be used +whatever the current mode, see *note \ensuremath::. + +'\|' + ∥ Parallel (relation). Synonym: '\parallel'. + +'\aleph' + ℵ Aleph, transfinite cardinal (ordinary). + +'\alpha' + α Lowercase Greek letter alpha (ordinary). + +'\amalg' + ⨿ Disjoint union (binary) + +'\angle' + ∠ Geometric angle (ordinary). Similar: less-than sign '<' and + angle bracket '\langle'. + +'\approx' + ≈ Almost equal to (relation). + +'\ast' + ∗ Asterisk operator, convolution, six-pointed (binary). + Synonym: '*', which is often a superscript or subscript, as in the + Kleene star. Similar: '\star', which is five-pointed, and is + sometimes used as a general binary operation, and sometimes + reserved for cross-correlation. + +'\asymp' + ≍ Asymptotically equivalent (relation). + +'\backslash' + \ Backslash (ordinary). Similar: set minus '\setminus', and + '\textbackslash' for backslash outside of math mode. + +'\beta' + β Lowercase Greek letter beta (ordinary). + +'\bigcap' + ⋂ Variable-sized, or n-ary, intersection (operator). Similar: + binary intersection '\cap'. + +'\bigcirc' + ⚪ Circle, larger (binary). Similar: function composition '\circ'. + +'\bigcup' + ⋃ Variable-sized, or n-ary, union (operator). Similar: binary + union '\cup'. + +'\bigodot' + ⨀ Variable-sized, or n-ary, circled dot operator (operator). + +'\bigoplus' + ⨁ Variable-sized, or n-ary, circled plus operator (operator). + +'\bigotimes' + ⨂ Variable-sized, or n-ary, circled times operator (operator). + +'\bigtriangledown' + ▽ Variable-sized, or n-ary, open triangle pointing down (operator). + +'\bigtriangleup' + △ Variable-sized, or n-ary, open triangle pointing up (operator). + +'\bigsqcup' + ⨆ Variable-sized, or n-ary, square union (operator). + +'\biguplus' + ⨄ Variable-sized, or n-ary, union operator with a plus (operator). + (Note that the name has only one p.) + +'\bigvee' + ⋁ Variable-sized, or n-ary, logical-or (operator). + +'\bigwedge' + ⋀ Variable-sized, or n-ary, logical-and (operator). + +'\bot' + ⊥, Up tack, bottom, least element of a partially ordered set, or a + contradiction (ordinary). See also '\top'. + +'\bowtie' + ⋈ Natural join of two relations (relation). + +'\Box' + □ Modal operator for necessity; square open box (ordinary). Not + available in plain TeX. In LaTeX you need to load the 'amssymb' + package. + +'\bullet' + • Bullet (binary). Similar: multiplication dot '\cdot'. + +'\cap' + ∩ Intersection of two sets (binary). Similar: variable-sized + operator '\bigcap'. + +'\cdot' + ⋅ Multiplication (binary). Similar: Bullet dot '\bullet'. + +'\chi' + χ Lowercase Greek chi (ordinary). + +'\circ' + ∘ Function composition, ring operator (binary). 
Similar: + variable-sized operator '\bigcirc'. + +'\clubsuit' + ♣ Club card suit (ordinary). + +'\complement' + ∁, Set complement, used as a superscript as in '$S^\complement$' + (ordinary). Not available in plain TeX. In LaTeX you need to load + the 'amssymb' package. Also used: '$S^{\mathsf{c}}$' + or '$\bar{S}$'. + +'\cong' + ≅ Congruent (relation). + +'\coprod' + ∐ Coproduct (operator). + +'\cup' + ∪ Union of two sets (binary). Similar: variable-sized + operator '\bigcup'. + +'\dagger' + † Dagger relation (binary). + +'\dashv' + ⊣ Dash with vertical, reversed turnstile (relation). Similar: + turnstile '\vdash'. + +'\ddagger' + ‡ Double dagger relation (binary). + +'\Delta' + Δ Greek uppercase delta, used for increment (ordinary). + +'\delta' + δ Greek lowercase delta (ordinary). + +'\Diamond' + ◇ Large diamond operator (ordinary). Not available in plain TeX. + In LaTeX you need to load the 'amssymb' package. + +'\diamond' + ⋄ Diamond operator (binary). Similar: large diamond '\Diamond', + circle bullet '\bullet'. + +'\diamondsuit' + ♢ Diamond card suit (ordinary). + +'\div' + ÷ Division sign (binary). + +'\doteq' + ≐ Approaches the limit (relation). Similar: geometrically equal + to '\Doteq'. + +'\downarrow' + ↓ Down arrow, converges (relation). Similar: '\Downarrow' double + line down arrow. + +'\Downarrow' + ⇓ Double line down arrow (relation). Similar: '\downarrow' single + line down arrow. + +'\ell' + ℓ Lowercase cursive letter l (ordinary). + +'\emptyset' + ∅ Empty set symbol (ordinary). The variant form is '\varnothing'. + +'\epsilon' + ϵ Lowercase lunate epsilon (ordinary). Similar to Greek text + letter. More widely used in mathematics is the script small letter + epsilon '\varepsilon' ε. Related: the set membership relation + '\in' ∈. + +'\equiv' + ≡ Equivalence (relation). + +'\eta' + η Lowercase Greek letter (ordinary). + +'\exists' + ∃ Existential quantifier (ordinary). + +'\flat' + ♭ Musical flat (ordinary). + +'\forall' + ∀ Universal quantifier (ordinary). + +'\frown' + ⌢ Downward curving arc (ordinary). + +'\Gamma' + Γ uppercase Greek letter (ordinary). + +'\gamma' + γ Lowercase Greek letter (ordinary). + +'\ge' + ≥ Greater than or equal to (relation). This is a synonym + for '\geq'. + +'\geq' + ≥ Greater than or equal to (relation). This is a synonym + for '\ge'. + +'\gets' + ← Is assigned the value (relation). Synonym: '\leftarrow'. + +'\gg' + ≫ Much greater than (relation). Similar: much less than '\ll'. + +'\hbar' + ℏ Planck constant over two pi (ordinary). + +'\heartsuit' + ♡ Heart card suit (ordinary). + +'\hookleftarrow' + ↩ Hooked left arrow (relation). + +'\hookrightarrow' + ↪ Hooked right arrow (relation). + +'\iff' + ⟷ If and only if (relation). It is '\Longleftrightarrow' with a + '\thickmuskip' on either side. + +'\Im' + ℑ Imaginary part (ordinary). See: real part '\Re'. + +'\imath' + Dotless i; used when you are putting an accent on an i (*note Math + accents::). + +'\in' + ∈ Set element (relation). See also: lowercase lunate + epsilon '\epsilon'ϵ and small letter script epsilon '\varepsilon'. + +'\infty' + ∞ Infinity (ordinary). + +'\int' + ∫ Integral (operator). + +'\iota' + ι Lowercase Greek letter (ordinary). + +'\Join' + ⨝ Condensed bowtie symbol (relation). Not available in Plain TeX. + +'\jmath' + Dotless j; used when you are putting an accent on a j (*note Math + accents::). + +'\kappa' + κ Lowercase Greek letter (ordinary). + +'\Lambda' + Λ uppercase Greek letter (ordinary). + +'\lambda' + λ Lowercase Greek letter (ordinary). 
+ +'\land' + ∧ Logical and (binary). Synonym: '\wedge'. See also + logical or '\lor'. + +'\langle' + ⟨ Left angle, or sequence, bracket (opening). Similar: + less-than '<'. Matches '\rangle'. + +'\lbrace' + { Left curly brace (opening). Synonym: '\{'. Matches '\rbrace'. + +'\lbrack' + [ Left square bracket (opening). Synonym: '['. Matches '\rbrack'. + +'\lceil' + ⌈ Left ceiling bracket, like a square bracket but with the bottom + shaved off (opening). Matches '\rceil'. + +'\le' + ≤ Less than or equal to (relation). This is a synonym for '\leq'. + +'\leadsto' + ⇝ Squiggly right arrow (relation). To get this symbol outside of + math mode you can put + '\newcommand*{\Leadsto}{\ensuremath{\leadsto}}' in the preamble and + then use '\Leadsto' instead. + +'\Leftarrow' + ⇐ Is implied by, double-line left arrow (relation). Similar: + single-line left arrow '\leftarrow'. + +'\leftarrow' + ← Single-line left arrow (relation). Synonym: '\gets'. Similar: + double-line left arrow '\Leftarrow'. + +'\leftharpoondown' + ↽ Single-line left harpoon, barb under bar (relation). + +'\leftharpoonup' + ↼ Single-line left harpoon, barb over bar (relation). + +'\Leftrightarrow' + ⇔ Bi-implication; double-line double-headed arrow (relation). + Similar: single-line double headed arrow '\leftrightarrow'. + +'\leftrightarrow' + ↔ Single-line double-headed arrow (relation). Similar: double-line + double headed arrow '\Leftrightarrow'. + +'\leq' + ≤ Less than or equal to (relation). This is a synonym for '\le'. + +'\lfloor' + ⌊ Left floor bracket (opening). Matches: '\floor'. + +'\lhd' + ◁ Arrowhead, that is, triangle, pointing left (binary). For the + normal subgroup symbol you should load 'amssymb' and + use '\vartriangleleft' (which is a relation and so gives better + spacing). + +'\ll' + ≪ Much less than (relation). Similar: much greater than '\gg'. + +'\lnot' + ¬ Logical negation (ordinary). Synonym: '\neg'. + +'\longleftarrow' + ⟵ Long single-line left arrow (relation). Similar: long + double-line left arrow '\Longleftarrow'. + +'\longleftrightarrow' + ⟷ Long single-line double-headed arrow (relation). Similar: long + double-line double-headed arrow '\Longleftrightarrow'. + +'\longmapsto' + ⟼ Long single-line left arrow starting with vertical bar + (relation). Similar: shorter version '\mapsto'. + +'\longrightarrow' + ⟶ Long single-line right arrow (relation). Similar: long + double-line right arrow '\Longrightarrow'. + +'\lor' + ∨ Logical or (binary). Synonym: '\vee'. See also + logical and '\land'. + +'\mapsto' + ↦ Single-line left arrow starting with vertical bar (relation). + Similar: longer version '\longmapsto'. + +'\mho' + ℧ Conductance, half-circle rotated capital omega (ordinary). + +'\mid' + ∣ Single-line vertical bar (relation). A typical use of '\mid' is + for a set '\{\, x \mid x\geq 5 \,\}'. + + Similar: '\vert' and '|' produce the same single-line vertical bar + symbol but without any spacing (they fall in class ordinary) and + you should not use them as relations but instead only as ordinals, + i.e., footnote symbols. For absolute value, see the entry + for '\vert' and for norm see the entry for '\Vert'. + +'\models' + ⊨ Entails, or satisfies; double turnstile, short double dash + (relation). Similar: long double dash '\vDash'. + +'\mp' + ∓ Minus or plus (relation). + +'\mu' + μ Lowercase Greek letter (ordinary). + +'\nabla' + ∇ Hamilton's del, or differential, operator (ordinary). + +'\natural' + ♮ Musical natural notation (ordinary). + +'\ne' + ≠ Not equal (relation). Synonym: '\neq'. 
+ +'\nearrow' + ↗ North-east arrow (relation). + +'\neg' + ¬ Logical negation (ordinary). Synonym: '\lnot'. Sometimes + instead used for negation: '\sim'. + +'\neq' + ≠ Not equal (relation). Synonym: '\ne'. + +'\ni' + ∋ Reflected membership epsilon; has the member (relation). + Synonym: '\owns'. Similar: is a member of '\in'. + +'\not' + Long solidus, or slash, used to overstrike a following operator + (relation). + + Many negated operators are available that don't require '\not', + particularly with the 'amssymb' package. For example, '\notin' is + typographically preferable to '\not\in'. + +'\notin' + ∉ Not an element of (relation). Similar: not subset + of '\nsubseteq'. + +'\nu' + ν Lowercase Greek letter (ordinary). + +'\nwarrow' + ↖ North-west arrow (relation). + +'\odot' + ⊙ Dot inside a circle (binary). Similar: variable-sized + operator '\bigodot'. + +'\oint' + ∮ Contour integral, integral with circle in the middle (operator). + +'\Omega' + Ω uppercase Greek letter (ordinary). + +'\omega' + ω Lowercase Greek letter (ordinary). + +'\ominus' + ⊖ Minus sign, or dash, inside a circle (binary). + +'\oplus' + ⊕ Plus sign inside a circle (binary). Similar: variable-sized + operator '\bigoplus'. + +'\oslash' + ⊘ Solidus, or slash, inside a circle (binary). + +'\otimes' + ⊗ Times sign, or cross, inside a circle (binary). Similar: + variable-sized operator '\bigotimes'. + +'\owns' + ∋ Reflected membership epsilon; has the member (relation). + Synonym: '\ni'. Similar: is a member of '\in'. + +'\parallel' + ∥ Parallel (relation). Synonym: '\|'. + +'\partial' + ∂ Partial differential (ordinary). + +'\perp' + ⟂ Perpendicular (relation). Similar: '\bot' uses the same glyph + but the spacing is different because it is in the class ordinary. + +'\Phi' + Φ Uppercase Greek letter (ordinary). + +'\phi' + ϕ Lowercase Greek letter (ordinary). The variant form is + '\varphi' φ. + +'\Pi' + Π uppercase Greek letter (ordinary). + +'\pi' + π Lowercase Greek letter (ordinary). The variant form is + '\varpi' ϖ. + +'\pm' + ± Plus or minus (binary). + +'\prec' + ≺ Precedes (relation). Similar: less than '<'. + +'\preceq' + ⪯ Precedes or equals (relation). Similar: less than or + equals '\leq'. + +'\prime' + ′ Prime, or minute in a time expression (ordinary). Typically used + as a superscript: '$f^\prime$'; '$f^\prime$' and '$f'$' produce the + same result. An advantage of the second is that '$f'''$' produces + the desired symbol, that is, the same result as + '$f^{\prime\prime\prime}$', but uses rather less typing. You can + only use '\prime' in math mode. Using the right single quote ''' + in text mode produces a different character (apostrophe). + +'\prod' + ∏ Product (operator). + +'\propto' + ∝ Is proportional to (relation) + +'\Psi' + Ψ uppercase Greek letter (ordinary). + +'\psi' + ψ Lowercase Greek letter (ordinary). + +'\rangle' + ⟩ Right angle, or sequence, bracket (closing). Similar: greater + than '>'. Matches:'\langle'. + +'\rbrace' + } Right curly brace (closing). Synonym: '\}'. Matches '\lbrace'. + +'\rbrack' + ] Right square bracket (closing). Synonym: ']'. + Matches '\lbrack'. + +'\rceil' + ⌉ Right ceiling bracket (closing). Matches '\lceil'. + +'\Re' + ℜ Real part, real numbers, cursive capital R (ordinary). Related: + double-line, or blackboard bold, R '\mathbb{R}'; to access this, + load the 'amsfonts' package. + +'\restriction' + ↾, Restriction of a function (relation). Synonym: + '\upharpoonright'. Not available in plain TeX. In LaTeX you need + to load the 'amssymb' package. 
+ +'\revemptyset' + ⦰, Reversed empty set symbol (ordinary). Related: '\varnothing'. + Not available in plain TeX. In LaTeX you need to load the 'stix' + package. + +'\rfloor' + ⌋ Right floor bracket, a right square bracket with the top cut off + (closing). Matches '\lfloor'. + +'\rhd' + ◁ Arrowhead, that is, triangle, pointing right (binary). For the + normal subgroup symbol you should instead load 'amssymb' and + use '\vartriangleright' (which is a relation and so gives better + spacing). + +'\rho' + ρ Lowercase Greek letter (ordinary). The variant form is + '\varrho' ϱ. + +'\Rightarrow' + ⇒ Implies, right-pointing double line arrow (relation). Similar: + right single-line arrow '\rightarrow'. + +'\rightarrow' + → Right-pointing single line arrow (relation). Synonym: '\to'. + Similar: right double line arrow '\Rightarrow'. + +'\rightharpoondown' + ⇁ Right-pointing harpoon with barb below the line (relation). + +'\rightharpoonup' + ⇀ Right-pointing harpoon with barb above the line (relation). + +'\rightleftharpoons' + ⇌ Right harpoon up above left harpoon down (relation). + +'\searrow' + ↘ Arrow pointing southeast (relation). + +'\setminus' + ⧵ Set difference, reverse solidus or reverse slash, like \ + (binary). Similar: backslash '\backslash' and also + '\textbackslash' outside of math mode. + +'\sharp' + ♯ Musical sharp (ordinary). + +'\Sigma' + Σ uppercase Greek letter (ordinary). + +'\sigma' + σ Lowercase Greek letter (ordinary). The variant form is + '\varsigma' ς. + +'\sim' + ∼ Similar, in a relation (relation). + +'\simeq' + ≃ Similar or equal to, in a relation (relation). + +'\smallint' + ∫ Integral sign that does not change to a larger size in a display + (operator). + +'\smile' + ⌣ Upward curving arc, smile (ordinary). + +'\spadesuit' + ♠ Spade card suit (ordinary). + +'\sqcap' + ⊓ Square intersection symbol (binary). Similar: + intersection 'cap'. + +'\sqcup' + ⊔ Square union symbol (binary). Similar: union 'cup'. Related: + variable-sized operator '\bigsqcup'. + +'\sqsubset' + ⊏, Square subset symbol (relation). Similar: subset '\subset'. + Not available in plain TeX. In LaTeX you need to load the + 'amssymb' package. + +'\sqsubseteq' + ⊑ Square subset or equal symbol (binary). Similar: subset or equal + to '\subseteq'. + +'\sqsupset' + ⊐, Square superset symbol (relation). Similar: superset '\supset'. + Not available in plain TeX. In LaTeX you need to load the + 'amssymb' package. + +'\sqsupseteq' + ⊒ Square superset or equal symbol (binary). Similar: superset or + equal '\supseteq'. + +'\star' + ⋆ Five-pointed star, sometimes used as a general binary operation + but sometimes reserved for cross-correlation (binary). Similar: + the synonyms asterisk '*' and '\ast', which are six-pointed, and + more often appear as a superscript or subscript, as with the Kleene + star. + +'\subset' + ⊂ Subset (occasionally, is implied by) (relation). + +'\subseteq' + ⊆ Subset or equal to (relation). + +'\succ' + ≻ Comes after, succeeds (relation). Similar: is less than '>'. + +'\succeq' + ⪰ Succeeds or is equal to (relation). Similar: less than or equal + to '\leq'. + +'\sum' + ∑ Summation (operator). Similar: Greek capital sigma '\Sigma'. + +'\supset' + ⊃ Superset (relation). + +'\supseteq' + ⊇ Superset or equal to (relation). + +'\surd' + √ Radical symbol (ordinary). The LaTeX command '\sqrt{...}' + typesets the square root of the argument, with a bar that extends + to cover the argument. + +'\swarrow' + ↙ Southwest-pointing arrow (relation). + +'\tau' + τ Lowercase Greek letter (ordinary). 
+ +'\theta' + θ Lowercase Greek letter (ordinary). The variant form is + '\vartheta' ϑ. + +'\times' + × Primary school multiplication sign (binary). See also '\cdot'. + +'\to' + → Right-pointing single line arrow (relation). + Synonym: '\rightarrow'. + +'\top' + ⊤ Top, greatest element of a partially ordered set (ordinary). See + also '\bot'. + +'\triangle' + △ Triangle (ordinary). + +'\triangleleft' + ◁ Not-filled triangle pointing left (binary). Similar: '\lhd'. + For the normal subgroup symbol you should load 'amssymb' and + use '\vartriangleleft' (which is a relation and so gives better + spacing). + +'\triangleright' + ▷ Not-filled triangle pointing right (binary). For the normal + subgroup symbol you should instead load 'amssymb' and + use '\vartriangleright' (which is a relation and so gives better + spacing). + +'\unlhd' + ⊴ Left-pointing not-filled underlined arrowhead, that is, triangle, + with a line under (binary). For the normal subgroup symbol load + 'amssymb' and use '\vartrianglelefteq' (which is a relation and so + gives better spacing). + +'\unrhd' + ⊵ Right-pointing not-filled underlined arrowhead, that is, + triangle, with a line under (binary). For the normal subgroup + symbol load 'amssymb' and use '\vartrianglerighteq' (which is a + relation and so gives better spacing). + +'\Uparrow' + ⇑ Double-line upward-pointing arrow (relation). Similar: + single-line up-pointing arrow '\uparrow'. + +'\uparrow' + ↑ Single-line upward-pointing arrow, diverges (relation). Similar: + double-line up-pointing arrow '\Uparrow'. + +'\Updownarrow' + ⇕ Double-line upward-and-downward-pointing arrow (relation). + Similar: single-line upward-and-downward-pointing + arrow '\updownarrow'. + +'\updownarrow' + ↕ Single-line upward-and-downward-pointing arrow (relation). + Similar: double-line upward-and-downward-pointing + arrow '\Updownarrow'. + +'\upharpoonright' + ↾, Up harpoon, with barb on right side (relation). + Synonym: '\restriction'. Not available in plain TeX. In LaTeX you + need to load the 'amssymb' package. + +'\uplus' + ⊎ Multiset union, a union symbol with a plus symbol in the middle + (binary). Similar: union '\cup'. Related: variable-sized + operator '\biguplus'. + +'\Upsilon' + Υ uppercase Greek letter (ordinary). + +'\upsilon' + υ Lowercase Greek letter (ordinary). + +'\varepsilon' + ε Small letter script epsilon (ordinary). This is more widely used + in mathematics than the non-variant lunate epsilon form + '\epsilon' ϵ. Related: set membership '\in'. + +'\vanothing' + ∅, Empty set symbol. Similar: '\emptyset'. Related: + '\revemptyset'. Not available in plain TeX. In LaTeX you need to + load the 'amssymb' package. + +'\varphi' + φ Variant on the lowercase Greek letter (ordinary). The + non-variant form is '\phi' ϕ. + +'\varpi' + ϖ Variant on the lowercase Greek letter (ordinary). The + non-variant form is '\pi' π. + +'\varrho' + ϱ Variant on the lowercase Greek letter (ordinary). The + non-variant form is '\rho' ρ. + +'\varsigma' + ς Variant on the lowercase Greek letter (ordinary). The + non-variant form is '\sigma' σ. + +'\vartheta' + ϑ Variant on the lowercase Greek letter (ordinary). The + non-variant form is '\theta' θ. + +'\vdash' + ⊢ Provable; turnstile, vertical and a dash (relation). Similar: + turnstile rotated a half-circle '\dashv'. + +'\vee' + ∨ Logical or; a downwards v shape (binary). Related: logical + and '\wedge'. Similar: variable-sized operator '\bigvee'. + +'\Vert' + ‖ Vertical double bar (ordinary). 
*Note Delimiters::, for how to + use the 'mathtools' package to create flexibly-sized norm symbols. + +'\vert' + | Single line vertical bar (ordinary). For "such that", as in the + definition of a set, use '\mid' because it is a relation. *Note + Delimiters::, for how to use the 'mathtools' package to create + flexibly-sized absolute-value symbols. + +'\wedge' + ∧ Logical and (binary). Synonym: '\land'. See also logical or + '\vee'. Similar: variable-sized operator '\bigwedge'. + +'\wp' + ℘ Weierstrass p (ordinary). + +'\wr' + ≀ Wreath product (binary). + +'\Xi' + Ξ uppercase Greek letter (ordinary). + +'\xi' + ξ Lowercase Greek letter (ordinary). + +'\zeta' + ζ Lowercase Greek letter (ordinary). + + The following symbols are most often used in plain text but LaTeX +provides versions to use in mathematical text. + +'\mathdollar' + Dollar sign in math mode: $. + +'\mathparagraph' + Paragraph sign (pilcrow) in math mode, ¶. + +'\mathsection' + Section sign in math mode §. + +'\mathsterling' + Sterling sign in math mode: £. + +'\mathunderscore' + Underscore in math mode: _. + +* Menu: + +* Arrows:: List of arrows. +* \boldmath & \unboldmath:: Symbols in boldface. +* Blackboard bold:: Doublestruck characters. +* Calligraphic:: Cursive characters. +* Delimiters:: Parentheses, braces, etc. +* Dots:: Ellipses, etc. +* Greek letters:: List of Greek letters. + + +File: latex2e.info, Node: Arrows, Next: \boldmath & \unboldmath, Up: Math symbols + +16.2.1 Arrows +------------- + +These are the arrows that come with standard LaTeX. The 'latexsym' and +'amsfonts' packages contain many more. + +Symbol Command +--------------------------------------------------------------------------- +⇓ '\Downarrow' +↓ '\downarrow' +↩ '\hookleftarrow' +↪ '\hookrightarrow' +← '\leftarrow' +⇐ '\Leftarrow' +⇔ '\Leftrightarrow' +↔ '\leftrightarrow' +⟵ '\longleftarrow' +⟸ '\Longleftarrow' +⟷ '\longleftrightarrow' +⟺ '\Longleftrightarrow' +⟼ '\longmapsto' +⟹ '\Longrightarrow' +⟶ '\longrightarrow' +↦ '\mapsto' +↗ '\nearrow' +↖ '\nwarrow' +⇒ '\Rightarrow' +→ '\rightarrow', or '\to' +↘ '\searrow' +↙ '\swarrow' +↑ '\uparrow' +⇑ '\Uparrow' +↕ '\updownarrow' +⇕ '\Updownarrow' + + An example of the difference between '\to' and '\mapsto' is: '\( +f\colon D\to C \) given by \( n\mapsto n^2 \)'. + + For commutative diagrams there are a number of packages, including +'tikz-cd' and 'amscd'. + + +File: latex2e.info, Node: \boldmath & \unboldmath, Next: Blackboard bold, Prev: Arrows, Up: Math symbols + +16.2.2 '\boldmath' & '\unboldmath' +---------------------------------- + +Synopsis (used in paragraph mode or LR mode): + + \boldmath \( MATH \) + +or + + \unboldmath \( MATH \) + + Declarations to change the letters and symbols in MATH to be in a +bold font, or to countermand that and bring back the regular (non-bold) +default. They must be used when not in math mode or display math mode +(*note Modes::). Both commands are fragile (*note \protect::). + + In this example each '\boldmath' command takes place inside an +'\mbox', + + we have $\mbox{\boldmath \( v \)} = 5\cdot\mbox{\boldmath \( u \)$}$ + +which means '\boldmath' is only called in a text mode, here LR mode, and +explains why LaTeX must switch to math mode to set 'v' and 'u'. + + If you use either command inside math mode, as with 'Trouble: \( +\boldmath x \)', then you get something like 'LaTeX Font Warning: +Command \boldmath invalid in math mode on input line 11' and 'LaTeX Font +Warning: Command \mathversion invalid in math mode on input line 11'. 
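+
+   A sketch of a repair for that trouble line is to issue the
+declaration outside of math mode, boxing the material as in the
+earlier example.
+
+     Fixed: \mbox{\boldmath \( x \)}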
+ + There are many issues with '\boldmath'. New documents should use the +'bm' package provided by the LaTeX Project team. A complete description +is outside the scope of this document (see the full documentation on +CTAN) but even this small example + + \usepackage{bm} % in preamble + ... + we have $\bm{v} = 5\cdot\bm{u}$ + +shows that it is an improvement over '\boldmath'. + + +File: latex2e.info, Node: Blackboard bold, Next: Calligraphic, Prev: \boldmath & \unboldmath, Up: Math symbols + +16.2.3 Blackboard bold +---------------------- + +Synopsis: + + \usepackage{amssymb} % in preamble + ... + \mathbb{UPPERCASE-LETTER} + + Provide blackboard bold symbols, sometimes also known as doublestruck +letters, used to denote number sets such as the natural numbers, the +integers, etc. + + Here + + \( \forall n \in \mathbb{N}, n^2 \geq 0 \) + +the '\mathbb{N}' gives blackboard bold symbol ℕ, representing the +natural numbers. + + If you use other than an uppercase letter then you do not get an +error but you get strange results, including unexpected characters. + + There are packages that give access to symbols other than just the +capital letters; look on CTAN. + + +File: latex2e.info, Node: Calligraphic, Next: Delimiters, Prev: Blackboard bold, Up: Math symbols + +16.2.4 Calligraphic +------------------- + +Synopsis: + + \mathcal{UPPERCASE-LETTERS} + + Use a script-like font. + + In this example the graph identifier is output in a cursive font. + + Let the graph be \( \mathcal{G} \). + + If you use something other than an uppercase letter then you do not +get an error but you also do not get math calligraphic output. For +instance, '\mathcal{g}' outputs a close curly brace symbol. + + +File: latex2e.info, Node: Delimiters, Next: Dots, Prev: Calligraphic, Up: Math symbols + +16.2.5 Delimiters +----------------- + +Delimiters are parentheses, braces, or other characters used to mark the +start and end of subformulas. This formula has three sets of +parentheses delimiting the three subformulas. + + (z-z_0)^2 = (x-x_0)^2 + (y-y_0)^2 + +The delimiters do not need to match, so you can enter '\( [0,1) \)'. + + Here are the common delimiters: + +DelimiterCommand Name +------------------------------------------------------ +( '(' Left parenthesis +) ')' Right parenthesis +\} '{' or Left brace + '\lbrace' +\{ '}' or Right brace + '\rbrace' +[ '[' or Left bracket + '\lbrack' +] ']' or Right bracket + '\rbrack' +⌊ '\lfloor' Left floor bracket +⌋ '\rfloor' Right floor bracket +⌈ '\lceil' Left ceiling bracket +⌉ '\rceil' Right ceiling bracket +⟨ '\langle' Left angle bracket +⟩ '\rangle' Right angle bracket +/ '/' Slash, or forward slash +\ '\backslash' Reverse slash, or backslash +| '|' or Vertical bar + '\vert' +‖ '\|' or Double vertical bar + '\Vert' + + The 'mathtools' package allows you to create commands for paired +delimiters. For instance, if you put +'\DeclarePairedDelimiter\abs{\lvert}{\rvert}' in your preamble then you +get two commands for single-line vertical bars (they only work in math +mode). The starred form, such as '\abs*{\frac{22}{7}}', has the height +of the vertical bars match the height of the argument. The unstarred +form, such as '\abs{\frac{22}{7}}', has the bars fixed at a default +height. This form accepts an optional argument, as in '\abs[SIZE +COMMAND]{\frac{22}{7}}', where the height of the bars is given in SIZE +COMMAND, such as '\Bigg'. Using instead '\lVert' and '\rVert' as the +symbols will give you a norm symbol with the same behavior. 
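+
+   Putting that together, here is a sketch (the command names '\abs'
+and '\norm' are just our choices) of the preamble declarations and one
+use in the document body.
+
+     \usepackage{mathtools}   % in the preamble
+     \DeclarePairedDelimiter\abs{\lvert}{\rvert}
+     \DeclarePairedDelimiter\norm{\lVert}{\rVert}
+     ...
+     \( \abs*{\frac{22}{7}-\pi} < \norm[\Big]{f} \)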
+ +* Menu: + +* \left & \right:: Automatically sized delimiters. +* \bigl & \bigr etc.:: Manually sized delimiters. + + +File: latex2e.info, Node: \left & \right, Next: \bigl & \bigr etc., Up: Delimiters + +16.2.5.1 '\left' & '\right' +........................... + +Synopsis: + + \left DELIMITER1 ... \right DELIMITER2 + + Make matching parentheses, braces, or other delimiters. LaTeX makes +the delimiters tall enough to just cover the size of the formula that +they enclose. + + This makes a unit vector surrounded by parentheses tall enough to +cover the entries. + + \begin{equation} + \left(\begin{array}{c} + 1 \\ + 0 \\ + \end{array}\right) + \end{equation} + + *Note Delimiters::, for a list of the common delimiters. + + Every '\left' must have a matching '\right'. In the above example, +leaving out the '\left(' gets the error message 'Extra \right'. Leaving +out the '\right)' gets 'You can't use `\eqno' in math mode'. + + However, DELIMITER1 and DELIMITER2 need not match. A common case is +that you want an unmatched brace, as below. Use a period, '.', as a +null delimiter. + + \begin{equation} + f(n)=\left\{\begin{array}{ll} + 1 &\mbox{--if \(n=0\)} \\ + f(n-1)+3n^2 &\mbox{--else} + \end{array}\right. + \end{equation} + +Note that to get a curly brace as a delimiter you must prefix it with a +backslash, '\{' (*note Reserved characters::). (The packages 'amsmath' +and 'mathtools' allow you to get the above construct through in a +'cases' environment.) + + The '\left ... \right' pair make a group. One consequence is that +the formula enclosed in the '\left ... \right' pair cannot have line +breaks in the output. This includes both manual line breaks and +LaTeX-generated automatic ones. In this example, LaTeX breaks the +equation to make the formula fit the margins. + + Lorem ipsum dolor sit amet + \( (a+b+c+d+e+f+g+h+i+j+k+l+m+n+o+p+q+r+s+t+u+v+w+x+y+z) \) + +But with '\left' and '\right' + + Lorem ipsum dolor sit amet + \( \left(a+b+c+d+e+f+g+h+i+j+k+l+m+n+o+p+q+r+s+t+u+v+w+x+y+z\right) \) + +LaTeX won't break the line, causing the formula to extend into the +margin. + + Because '\left ... \right' make a group, all the usual grouping rules +hold. Here, the value of '\testlength' set inside the equation will be +forgotten, and the output is '1.2pt'. + + \newlength{\testlength} \setlength{\testlength}{1.2pt} + \begin{equation} + \left( a+b=c \setlength{\testlength}{3.4pt} \right) + \the\testlength + \end{equation} + + The '\left ... \right' pair affect the horizontal spacing of the +enclosed formula, in two ways. The first is that in '\( \sin(x) = +\sin\left(x\right) \)' the one after the equals sign has more space +around the 'x'. That's because '\left( ... \right)' inserts an inner +node while '( ... )' inserts an opening node. The second way that the +pair affect the horizontal spacing is that because they form a group, +the enclosed subformula will be typeset at its natural width, with no +stretching or shrinking to make the line fit better. + + TeX scales the delimiters according to the height and depth of the +enclosed formula. Here LaTeX grows the brackets to extend the full +height of the integral. + + \begin{equation} + \left[ \int_{x=r_0}^{\infty} -G\frac{Mm}{r^2}\, dr \right] + \end{equation} + + Manual sizing is often better. For instance, although below the rule +has no depth, TeX will create delimiters that extend far below the rule. 
+ + \begin{equation} + \left( \rule{1pt}{1cm} \right) + \end{equation} + +TeX can choose delimiters that are too small, as in '\( \left| |x|+|y| +\right| \)'. It can also choose delimiters that are too large, as here. + + \begin{equation} + \left( \sum_{0\leq i1/2}+ + \underbrace{1/5+1/6+1/7+1/8}_{>1/2}+\cdots + \end{displaymath} + + The superscript appears on top of the expression, and so can look + unconnected to the underbrace. + +'\overbrace{MATH}' + Put a brace over MATH, as with + '\overbrace{x+x+\cdots+x}^{\mbox{\(k\) times}}'. See also + '\underbrace'. + + The package 'mathtools' adds an over- and underbrace, as well as some +improvements on the braces. See the documentation on CTAN. + + +File: latex2e.info, Node: Spacing in math mode, Next: Math styles, Prev: Over- and Underlining, Up: Math formulas + +16.6 Spacing in math mode +========================= + +When typesetting mathematics, LaTeX puts in spacing according to the +normal rules for mathematics texts. If you enter 'y=m x' then LaTeX +ignores the space and in the output the m is next to the x, as y=mx. + + But LaTeX's rules occasionally need tweaking. For example, in an +integral the tradition is to put a small extra space between the 'f(x)' +and the 'dx', here done with the '\,' command: + + \int_0^1 f(x)\,dx + + LaTeX provides the commands that follow for use in math mode. Many +of these spacing definitions are expressed in terms of the math unit +"mu". It is defined as 1/18em, where the em is taken from the current +math symbols family (*note Units of length::). Thus, a '\thickspace' is +something like 5/18 times the width of a 'M'. + +'\;' + + Synonym: '\thickspace'. Normally '5.0mu plus 5.0mu'. With the + 'amsmath' package, or as of the 2020-10-01 LaTeX release, can be + used in text mode as well as math mode; otherwise, in math mode + only. + +'\negthickspace' + Normally '-5.0mu plus 2.0mu minus 4.0mu'. With the 'amsmath' + package, or as of the 2020-10-01 LaTeX release, can be used in text + mode as well as math mode; otherwise, in math mode only. + +'\:' +'\>' + Synonym: '\medspace'. Normally '4.0mu plus 2.0mu minus 4.0mu'. + With the 'amsmath' package, or as of the 2020-10-01 LaTeX release, + can be used in text mode as well as math mode; before that, in math + mode only. + +'\negmedspace' + Normally '-4.0mu plus 2.0mu minus 4.0mu'. With the 'amsmath' + package, or as of the 2020-10-01 LaTeX release, can be used in text + mode as well as math mode; before that, in math mode only. + +'\,' + Synonym: '\thinspace'. Normally '3mu', which is 1/6em. Can be + used in both math mode and text mode (*note \thinspace & + \negthinspace::). + + This space is widely used, for instance between the function and + the infinitesimal in an integral '\int f(x)\,dx' and, if an author + does this, before punctuation in a displayed equation. + + The antiderivative is + \begin{equation} + 3x^{-1/2}+3^{1/2}\,. + \end{equation} + +'\!' + Synonym: '\negthinspace'. A negative thin space. Normally '-3mu'. + With the 'amsmath' package, or as of the 2020-10-01 LaTeX release, + can be used in text mode as well as math mode; otherwise, the '\!' + command is math mode only but the '\negthinspace' command has + always also worked in text mode (*note \thinspace & + \negthinspace::). + +'\quad' + This is 18mu, that is, 1em. This is often used for space + surrounding equations or expressions, for instance for the space + between two equations inside a 'displaymath' environment. It is + available in both text and math mode. 
+ +'\qquad' + A length of 2 quads, that is, 36mu = 2em. It is available in both + text and math mode. + +* Menu: + +* \smash:: Eliminate height or depth of a subformula. +* \phantom & \vphantom & \hphantom:: Make empty box same size as argument. +* \mathstrut:: Add some vertical space to a formula. + + +File: latex2e.info, Node: \smash, Next: \phantom & \vphantom & \hphantom, Up: Spacing in math mode + +16.6.1 '\smash' +--------------- + +Synopsis: + + \smash{SUBFORMULA} + + Typeset SUBFORMULA as if its height and depth were zero. + + In this example the exponential is so tall that without the '\smash' +command LaTeX would separate its line from the line above it, and the +uneven line spacing might be unsightly. + + To compute the tetration $\smash{2^{2^{2^2}}}$, + evaluate from the top down, as $2^{2^4}=2^{16}=65536$. + + (Because of the '\smash' the printed expression could run into the +line above so you may want to wait until the final version of the +document to make such adjustments.) + + This pictures the effect of '\smash' by using '\fbox' to surround the +box that LaTeX will put on the line. The '\blackbar' command makes a +bar extending from 10 points below the baseline to 20 points above. + + \newcommand{\blackbar}{\rule[-10pt]{5pt}{30pt}} + \fbox{\blackbar} + \fbox{\smash{\blackbar}} + + The first box that LaTeX places is 20 points high and 10 points deep. +But the second box is treated by LaTeX as having zero height and zero +depth, despite that the ink printed on the page still extends well above +and below the line. + + The '\smash' command appears often in mathematics to adjust the size +of an element that surrounds a subformula. Here the first radical +extends below the baseline while the second lies just on the baseline. + + \begin{equation} + \sqrt{\sum_{0\leq k< n} f(k)} + \sqrt{\vphantom{\sum}\smash{\sum_{0\leq k< n}} f(k)} + \end{equation} + + Note the use of '\vphantom' to give the '\sqrt' command an argument +with the height of the '\sum' (*note \phantom & \vphantom & +\hphantom::). + + While most often used in mathematics, the '\smash' command can appear +in other contexts. However, it doesn't change into horizontal mode. So +if it starts a paragraph then you should first put a '\leavevmode', as +in the bottom line below. + + xxx xxx xxx + + \smash{yyy} % no paragraph indent + + \leavevmode\smash{zzz} % usual paragraph indent + + The package 'mathtools' has operators that provide even finer control +over smashing a subformula box. + + +File: latex2e.info, Node: \phantom & \vphantom & \hphantom, Next: \mathstrut, Prev: \smash, Up: Spacing in math mode + +16.6.2 '\phantom' & '\vphantom' & '\hphantom' +--------------------------------------------- + +Synopsis: + + \phantom{SUBFORMULA} + + or + + \vphantom{SUBFORMULA} + + or + + \hphantom{SUBFORMULA} + + The '\phantom' command creates a box with the same height, depth, and +width as SUBFORMULA, but empty. That is, this command causes LaTeX to +typeset the space but not fill it with the material. Here LaTeX will +put a blank line that is the correct width for the answer, but will not +show that answer. + + \begin{displaymath} + \int x^2\,dx=\mbox{\underline{$\phantom{(1/3)x^3+C}$}} + \end{displaymath} + + The '\vphantom' variant produces an invisible box with the same +vertical size as SUBFORMULA, the same height and depth, but having zero +width. And '\hphantom' makes a box with the same width as SUBFORMULA +but with zero height and depth. 
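   Here is a small additional sketch, an illustration rather than one
of this document's examples: a common use of '\hphantom' is to reserve
the width of a minus sign so that the right-hand sides of stacked
formulas line up.

     \begin{displaymath}
       \begin{array}{l}
         f(x) = \hphantom{-}x^2 \\
         g(x) = -x^2
       \end{array}
     \end{displaymath}

The next example returns to '\vphantom'.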
+ + In this example, the tower of exponents in the second summand +expression is so tall that TeX places this expression further down than +its default. Without adjustment, the two summand expressions would be +at different levels. The '\vphantom' in the first expression tells TeX +to leave as much vertical room as it does for the tower, so the two +expressions come out at the same level. + + \begin{displaymath} + \sum_{j\in\{0,\ldots\, 10\}\vphantom{3^{3^{3^j}}}} + \sum_{i\in\{0,\ldots\, 3^{3^{3^j}}\}} i\cdot j + \end{displaymath} + + These commands are often used in conjunction with '\smash'. *Note +\smash::, which includes another example of '\vphantom'. + + The three phantom commands appear often but note that LaTeX provides +a suite of other commands to work with box sizes that may be more +convenient, including '\makebox' (*note \mbox & \makebox::) as well as +'\settodepth' (*note \settodepth::), '\settoheight' (*note +\settoheight::), and '\settowidth' (*note \settowidth::). In addition, +the 'mathtools' package has many commands that offer fine-grained +control over spacing. + + All three commands produce an ordinary box, without any special +mathematics status. So to do something like attaching a superscript you +should give it such a status, for example with the '\operatorname' +command from the package 'amsmath'. + + While most often used in mathematics, these three can appear in other +contexts. However, they don't cause LaTeX to change into horizontal +mode. So if one of these starts a paragraph then you should prefix it +with '\leavevmode'. + + +File: latex2e.info, Node: \mathstrut, Prev: \phantom & \vphantom & \hphantom, Up: Spacing in math mode + +16.6.3 '\mathstrut' +------------------- + +Synopsis: + + \mathstrut + + The analogue of '\strut' for mathematics. *Note \strut::. + + The input '$\sqrt{x} + \sqrt{x^i}$' gives output where the second +radical is taller than the first. To add extra vertical space without +any horizontal space, so that the two have the same height, use +'$\sqrt{x\mathstrut} + \sqrt{x^i\mathstrut}$'. + + The '\mathstrut' command adds the vertical height of an open +parenthesis, '(', but no horizontal space. It is defined as +'\vphantom{(}', so see *note \phantom & \vphantom & \hphantom:: for +more. An advantage over '\strut' is that '\mathstrut' adds no depth, +which is often the right thing for formulas. Using the height of an +open parenthesis is just a convention; for complete control over the +amount of space, use '\rule' with a width of zero. *Note \rule::. + + +File: latex2e.info, Node: Math styles, Next: Math miscellany, Prev: Spacing in math mode, Up: Math formulas + +16.7 Math styles +================ + +TeX's rules for typesetting a formula depend on the context. For +example, inside a displayed equation, the input '\sum_{0\leq +i and '\' are equivalent +to backslash-space, '\ '. + + Please also note that in order to allow source code indentation, +under normal circumstances, TeX ignores leading blanks in a line. So +the following prints 'one word': + + one + word + +where the white space between 'one' and 'word' is produced by the +newline after 'one', not by the space before 'word'. + + +File: latex2e.info, Node: ~, Next: \thinspace & \negthinspace, Prev: \(SPACE), Up: Spaces + +19.7 '~' +======== + +Synopsis: + + BEFORE~AFTER + + The "tie" character, '~', produces a space between BEFORE and AFTER +at which the line will not be broken. By default the white space has +length 3.33333pt plus 1.66666pt minus 1.11111pt (*note Lengths::). 
+ + Note that the word 'tie' has this meaning in the TeX/Texinfo +community; this differs from the typographic term "tie", which is a +diacritic in the shape of an arc, called a "tie-after" accent in 'The +TeXbook'. + + Here LaTeX will not break the line between the final two words: + + Thanks to Prof.~Lerman. + +In addition, despite the period, LaTeX does not use the end-of-sentence +spacing (*note \@::). + + Ties prevent the end of line separation of things where that could +cause confusion. They also still allow hyphenation (of either of the +tied words), so they are generally preferable to putting consecutive +words in an '\mbox' (*note \mbox & \makebox::). + + They are also matters of taste, sometimes alarmingly dogmatic taste, +among readers. Nevertheless, here are some usage models, many of them +from 'The TeXbook'. + + * Between an enumerator label and number, such as in references: + 'Chapter~12', or 'Theorem~\ref{th:Wilsons}', or + 'Figure~\ref{fig:KGraph}'. + + * When cases are enumerated inline: '(b)~Show that $f(x)$ is + (1)~continuous, and (2)~bounded'. + + * Between a number and its unit: '$745.7.8$~watts' (the 'siunitx' + package has a special facility for this) or '144~eggs'. This + includes between a month and day number in a date: 'October~12' or + '12~Oct'. In general, in any expressions where numbers and + abbreviations or symbols are separated by a space: 'AD~565', or + '2:50~pm', or 'Boeing~747', or '268~Plains Road', or + '\$$1.4$~billion'. Other common choices here are a thin space + (*note \thinspace & \negthinspace::) and no space at all. + + * When mathematical phrases are rendered in words: 'equals~$n$', or + 'less than~$\epsilon$', or 'given~$X$', or 'modulo~$p^e$ for all + large~$n$' (but compare 'is~$15$' with 'is $15$~times the height'). + Between mathematical symbols in apposition with nouns: + 'dimension~$d$' or 'function~$f(x)$' (but compare 'with length + $l$~or more'). When a symbol is a tightly bound object of a + preposition: 'of~$x$', or 'from $0$ to~$1$', or 'in common + with~$m$'. + + * Between symbols in series: '$1$,~$2$, or~$3$' or '$1$,~$2$, + \ldots,~$n$'. + + * Between a person's given names and between multiple surnames: + 'Donald~E. Knuth', or 'Luis~I. Trabb~Pardo', or 'Charles~XII'--but + you must give TeX places to break the line so you might do 'Charles + Louis Xavier~Joseph de~la Vall\'ee~Poussin'. + + +File: latex2e.info, Node: \thinspace & \negthinspace, Next: \/, Prev: ~, Up: Spaces + +19.8 '\thinspace' & '\negthinspace' +=================================== + +Synopsis, one of: + + \thinspace + \negthinspace + + These produce unbreakable and unstretchable spaces of 1/6em and +-1/6em, respectively. These are the text mode equivalents of '\,' and +'\!' (*note Spacing in math mode/\thinspace::). + + You can use '\,' as a synonym for '\thinspace' in text mode. + + One common use of '\thinspace' is as the space between nested quotes: + + Killick replied, ``I heard the Captain say, `Ahoy there.'\thinspace'' + +Another use is that some style guides call for a '\thinspace' between an +ellipsis and a sentence ending period (other style guides, think the +three dots and/or four dots are plenty). Another style-specific use is +between initials, as in 'D.\thinspace E.\ Knuth'. + + LaTeX provides a variety of similar spacing commands for math mode +(*note Spacing in math mode::). With the 'amsmath' package, or as of +the 2020-10-01 LaTeX release, they can be used in text mode as well as +math mode, including '\!' 
for '\negthinspace'; but otherwise, they are +available only in math mode. + + +File: latex2e.info, Node: \/, Next: \hrulefill & \dotfill, Prev: \thinspace & \negthinspace, Up: Spaces + +19.9 '\/' +========= + +Synopsis: + + BEFORE-CHARACTER\/AFTER-CHARACTER + + Insert an "italic correction", a small space defined by the font +designer for each character (possibly zero), to avoid the character +colliding with whatever follows. When you use '\/', LaTeX takes the +correction from the font metric file, scales it by any scaling that has +been applied to the font, and then inserts that much horizontal space. + + Here, were it not for the '\/', the BEFORE-CHARACTER italic f would +hit the AFTER-CHARACTER roman H + + \newcommand{\companylogo}{{\it f}\/H} + +because the italic letter f leans far to the right. + + If AFTER-CHARACTER is a period or comma then don't insert an italic +correction since those punctuation symbols are so low to the baseline +already. However, with semicolons or colons, as well as with normal +letters, the italic correction can help. It is typically used between a +switch from italic or slanted fonts to an upright font. + + When you use commands such as '\emph' and '\textit' and '\textsl' to +change fonts, LaTeX automatically inserts the italic correction when +needed (*note Font styles::). However, declarations such as '\em' and +'\itshape' and '\slshape' do not automatically insert italic +corrections. + + Upright characters can also have an italic correction. An example +where this is needed is the name 'pdf\/\TeX'. However, most upright +characters have a zero italic correction. Some font creators do not +include italic correction values even for italic fonts. + + Technically, LaTeX uses another font-specific value, the so-called +"slant parameter" (namely '\fontdimen1'), to determine whether to +possibly insert an italic correction, rather than tying the action to +particular font commands. + + There is no concept of italic correction in math mode; math spacing +is done in a different way. + + +File: latex2e.info, Node: \hrulefill & \dotfill, Next: \bigskip & \medskip & \smallskip, Prev: \/, Up: Spaces + +19.10 '\hrulefill' & '\dotfill' +=============================== + +Synopsis, one of: + + \hrulefill + \dotfill + + Produce an infinite horizontal rubber length (*note Lengths::) that +LaTeX fills with a rule (that is, a line) or with dots, instead of white +space. + + This outputs a line 2 inches long. + + Name:~\makebox[2in]{\hrulefill} + +This example, when placed between blank lines, creates a paragraph that +is left and right justified and where the middle is filled with evenly +spaced dots. + + \noindent John Aubrey, RN \dotfill{} Melbury Lodge + + To make the rule or dots go to the line's end use '\null' at the +start or end. + + To change the rule's thickness, copy the definition and adjust it, as +here + + \renewcommand{\hrulefill}{% + \leavevmode\leaders\hrule height 1pt\hfill\kern0pt } + +which changes the default thickness of 0.4pt to 1pt. Similarly, adjust +the dot spacing as with + + \renewcommand{\dotfill}{% + \leavevmode\cleaders\hbox to 1.00em{\hss .\hss }\hfill\kern0pt } + +which changes the default length of 0.33em to 1.00em. + + This example produces a line for a signature. + + \begin{minipage}{4cm} + \centering + \hrulefill\\ + Signed + \end{minipage} + +The line is 4cm long. 
+ + +File: latex2e.info, Node: \bigskip & \medskip & \smallskip, Next: \bigbreak & \medbreak & \smallbreak, Prev: \hrulefill & \dotfill, Up: Spaces + +19.11 '\bigskip' & '\medskip' & '\smallskip' +============================================ + +Synopsis, one of: + + \bigskip + \medskip + \smallskip + + Produce an amount of vertical space, large or medium-sized or small. +These commands are fragile (*note \protect::). + + Here the skip suggests the passage of time (from The Golden Ocean by +O'Brian). + + Mr Saumarez would have something rude to say to him, no doubt: he + was at home again, and it was delightful. + + \bigskip + ``A hundred and fifty-seven miles and one third, in twenty-four hours,'' + said Peter. + + Each command is associated with a length defined in the document +class file. + +'\bigskip' + The same as '\vspace{\bigskipamount}', ordinarily about one line + space, with stretch and shrink. The default for the 'book' and + 'article' classes is '12pt plus 4pt minus 4pt'. + +'\medskip' + The same as '\vspace{\medskipamount}', ordinarily about half of a + line space, with stretch and shrink. The default for the 'book' + and 'article' classes is '6pt plus 2pt minus 2pt'. + +'\smallskip' + The same as '\vspace{\smallskipamount}', ordinarily about a quarter + of a line space, with stretch and shrink. The default for the + 'book' and 'article' classes is '3pt plus 1pt minus 1pt'. + + Because each command is a '\vspace', if you use it in mid-paragraph +then it will insert its vertical space between the line in which you use +it and the next line, not necessarily at the place that you use it. So +these are best between paragraphs. + + The commands '\bigbreak', '\medbreak', and '\smallbreak' are similar +but also suggest to LaTeX that this is a good place to put a page break +(*note \bigbreak & \medbreak & \smallbreak::. + + +File: latex2e.info, Node: \bigbreak & \medbreak & \smallbreak, Next: \strut, Prev: \bigskip & \medskip & \smallskip, Up: Spaces + +19.12 '\bigbreak' & '\medbreak' & '\smallbreak' +=============================================== + +Synopsis, one of: + + \bigbreak + \medbreak + \smallbreak + + Produce a vertical space that is big or medium-sized or small, and +suggest to LaTeX that this is a good place to break the page. (The +associated penalties are -200, -100, and -50.) + + *Note \bigskip & \medskip & \smallskip::, for more. These commands +produce the same vertical space but differ in that they also remove a +preceding vertical space if it is less than what they would insert (as +with '\addvspace'). In addition, they terminate a paragraph where they +are used: this example + + abc\bigbreak def ghi + + jkl mno pqr + +will output three paragraphs, the first ending in 'abc' and the second +starting, after an extra vertical space and a paragraph indent, with +'def'. + + +File: latex2e.info, Node: \strut, Next: \vspace, Prev: \bigbreak & \medbreak & \smallbreak, Up: Spaces + +19.13 '\strut' +============== + +Synopsis: + + \strut + + Ensure that the current line has height at least '0.7\baselineskip' +and depth at least '0.3\baselineskip'. Essentially, LaTeX inserts into +the line a rectangle having zero width, +'\rule[-0.3\baselineskip]{0pt}{\baselineskip}' (*note \rule::). The +'\baselineskip' changes with the current font or fontsize. + + In this example the '\strut' keeps the box inside the frame from +having zero height. + + \setlength{\fboxsep}{0pt}\framebox[2in]{\strut} + + This example has four lists. 
In the first there is a much bigger gap +between items 2 and 3 than there is between items 1 and 2. The second +list fixes that with a '\strut' at the end of its first item's second +line. + + \setlength{\fboxsep}{0pt} + \noindent\begin{minipage}[t]{0.2\linewidth} + \begin{enumerate} + \item \parbox[t]{15pt}{test \\ test} + \item test + \item test + \end{enumerate} + \end{minipage}% + \begin{minipage}[t]{0.2\linewidth} + \begin{enumerate} + \item \parbox[t]{15pt}{test \\ test\strut} + \item test + \item test + \end{enumerate} + \end{minipage}% + \begin{minipage}[t]{0.2\linewidth} + \begin{enumerate} + \item \fbox{\parbox[t]{15pt}{test \\ test}} + \item \fbox{test} + \item \fbox{test} + \end{enumerate} + \end{minipage}% + \begin{minipage}[t]{0.2\linewidth} + \begin{enumerate} + \item \fbox{\parbox[t]{15pt}{test \\ test\strut}} + \item \fbox{test} + \item \fbox{test} + \end{enumerate} + \end{minipage}% + +The final two lists use '\fbox' to show what's happening. The first +item '\parbox' of the third list goes only to the bottom of its second +'test', which happens not have any characters that descend below the +baseline. The fourth list adds the strut that gives the needed extra +below-baseline space. + + The '\strut' command is often useful in graphics, such as in 'TikZ' +or 'Asymptote'. For instance, you may have a command such as +'\graphnode{NODE-NAME}' that fits a circle around NODE-NAME. However, +unless you are careful the NODE-NAME's 'x' and 'y' will produce +different-diameter circles because the characters are different sizes. +A careful '\graphnode' might insert a '\strut', then NODE-NAME, and then +draw the circle. + + The general approach of using a zero width '\rule' is useful in many +circumstances. In this table, the zero-width rule keeps the top of the +first integral from hitting the '\hline'. Similarly, the second rule +keeps the second integral from hitting the first. + + \begin{tabular}{rl} + \textsc{Integral} &\textsc{Value} \\ + \hline + $\int_0^x t\, dt$ &$x^2/2$ \rule{0em}{2.5ex} \\ + $\int_0^x t^2\, dt$ &$x^3/3$ \rule{0em}{2.5ex} + \end{tabular} + +(Although the line-ending double backslash command has an available +optional argument to change the corresponding baseline skip, that won't +solve this issue. Changing the first double backslash to something like +'\\[2.5ex]' will put more room between the header line and the '\hline' +rule, and the integral would still hit the rule.) + + +File: latex2e.info, Node: \vspace, Next: \vfill, Prev: \strut, Up: Spaces + +19.14 '\vspace' +=============== + +Synopsis, one of: + + \vspace{LENGTH} + \vspace*{LENGTH} + + Add the vertical space LENGTH. The LENGTH can be positive, negative, +or zero. It is a rubber length--it may contain a 'plus' or 'minus' +component (*note Lengths::). + + This puts space between the two paragraphs. + + And I slept. + + \vspace{1ex plus 0.5ex} + The new day dawned cold. + +(*Note \bigskip & \medskip & \smallskip::, for common inter-paragraph +spaces.) + + The '*'-form inserts vertical space that is non-discardable. More +precisely, LaTeX discards vertical space at a page break and the +'*'-form causes the space to stay. This example leaves space between +the two questions. + + Question: Find the integral of \( 5x^4+5 \). + + \vspace*{2cm plus 0.5cm} + Question: Find the derivative of \( x^5+5x+9 \). + +That space will be present even if the page break happens to fall +between the questions. 
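   The LENGTH can also be negative, which draws the following material
upward.  This sketch, an illustration rather than an example from this
document, tightens the gap before a closing signature by half a line.

     Yours faithfully,

     \vspace{-0.5\baselineskip}
     A. U. Thor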
+ + If you use '\vspace' in the middle of a paragraph (i.e., in +horizontal mode) then the space is inserted after the line containing +the '\vspace' command; it does not start a new paragraph at the +'\vspace' command. + + In this example the two questions will be evenly spaced vertically on +the page, with at least one inch of space below each. + + \begin{document} + 1) Who put the bomp in the bomp bah bomp bah bomp? + \vspace{1in plus 1fill} + + 2) Who put the ram in the rama lama ding dong? + \vspace{1in plus 1fill} + \end{document} + + +File: latex2e.info, Node: \vfill, Next: \addvspace, Prev: \vspace, Up: Spaces + +19.15 '\vfill' +============== + +Synopsis: + + \vfill + + End the current paragraph and insert a vertical rubber length that is +infinite, so it can stretch or shrink as far as needed (*note +Lengths::). + + It is often used in the same way as '\vspace{\fill}', except that +'\vfill' ends the current paragraph whereas '\vspace{\fill}' adds the +infinite vertical space below its line, irrespective of the paragraph +structure. In both cases that space will disappear at a page boundary; +to circumvent this see the starred option in *note \vspace::. + + In this example the page is filled, so the top and bottom lines +contain the text 'Lost Dog!' and the second 'Lost Dog!' is exactly +halfway between them. + + \begin{document} + Lost Dog! + \vfill + Lost Dog! % perfectly in the middle + \vfill + Lost Dog! + \end{document} + + +File: latex2e.info, Node: \addvspace, Prev: \vfill, Up: Spaces + +19.16 '\addvspace' +================== + +Synopsis: + + \addvspace{VERT-LENGTH} + + Add a vertical space of VERT-LENGTH. However, if there are two or +more '\addvspace''s in a sequence then together they only add the space +needed to make the natural length equal to the maximum of the +VERT-LENGTH's in that sequence. This command is fragile (*note +\protect::). The VERT-LENGTH is a rubber length (*note Lengths::). + + This example illustrates. The 'picture' draws a scale over which to +rules are placed. In a standard LaTeX article the length +'\baselineskip' is 12pt. As shown by the scale, the two rules are 22pt +apart: the sum of the '\baselineskip' and the 10pt from the first +'\addvspace'. + + \documentclass{article} + \usepackage{color} + \begin{document} + \setlength{\unitlength}{2pt}% + \noindent\begin{picture}(0,0)% + \multiput(0,0)(0,-1){25}{{\color{blue}\line(1,0){1}}} + \multiput(0,0)(0,-5){6}{{\color{red}\line(1,0){2}}} + \end{picture}% + \rule{0.25\linewidth}{0.1pt}% + \par\addvspace{10pt}% \addvspace{20pt}% + \par\noindent\rule{0.25\linewidth}{0.1pt}% + \end{document} + +Now uncomment the second '\addvspace'. It does not make the gap 20pt +longer; instead the gap is the sum of '\baselineskip' and 20pt. So +'\addvspace' in a sense does the opposite of its name--it makes sure +that multiple vertical spaces do not accumulate, but instead that only +the largest one is used. + + LaTeX uses this command to adjust the vertical space above or below +an environment that starts a new paragraph. For instance, a 'theorem' +environment begins and ends with '\addvspace' so that two consecutive +'theorem''s are separated by one vertical space, not two. + + A error 'Something's wrong--perhaps a missing \item' pointing to an +'\addvspace' means that you were not in vertical mode when you hit this +command. One way to change that is to precede '\addvspace' with a +'\par' command (*note \par::), as in the above example. 
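   To summarize the difference described above, here is a minimal
sketch (not taken from this document): consecutive '\vspace' commands
accumulate, while consecutive '\addvspace' commands produce only the
largest requested space.

     \par\vspace{10pt}\vspace{20pt}        % adds 30pt of vertical space
     ...
     \par\addvspace{10pt}\addvspace{20pt}  % adds only 20pt of vertical space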
+ + +File: latex2e.info, Node: Boxes, Next: Color, Prev: Spaces, Up: Top + +20 Boxes +******** + +At its core, LaTeX puts things in boxes and then puts the boxes on a +page. So these commands are central. + + There are many packages on CTAN that are useful for manipulating +boxes. One useful adjunct to the commands here is 'adjustbox'. + +* Menu: + +* \mbox & \makebox:: Horizontal boxes. +* \fbox & \framebox:: Put a frame around a box. +* \parbox:: Box with text in paragraph mode. +* \raisebox:: Raise or lower text. +* \sbox & \savebox:: Like '\makebox' but save the text for later. +* lrbox:: Environment version of '\sbox'. +* \usebox:: Print saved text. + + +File: latex2e.info, Node: \mbox & \makebox, Next: \fbox & \framebox, Up: Boxes + +20.1 '\mbox' & '\makebox' +========================= + +Synopsis, one of: + + \mbox{TEXT} + \makebox{TEXT} + \makebox[WIDTH]{TEXT} + \makebox[WIDTH][POSITION]{TEXT} + + Create a box, a container for material. The TEXT is typeset in LR +mode (*note Modes::) so it is not broken into lines. The '\mbox' +command is robust, while '\makebox' is fragile (*note \protect::). + + Because 'text' is not broken into lines, you can use '\mbox' to +prevent hyphenation. In this example, LaTeX will not hyphenate the tank +name, 'T-34'. + + The soviet tank \mbox{T-34} is a symbol of victory against nazism. + + The first two command synopsis versions, '\mbox' and '\makebox', are +roughly equivalent. They create a box just wide enough to contain the +TEXT. (They are like plain TeX's '\hbox'.) + + In the third version the optional argument WIDTH specifies the width +of the box. Note that the space occupied by the text need not equal the +width of the box. For one thing, TEXT can be too small; this creates a +full-line box + + \makebox[\linewidth]{Chapter Exam} + +with 'Chapter Exam' centered. But TEXT can also be too wide for WIDTH. +See the example below of zero-width boxes. + + In the WIDTH argument you can use the following lengths that refer to +the dimension of the box that LaTeX gets on typesetting TEXT: '\depth', +'\height', '\width', '\totalheight' (this is the box's height plus its +depth). For example, to make a box with the text stretched to double +the natural size you can say this. + + \makebox[2\width]{Get a stretcher} + + For the fourth command synopsis version the optional argument +POSITION gives position of the text within the box. It may take the +following values: + +'c' + The TEXT is centered (default). + +'l' + The TEXT is flush left. + +'r' + Flush right. + +'s' + Stretch the interword space in TEXT across the entire WIDTH. The + TEXT must contain stretchable space for this to work. For + instance, this could head a press release: + '\noindent\makebox[\textwidth][s]{\large\hfil IMMEDIATE\hfil + RELEASE\hfil}' + + A common use of '\makebox' is to make zero-width text boxes. This +puts the value of the quiz questions to the left of those questions. + + \newcommand{\pts}[1]{\makebox[0em][r]{#1 points\hspace*{1em}}} + \pts{10}What is the air-speed velocity of an unladen swallow? + + \pts{90}An African or European swallow? + + + The right edge of the output '10 points ' (note the ending space +after 'points') will be just before the 'What'. You can use '\makebox' +similarly when making graphics, such as in 'TikZ' or 'Asymptote', where +you put the edge of the text at a known location, regardless of the +length of that text. + + For boxes with frames see *note \fbox & \framebox::. For colors +see *note Colored boxes::. 
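   Zero-width boxes also let you center text on a known point.  In this
sketch, an illustration rather than an example from this document, each
label is centered at its position and takes up no horizontal width of
its own, so the 2cm spacing between positions is unaffected by how wide
the labels are.

     \noindent
     \makebox[0pt][c]{0}\hspace{2cm}%
     \makebox[0pt][c]{10}\hspace{2cm}%
     \makebox[0pt][c]{100}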
+ + There is a related version of '\makebox' that is used within the +'picture' environment, where the length is given in terms of +'\unitlength' (*note \makebox (picture)::). + + As TEXT is typeset in LR mode, neither a double backslash '\\' nor +'\par' will give you a new line; for instance '\makebox{abc def \\ ghi}' +outputs 'abc defghi' while '\makebox{abc def \par ghi}' outputs 'abc def +ghi', both on a single line. To get multiple lines see *note \parbox:: +and *note minipage::. + + +File: latex2e.info, Node: \fbox & \framebox, Next: \parbox, Prev: \mbox & \makebox, Up: Boxes + +20.2 '\fbox' & '\framebox' +========================== + +Synopses, one of: + + \fbox{TEXT} + \framebox{TEXT} + \framebox[WIDTH]{TEXT} + \framebox[WIDTH][POSITION]{TEXT} + + Create a box with an enclosing frame, four rules surrounding the +TEXT. These commands are the same as '\mbox' and '\makebox' except for +the frame (*note \mbox & \makebox::). The '\fbox' command is robust, +the '\framebox' command is fragile (*note \protect::). + + \fbox{Warning! No work shown, no credit given.} + +LaTeX puts the text into a box, the text cannot be hyphenated. Around +that box, separated from it by a small gap, are four rules making a +frame. + + The first two command invocations, '\fbox{...}' and '\framebox{...}', +are roughly the same. As to the third and fourth invocations, the +optional arguments allow you to specify the box width as WIDTH and the +position of the text inside that box as POSITION. *Note \mbox & +\makebox::, for the full description but here is an example creating an +empty box that is 1/4in wide. + + \setlength{\fboxsep}{0pt}\framebox[0.25in]{\strut}} + +The '\strut' ensures a total height of '\baselineskip' (*note \strut::). + + These parameters determine the frame layout. + +'\fboxrule' + The thickness of the rules around the enclosed box. The default is + 0.2pt. Change it with a command such as + '\setlength{\fboxrule}{0.8pt}' (*note \setlength::). + +'\fboxsep' + The distance from the frame to the enclosed box. The default is + 3pt. Change it with a command such as '\setlength{\fboxsep}{0pt}' + (*note \setlength::). Setting it to 0pt is useful sometimes: this + will put a frame around the picture with no white border. + + {\setlength{\fboxsep}{0pt}% + \framebox{% + \includegraphics[width=0.5\textwidth]{prudence.jpg}}} + + The extra curly braces keep the effect of the '\setlength' local. + + As with '\mbox' and '\makebox', LaTeX will not break lines in TEXT. +But this example has LaTeX break lines to make a paragraph, and then +frame the result. + + \framebox{% + \begin{minipage}{0.6\linewidth} + My dear, here we must run as fast as we can, just to stay in place. + And if you wish to go anywhere you must run twice as fast as that. + \end{minipage}} + + *Note Colored boxes::, for colors other than black and white. + + The 'picture' environment has a version of the '\framebox' command +where the units depend on 'picture''s '\unitlength' (*note \framebox +(picture)::). + + +File: latex2e.info, Node: \parbox, Next: \raisebox, Prev: \fbox & \framebox, Up: Boxes + +20.3 '\parbox' +============== + +Synopses, one of: + + \parbox{WIDTH}{CONTENTS} + \parbox[POSITION]{WIDTH}{CONTENTS} + \parbox[POSITION][HEIGHT]{WIDTH}{CONTENTS} + \parbox[POSITION][HEIGHT][INNER-POS]{WIDTH}{CONTENTS} + + Produce a box of text that is WIDTH wide. Use this command to make a +box of small pieces of text, of a single paragraph. This command is +fragile (*note \protect::). + + \begin{picture}(0,0) + ... 
+ \put(1,2){\parbox{1.75in}{\raggedright Because the graph is a line on + this semilog paper, the relationship is + exponential.}} + \end{picture} + + The CONTENTS are processed in a text mode (*note Modes::) so LaTeX +will break lines to make a paragraph. But it won't make multiple +paragraphs; for that, use a 'minipage' environment (*note minipage::). + + The options for '\parbox' (except for CONTENTS) are the same as those +for 'minipage'. For convenience a summary of the options is here but +see *note minipage:: for a complete description. + + There are two required arguments. The WIDTH is a rigid length (*note +Lengths::). It sets the width of the box into which LaTeX typesets +CONTENTS. The CONTENTS is the text that is placed in that box. It +should not have any paragraph-making components. + + There are three optional arguments, POSITION, HEIGHT, and INNER-POS. +The POSITION gives the vertical alignment of the "parbox" with respect +to the surrounding material. The supported values are 'c' or 'm' to +make the vertical center of the parbox lines up with the center of the +adjacent text line (this is the default), or 't' to match the top line +of the parbox with the baseline of the surrounding material, or 'b' to +match the bottom line. + + The optional argument HEIGHT overrides the natural height of the box. + + The optional argument INNER-POS controls the placement of CONTENT +inside the 'parbox'. Its default is the value of POSITION. Its +possible values are: 't' to put the CONTENT at the top of the box, 'c' +to put it in the vertical center, 'b' to put it at the bottom of the +box, and 's' to stretch it out vertically (for this, the text must +contain vertically stretchable space). + + +File: latex2e.info, Node: \raisebox, Next: \sbox & \savebox, Prev: \parbox, Up: Boxes + +20.4 '\raisebox' +================ + +Synopsis, one of: + + \raisebox{DISTANCE}{TEXT} + \raisebox{DISTANCE}[HEIGHT]{TEXT} + \raisebox{DISTANCE}[HEIGHT][DEPTH]{TEXT} + + Raise or lower TEXT. This command is fragile (*note \protect::). + + This example makes a command for denoting the restriction of a +function by lowering the vertical bar symbol. + + \newcommand*\restricted[1]{\raisebox{-.5ex}{$|$}_{#1}} + $f\restricted{A}$ + + The first mandatory argument DISTANCE specifies how far to raise the +second mandatory argument TEXT. This is a rigid length (*note +Lengths::). If it is negative then it lowers TEXT. The TEXT is +processed in LR mode so it cannot contain line breaks (*note Modes::). + + The optional arguments HEIGHT and DEPTH are dimensions. If they are +specified, they override the natural height and depth of the box LaTeX +gets by typesetting TEXT. + + In the arguments DISTANCE, HEIGHT, and DEPTH you can use the +following lengths that refer to the dimension of the box that LaTeX gets +on typesetting TEXT: '\depth', '\height', '\width', '\totalheight' (this +is the box's height plus its depth). + + This will align two graphics on their top (*note Graphics::). + + \usepackage{graphicx,calc} % in preamble + ... + \begin{center} + \raisebox{1ex-\height}{% + \includegraphics[width=0.4\linewidth]{lion.png}} + \qquad + \raisebox{1ex-\height}{% + \includegraphics[width=0.4\linewidth]{meta.png}} + \end{center} + +The first '\height' is the height of 'lion.png' while the second is the +height of 'meta.png'. 
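   The optional arguments matter when raised text would otherwise
disturb the line spacing.  In this sketch, an illustration rather than
an example from this document, the second raised word claims zero
height and zero depth, so LaTeX spaces the surrounding lines as if that
word had not been raised at all.

     A \raisebox{2ex}{raised} word can push the lines apart, but a
     \raisebox{2ex}[0pt][0pt]{raised} word that reports no size does not.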
+ + +File: latex2e.info, Node: \sbox & \savebox, Next: lrbox, Prev: \raisebox, Up: Boxes + +20.5 '\sbox' & '\savebox' +========================= + +Synopsis, one of: + + \sbox{BOX-CMD}{TEXT} + \savebox{BOX-CMD}{TEXT} + \savebox{BOX-CMD}[WIDTH]{TEXT} + \savebox{BOX-CMD}[WIDTH][POS]{TEXT} + + Typeset TEXT just as with '\makebox' (*note \mbox & \makebox::) +except that LaTeX does not output it but instead saves it in a box +register referred to by a variable named BOX-CMD. The variable name +BOX-CMD begins with a backslash, '\'. You must have previously +allocated the box register BOX-CMD with '\newsavebox' (*note +\newsavebox::). The '\sbox' command is robust while '\savebox' is +fragile (*note \protect::). + + This creates and uses a box register. + + \newsavebox{\fullname} + \sbox{\fullname}{John Jacob Jingleheimer Schmidt} + ... + \usebox{\fullname}! His name is my name, too! + Whenever we go out, the people always shout! + There goes \usebox{\fullname}! Ya da da da da da da. + +One advantage of using and reusing a box register over a '\newcommand' +macro variable is efficiency, that LaTeX need not repeatedly retypeset +the contents. See the example below. + + The first two command invocations, '\sbox{BOX-CMD}{TEXT}' and +'\savebox{BOX-CMD}{TEXT}', are roughly equivalent. As to the third and +fourth, the optional arguments allow you to specify the box width as +WIDTH, and the position of the text inside that box as POSITION. *Note +\mbox & \makebox::, for the full description. + + In the '\sbox' and '\savebox' commands the TEXT is typeset in LR mode +so it does not have line breaks (*note Modes::). If you use these then +LaTeX doesn't give you an error but it ignores what you want: if you +enter '\sbox{\newreg}{test \\ test}' and '\usebox{\newreg}' then you get +'testtest', while if you enter '\sbox{\newreg}{test \par test}' and +'\usebox{\newreg}' then you get 'test test', but no error or warning. +To fix this use a '\parbox' or 'minipage' as here. + + \newsavebox{\areg} + \savebox{\areg}{% + \begin{minipage}{\linewidth} + \begin{enumerate} + \item First item + \item Second item + \end{enumerate} + \end{minipage}} + ... + \usebox{\areg} + + As an example of the efficiency of reusing a register's contents, +this puts the same picture on each page of the document by putting it in +the header. LaTeX only typesets it once. + + \usepackage{graphicx} % all this in the preamble + \newsavebox{\sealreg} + \savebox{\sealreg}{% + \setlength{\unitlength}{1in}% + \begin{picture}(0,0)% + \put(1.5,-2.5){% + \begin{tabular}{c} + \includegraphics[height=2in]{companylogo.png} \\ + Office of the President + \end{tabular}} + \end{picture}% + } + \markright{\usebox{\sealreg}} + \pagestyle{headings} + +The 'picture' environment is good for fine-tuning the placement. + + If the register '\noreg' has not already been defined then you get +something like 'Undefined control sequence. \noreg'. + + +File: latex2e.info, Node: lrbox, Next: \usebox, Prev: \sbox & \savebox, Up: Boxes + +20.6 'lrbox' +============ + +Synopsis: + + \begin{lrbox}{BOX-CMD} + TEXT + \end{lrbox} + + This is the environment form of the '\sbox' and '\savebox' commands, +and is equivalent to them. *Note \sbox & \savebox::, for the full +description. + + The TEXT inside the environment is saved in the box register referred +to by variable 'BOX-CMD'. The variable name BOX-CMD must begin with a +backslash, '\'. You must allocate this box register in advance with +'\newsavebox' (*note \newsavebox::). 
In this example the environment is +convenient for entering the 'tabular'. + + \newsavebox{\jhreg} + \begin{lrbox}{\jhreg} + \begin{tabular}{c} + \includegraphics[height=1in]{jh.png} \\ + Jim Hef{}feron + \end{tabular} + \end{lrbox} + ... + \usebox{\jhreg} + + +File: latex2e.info, Node: \usebox, Prev: lrbox, Up: Boxes + +20.7 '\usebox' +============== + +Synopsis: + + \usebox{BOX-CMD} + + Produce the box most recently saved in the box register BOX-CMD by +the commands '\sbox' or '\savebox', or the 'lrbox' environment. For +more information and examples, *note \sbox & \savebox::. (Note that the +variable name BOX-CMD starts with a backslash, '\'.) This command is +robust (*note \protect::). + + +File: latex2e.info, Node: Color, Next: Graphics, Prev: Boxes, Up: Top + +21 Color +******** + +You can add color to text, rules, etc. You can also have color in a box +or on an entire page and write text on top of it. + + Color support comes as an additional package. So put +'\usepackage{color}' in your document preamble to use the commands +described here. + + Many other packages also supplement LaTeX's color abilities. +Particularly worth mentioning is 'xcolor', which is widely used and +significantly extends the capabilities described here, including adding +'HTML' and 'Hsb' color models. + +* Menu: + +* Color package options:: Options when you load the standard package. +* Color models:: How colors are represented. +* Commands for color:: The available commands. + + +File: latex2e.info, Node: Color package options, Next: Color models, Up: Color + +21.1 'color' package options +============================ + +Synopsis (must be in the document preamble): + + \usepackage[COMMA-SEPARATED OPTION LIST]{color} + + When you load the 'color' package there are two kinds of available +options. + + The first specifies the "printer driver". LaTeX doesn't contain +information about different output systems but instead depends on +information stored in a file. Normally you should not specify the +driver option in the document, and instead rely on your system's +default. One advantage of this is that it makes the document portable +across systems. For completeness we include a list of the drivers. The +currently relevant ones are: 'dvipdfmx', 'dvips', 'dvisvgm', 'luatex', +'pdftex', 'xetex'. The two 'xdvi' and 'oztex' are essentially aliases +for 'dvips' (and 'xdvi' is monochrome). Ones that should not be used +for new systems are: 'dvipdf', 'dvipdfm', 'dviwin', 'dvipsone', 'emtex', +'pctexps', 'pctexwin', 'pctexhp', 'pctex32', 'truetex', 'tcidvi', 'vtex' +(and 'dviwindo' is an alias for 'dvipsone'). + + The second kind of options, beyond the drivers, are below. + +'monochrome' + Disable the color commands, so that they do not generate errors but + do not generate color either. + +'dvipsnames' + Make available a list of 68 color names that are often used, + particularly in legacy documents. These color names were + originally provided by the 'dvips' driver, giving the option name. + +'nodvipsnames' + Do not load that list of color names, saving LaTeX a tiny amount of + memory space. + + +File: latex2e.info, Node: Color models, Next: Commands for color, Prev: Color package options, Up: Color + +21.2 Color models +================= + +A "color model" is a way of representing colors. LaTeX's capabilities +depend on the printer driver. However, the 'pdftex', 'xetex', and +'luatex' printer drivers are today by far the most commonly used. The +models below work for those drivers. 
All but one of these is also +supported by essentially all other printer drivers used today. + + Note that color combination can be additive or subtractive. Additive +mixes colors of light, so that for instance combining full intensities +of red, green, and blue produces white. Subtractive mixes pigments, +such as with inks, so that combining full intensity of cyan, magenta, +and yellow makes black. + +'cmyk' + A comma-separated list with four real numbers between 0 and 1, + inclusive. The first number is the intensity of cyan, the second + is magenta, and the others are yellow and black. A number value of + 0 means minimal intensity, while a 1 is for full intensity. This + model is often used in color printing. It is a subtractive model. + +'gray' + A single real number between 0 and 1, inclusive. The colors are + shades of grey. The number 0 produces black while 1 gives white. + +'rgb' + A comma-separated list with three real numbers between 0 and 1, + inclusive. The first number is the intensity of the red component, + the second is green, and the third the blue. A number value of 0 + means that none of that component is added in, while a 1 means full + intensity. This is an additive model. + +'RGB' + ('pdftex', 'xetex', 'luatex' drivers) A comma-separated list with + three integers between 0 and 255, inclusive. This model is a + convenience for using 'rgb' since outside of LaTeX colors are often + described in a red-green-blue model using numbers in this range. + The values entered here are converted to the 'rgb' model by + dividing by 255. + +'named' + Colors are accessed by name, such as 'PrussianBlue'. The list of + names depends on the driver, but all support the names 'black', + 'blue', 'cyan', 'green', 'magenta', 'red', 'white', and 'yellow' + (See the 'dvipsnames' option in *note Color package options::). + + +File: latex2e.info, Node: Commands for color, Prev: Color models, Up: Color + +21.3 Commands for color +======================= + +These are the commands available with the 'color' package. + +* Menu: + +* Define colors:: Give a color a name. +* Colored text:: Text or rules in color. +* Colored boxes:: A box of color, to write over. +* Colored pages:: A whole page colored. + + +File: latex2e.info, Node: Define colors, Next: Colored text, Up: Commands for color + +21.3.1 Define colors +-------------------- + +Synopsis: + + \definecolor{NAME}{MODEL}{SPECIFICATION} + + Give the name NAME to the color. For example, after this + + \definecolor{silver}{rgb}{0.75,0.75,0.74} + +you can use that color name with 'Hi ho, \textcolor{silver}{Silver}!'. + + This example gives the color a more abstract name, so it could change +and not be misleading. + + \definecolor{logocolor}{RGB}{145,92,131} % RGB needs pdflatex + \newcommand{\logo}{\textcolor{logocolor}{Bob's Big Bagels}} + + Often a document's colors are defined in the preamble, or in the +class or style, rather than in the document body. + + +File: latex2e.info, Node: Colored text, Next: Colored boxes, Prev: Define colors, Up: Commands for color + +21.3.2 Colored text +------------------- + +Synopses: + + \textcolor{NAME}{...} + \textcolor[COLOR MODEL]{COLOR SPECIFICATION}{...} + +or + + \color{NAME} + \color[COLOR MODEL]{SPECIFICATION} + + The affected text gets the color. This line + + \textcolor{magenta}{My name is Ozymandias, king of kings:} + Look on my works, ye Mighty, and despair! + +causes the first half to be in magenta while the rest is in black. 
You +can use a color declared with '\definecolor' in exactly the same way +that we just used the builtin color 'magenta'. + + \definecolor{MidlifeCrisisRed}{rgb}{1.0,0.11,0.0} + I'm thinking about getting a \textcolor{MidlifeCrisisRed}{sports car}. + + The two '\textcolor' and '\color' differ in that the first is a +command form, enclosing the text to be colored as an argument. Often +this form is more convenient, or at least more explicit. The second +form is a declaration, as in 'The moon is made of {\color{green} green} +cheese', so it is in effect until the end of the current group or +environment. This is sometimes useful when writing macros or as below +where it colors everything inside the 'center' environment, including +the vertical and horizontal lines. + + \begin{center} \color{blue} + \begin{tabular}{l|r} + UL &UR \\ \hline + LL &LR + \end{tabular} + \end{center} + + You can use color in equations. A document might have this +definition in the preamble + + \definecolor{highlightcolor}{RGB}{225,15,0} + +and then contain this equation. + + \begin{equation} + \int_a^b \textcolor{highlightcolor}{f'(x)}\,dx=f(b)-f(a) + \end{equation} + + Typically the colors used in a document are declared in a class or +style but sometimes you want a one-off. Those are the second forms in +the synopses. + + Colors of \textcolor[rgb]{0.33,0.14,0.47}{Purple} and + {\color[rgb]{0.72,0.60,0.37}Gold} for the team. + + The format of COLOR SPECIFICATION depends on the color model (*note +Color models::). For instance, while 'rgb' takes three numbers, 'gray' +takes only one. + + The selection was \textcolor[gray]{0.5}{grayed out}. + + Colors inside colors do not combine. Thus + + \textcolor{green}{kind of \textcolor{blue}{blue}} + +has a final word that is blue, not a combination of blue and green. + + +File: latex2e.info, Node: Colored boxes, Next: Colored pages, Prev: Colored text, Up: Commands for color + +21.3.3 Colored boxes +-------------------- + +Synopses: + + \colorbox{NAME}{...} + \colorbox[MODEL NAME]{BOX BACKGROUND COLOR}{...} + +or + + \fcolorbox{FRAME COLOR}{BOX BACKGROUND COLOR}{...} + \fcolorbox[MODEL NAME]{FRAME COLOR}{BOX BACKGROUND COLOR}{...} + + Make a box with the stated background color. The '\fcolorbox' +command puts a frame around the box. For instance this + + Name:~\colorbox{cyan}{\makebox[5cm][l]{\strut}} + +makes a cyan-colored box that is five centimeters long and gets its +depth and height from the '\strut' (so the depth is '-.3\baselineskip' +and the height is '\baselineskip'). This puts white text on a blue +background. + + \colorbox{blue}{\textcolor{white}{Welcome to the machine.}} + + The '\fcolorbox' commands use the same parameters as '\fbox' (*note +\fbox & \framebox::), '\fboxrule' and '\fboxsep', to set the thickness +of the rule and the boundary between the box interior and the +surrounding rule. LaTeX's defaults are '0.4pt' and '3pt', respectively. + + This example changes the thickness of the border to 0.8 points. Note +that it is surrounded by curly braces so that the change ends at the end +of the second line. + + {\setlength{\fboxrule}{0.8pt} + \fcolorbox{black}{red}{Under no circumstances turn this knob.}} + + +File: latex2e.info, Node: Colored pages, Prev: Colored boxes, Up: Commands for color + +21.3.4 Colored pages +-------------------- + +Synopses: + + \pagecolor{NAME} + \pagecolor[COLOR MODEL]{COLOR SPECIFICATION} + \nopagecolor + + The first two set the background of the page, and all subsequent +pages, to the color. 
For an explanation of the specification in the +second form *note Colored text::. The third returns the background to +normal, which is a transparent background. (If that is not supported +use '\pagecolor{white}', although that will make a white background +rather than the default transparent background.) + + ... + \pagecolor{cyan} + ... + \nopagecolor + + +File: latex2e.info, Node: Graphics, Next: Special insertions, Prev: Color, Up: Top + +22 Graphics +*********** + +You can use graphics such as PNG or PDF files in your LaTeX document. +You need an additional package, which comes standard with LaTeX. This +example is the short how-to. + + \include{graphicx} % goes in the preamble + ... + \includegraphics[width=0.5\linewidth]{plot.pdf} + + To use the commands described here your document preamble must +contain either '\usepackage{graphicx}' or '\usepackage{graphics}'. Most +of the time, 'graphicx' is the better choice. + + Graphics come in two main types, raster and vector. LaTeX can use +both. In raster graphics the file contains an entry for each location +in an array, describing what color it is. An example is a photograph in +JPG format. In vector graphics, the file contains a list of +instructions such as 'draw a circle with this radius and that center'. +An example is a line drawing produced by the Asymptote program, in PDF +format. Generally vector graphics are more useful because you can +rescale their size without pixelation or other problems, and because +they often have a smaller size. + + There are systems particularly well-suited to make graphics for a +LaTeX document. For example, these allow you to use the same fonts as +in your document. LaTeX comes with a 'picture' environment (*note +picture::) that has simple capabilities. Besides that, there are other +ways to include the graphic-making commands in the document. Two such +systems are the PSTricks and TikZ packages. There are also systems +external to LaTeX, that generate a graphic that you include using the +commands of this chapter. Two that use a programming language are +Asymptote and MetaPost. One that uses a graphical interface is Xfig. +Full description of these systems is outside the scope of this document; +see their documentation on CTAN. + +* Menu: + +* Graphics package options:: Options when you load the package. +* Graphics package configuration:: Where to look for files, which file types. +* Commands for graphics:: The available commands. + + +File: latex2e.info, Node: Graphics package options, Next: Graphics package configuration, Up: Graphics + +22.1 'graphics' package options +=============================== + +Synopsis (must be in the document preamble): + + \usepackage[COMMA-SEPARATED OPTION LIST]{graphics} + +or + + \usepackage[COMMA-SEPARATED OPTION LIST]{graphicx} + + The 'graphicx' package has a format for optional arguments to the +'\includegraphics' command that is convenient (it is the key-value +format), so it is the better choice for new documents. When you load +the 'graphics' or 'graphicx' package with '\usepackage' there are two +kinds of available options. + + The first is that LaTeX does not contain information about different +output systems but instead depends on information stored in a "printer +driver" file. Normally you should not specify the driver option in the +document, and instead rely on your system's default. One advantage of +this is that it makes the document portable across systems. + + For completeness here is a list of the drivers. 
The currently +relevant ones are: 'dvipdfmx', 'dvips', 'dvisvgm', 'luatex', 'pdftex', +'xetex'. The two 'xdvi' and 'oztex' are essentially aliases for 'dvips' +(and 'xdvi' is monochrome). Ones that should not be used for new +systems are: 'dvipdf', 'dvipdfm', 'dviwin', 'dvipsone', 'emtex', +'pctexps', 'pctexwin', 'pctexhp', 'pctex32', 'truetex', 'tcidvi', 'vtex' +(and 'dviwindo' is an alias for 'dvipsone'). These are stored in files +with a '.def' extension, such as 'pdftex.def'. + + The second kind of options are below. + +'demo' + Instead of an image file, LaTeX puts in a 150 pt by 100 pt + rectangle (unless another size is specified in the + '\includegraphics' command). + +'draft' + For each graphic file, it is not shown but instead its file name is + printed in a box of the correct size. In order to determine the + size, the file must be present. + +'final' + (Default) Override any previous 'draft' option, so that the + document shows the contents of the graphic files. + +'hiderotate' + Do not show rotated text. (This allows for the possibility that a + previewer does not have the capability to rotate text.) + +'hidescale' + Do not show scaled text. (This allows for the possibility that a + previewer does not have the capability to scale.) + +'hiresbb' + In a PS or EPS file the graphic size may be specified in two ways. + The '%%BoundingBox' lines describe the graphic size using integer + multiples of a PostScript point, that is, integer multiples of 1/72 + inch. A later addition to the PostScript language allows decimal + multiples, such as 1.23, in '%%HiResBoundingBox' lines. This + option has LaTeX to read the size from the latter. + + +File: latex2e.info, Node: Graphics package configuration, Next: Commands for graphics, Prev: Graphics package options, Up: Graphics + +22.2 'graphics' package configuration +===================================== + +These commands configure the way LaTeX searches the file system for the +graphic. + + The behavior of file system search code is necessarily platform +dependent. In this document we cover GNU/Linux, Macintosh, and Windows, +as those systems are typically configured. For other situations consult +the documentation in 'grfguide.pdf', or the LaTeX source, or your TeX +distribution's documentation. + +* Menu: + +* \graphicspath:: Directories to search. +* \DeclareGraphicsExtensions:: File types, such as JPG or EPS. +* \DeclareGraphicsRule:: How to handle file types. + + +File: latex2e.info, Node: \graphicspath, Next: \DeclareGraphicsExtensions, Up: Graphics package configuration + +22.2.1 '\graphicspath' +---------------------- + +Synopsis: + + \graphicspath{LIST OF DIRECTORIES INSIDE CURLY BRACES} + + Declare a list of directories to search for graphics files. This +allows you to later say something like '\includegraphics{lion.png}' +instead of having to give its path. + + LaTeX always looks for graphic files first in the current directory +(and the output directory, if specified; *note output directory::). The +declaration below tells the system to then look in the subdirectory +'pix', and then '../pix'. + + \usepackage{graphicx} % or graphics; put in preamble + ... + \graphicspath{ {pix/} {../pix/} } + + The '\graphicspath' declaration is optional. If you don't include it +then LaTeX's default is to search all of the places that it usually +looks for a file (it uses LaTeX's '\input@path'). In particular, in +this case one of the places it looks is the current directory. 
+ + Enclose each directory name in curly braces; for example, above it +says ''{pix}''. Do this even if there is only one directory. Each +directory name must end in a forward slash, '/'. This is true even on +Windows, where good practice is to use forward slashes for all the +directory separators since it makes the document portable to other +platforms. If you have spaces in your directory name then use double +quotes, as with '{"my docs/"}'. Getting one of these rules wrong will +cause LaTeX to report 'Error: File `FILENAME' not found'. + + Basically, the algorithm is that with this example, after looking in +the current directory, + + \graphicspath{ {pix/} {../pix/} } + ... + \usepackage{lion.png} + +for each of the listed directories, LaTeX concatenates it with the +filename and searches for the result, checking for 'pix/lion.png' and +then '../pix/lion.png'. This algorithm means that the '\graphicspath' +command does not recursively search subdirectories: if you issue +'\graphicspath{{a/}}' and the graphic is in 'a/b/lion.png' then LaTeX +will not find it. It also means that you can use absolute paths such as +'\graphicspath{{/home/jim/logos/}}' or +'\graphicspath{{C:/Users/Albert/Pictures/}}'. However, using these +means that the document is not portable. (You could preserve +portability by adjusting your TeX system settings configuration file +parameter 'TEXINPUTS'; see the documentation of your system.) + + You can use '\graphicspath' anywhere in the document. You can use it +more than once. Show its value with +'\makeatletter\typeout{\Ginput@path}\makeatother'. + + The directories are taken with respect to the base file. That is, +suppose that you are working on a document based on 'book/book.tex' and +it contains '\include{chapters/chap1}'. If in 'chap1.tex' you put +'\graphicspath{{plots/}}' then LaTeX will not search for graphics in +'book/chapters/plots', but instead in 'book/plots'. + + +File: latex2e.info, Node: \DeclareGraphicsExtensions, Next: \DeclareGraphicsRule, Prev: \graphicspath, Up: Graphics package configuration + +22.2.2 '\DeclareGraphicsExtensions' +----------------------------------- + +Synopses: + + \DeclareGraphicsExtensions{COMMA-SEPARATED LIST OF FILE EXTENSIONS} + + Declare the filename extensions to try. This allows you to specify +the order in which to choose graphic formats when you include graphic +files by giving the filename without the extension, as in +'\includegraphics{functionplot}'. + + In this example, LaTeX will find files in the PNG format before PDF +files. + + \DeclareGraphicsExtensions{.png,PNG,.pdf,.PDF} + ... + \includegraphics{lion} % will find lion.png before lion.pdf + +Because the filename 'lion' does not have a period, LaTeX uses the +extension list. For each directory in the graphics path (*note +\graphicspath::), LaTeX will try the extensions in the order given. If +it does not find such a file after trying all the directories and +extensions then it reports '! LaTeX Error: File `'lion'' not found'. +Note that you must include the periods at the start of the extensions. + + Because GNU/Linux and Macintosh filenames are case sensitive, the +list of file extensions is case sensitive on those platforms. The +Windows platform is not case sensitive. + + You are not required to include '\DeclareGraphicsExtensions' in your +document; the printer driver has a sensible default. For example, the +most recent 'pdftex.def' has this extension list. 
+ + .pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPEG,.JBIG2,.JB2 + + To change the order, use the 'grfext' package. + + You can use this command anywhere in the document. You can use it +more than once. Show its value with +'\makeatletter\typeout{\Gin@extensions}\makeatother'. + + +File: latex2e.info, Node: \DeclareGraphicsRule, Prev: \DeclareGraphicsExtensions, Up: Graphics package configuration + +22.2.3 '\DeclareGraphicsRule' +----------------------------- + +Synopsis: + + \DeclareGraphicsRule{EXTENSION}{TYPE}{SIZE-FILE EXTENSION}{COMMAND} + + Declare how to handle graphic files whose names end in EXTENSION. + + This example declares that all files with names of the form +'filename-without-dot.mps' will be treated as output from MetaPost, +meaning that the printer driver will use its MetaPost-handling code to +input the file. + + \DeclareGraphicsRule{.mps}{mps}{.mps}{} + + This + + \DeclareGraphicsRule{*}{mps}{*}{} + +tells LaTeX that it should handle as MetaPost output any file with an +extension not covered by another rule, so it covers 'filename.1', +'filename.2', etc. + + This describes the four arguments. + +EXTENSION + The file extension to which this rule applies. The extension is + anything after and including the first dot in the filename. Use + the Kleene star, '*', to denote the default behavior for all + undeclared extensions. + +TYPE + The type of file involved. This type is a string that must be + defined in the printer driver. For instance, files with extensions + '.ps', '.eps', or '.ps.gz' may all be classed as type 'eps'. All + files of the same type will be input with the same internal command + by the printer driver. For example, the file types that 'pdftex' + recognizes are: 'jpg', 'jbig2', 'mps', 'pdf', 'png', 'tif'. + +SIZE-FILE EXTENSION + The extension of the file to be read to determine the size of the + graphic, if there is such a file. It may be the same as EXTENSION + but it may be different. + + As an example, consider a PostScript graphic. To make it smaller, + it might be compressed into a '.ps.gz' file. Compressed files are + not easily read by LaTeX so you can put the bounding box + information in a separate file. If SIZE-FILE EXTENSION is empty + then you must specify size information in the arguments of + '\includegraphics'. + + If the driver file has a procedure for reading size files for + 'type' then that will be used, otherwise it will use the procedure + for reading '.eps' files. (Thus you may specify the size of bitmap + files in a file with a PostScript style '%%BoundingBox' line if no + other format is available.) + +COMMAND + A command that will be applied to the file. This is very often + left empty. This command must start with a single backward quote. + Thus, '\DeclareGraphicsRule{.eps.gz}{eps}{.eps.bb}{`gunzip -c #1}' + specifies that any file with the extension '.eps.gz' should be + treated as an 'eps' file, with the BoundingBox information stored + in the file with extension '.eps.bb', and that the command 'gunzip + -c' will run on your platform to decompresses the file. + + Such a command is specific to your platform. In addition, your TeX + system must allow you to run external commands; as a security + measure modern systems restrict running commands unless you + explicitly allow it. See the documentation for your TeX + distribution. 
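   As an illustration of how these configuration commands work
together, a document preamble might contain the following (a sketch
only; the directory name and the extension list are assumptions for
this example, not requirements):

     \usepackage{graphicx}
     \graphicspath{ {figures/} }
     \DeclareGraphicsExtensions{.pdf,.png,.jpg}
     \DeclareGraphicsRule{.mps}{mps}{.mps}{}

With this, '\includegraphics{plot}' looks first in the current
directory and then in 'figures/', trying 'plot.pdf', then 'plot.png',
then 'plot.jpg' in each place, and any file ending in '.mps' is handled
as MetaPost output.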
+ + +File: latex2e.info, Node: Commands for graphics, Prev: Graphics package configuration, Up: Graphics + +22.3 Commands for graphics +========================== + +These are the commands available with the 'graphics' and 'graphicx' +packages. + +* Menu: + +* \includegraphics:: Using a graphic in your document. +* \rotatebox:: Rotating boxes, including graphics. +* \scalebox:: Scaling boxes, including graphics. +* \resizebox:: Scaling boxes, including graphics, to a set size. + + +File: latex2e.info, Node: \includegraphics, Next: \rotatebox, Up: Commands for graphics + +22.3.1 '\includegraphics' +------------------------- + +Synopses for 'graphics' package: + + \includegraphics{FILENAME} + \includegraphics[URX,URY]{FILENAME} + \includegraphics[LLX,LLY][URX,URY]{FILENAME} + \includegraphics*{FILENAME} + \includegraphics*[URX,URY]{FILENAME} + \includegraphics*[LLX,LLY][URX,URY]{FILENAME} + + Synopses for 'graphicx' package: + + \includegraphics{FILENAME} + \includegraphics[KEY-VALUE LIST]{FILENAME} + \includegraphics*{FILENAME} + \includegraphics*[KEY-VALUE LIST]{FILENAME} + + Include a graphics file. The starred form '\includegraphics*' will +clip the graphic to the size specified, while for the unstarred form any +part of the graphic that is outside the box of the specified size will +over-print the surrounding area. + + This + + \usepackage{graphicx} % in preamble + ... + \begin{center} + \includegraphics{plot.pdf} + \end{center} + +will incorporate into the document the graphic in 'plot.pdf', centered +and at its nominal size. You can also give a path to the file, as with +'\includegraphics{graphics/plot.pdf}'. To specify a list of locations +to search for the file, *note \graphicspath::. + + If your filename includes spaces then put it in double quotes. An +example is '\includegraphics{"sister picture.jpg"}'. + + The '\includegraphics{FILENAME}' command decides on the type of +graphic by splitting FILENAME on the first dot. You can instead use +FILENAME with no dot, as in '\includegraphics{turing}', and then LaTeX +tries a sequence of extensions such as '.png' and '.pdf' until it finds +a file with that extension (*note \DeclareGraphicsExtensions::). + + If your file name contains dots before the extension then you can +hide them with curly braces, as in +'\includegraphics{{plot.2018.03.12.a}.pdf}'. Or, if you use the +'graphicx' package then you can use the options 'type' and 'ext'; see +below. This and other filename issues are also handled with the package +'grffile'. + + This example puts a graphic in a 'figure' environment so LaTeX can +move it to the next page if fitting it on the current page is awkward +(*note figure::). + + \begin{figure} + \centering + \includegraphics[width=3cm]{lungxray.jpg} + \caption{The evidence is overwhelming: don't smoke.} \label{fig:xray} + \end{figure} + + This places a graphic that will not float, so it is sure to appear at +this point in the document even if makes LaTeX stretch the text or +resort to blank areas on the page. It will be centered and will have a +caption. + + \usepackage{caption} % in preamble + ... + \begin{center} + \includegraphics{pix/nix.png} + \captionof{figure}{The spirit of the night} \label{pix:nix} % optional + \end{center} + + This example puts a box with a graphic side by side with one having +text, with the two vertically centered. + + \newcommand*{\vcenteredhbox}[1]{\begin{tabular}{@{}c@{}}#1\end{tabular}} + ... 
+ \begin{center} + \vcenteredhbox{\includegraphics[width=0.4\textwidth]{plot}} + \hspace{1em} + \vcenteredhbox{\begin{minipage}{0.4\textwidth} + \begin{displaymath} + f(x)=x\cdot \sin (1/x) + \end{displaymath} + \end{minipage}} + \end{center} + + If you use the 'graphics' package then the only options involve the +size of the graphic (but see *note \rotatebox:: and *note \scalebox::). +When one optional argument is present then it is '[URX,URY]' and it +gives the coordinates of the top right corner of the image, as a pair of +TeX dimensions (*note Units of length::). If the units are omitted they +default to 'bp'. In this case, the lower left corner of the image is +assumed to be at (0,0). If two optional arguments are present then the +leading one is '[LLX,LLY]', specifying the coordinates of the image's +lower left. Thus, '\includegraphics[1in,0.618in]{...}' calls for the +graphic to be placed so it is 1 inch wide and 0.618 inches tall and so +its origin is at (0,0). + + The 'graphicx' package gives you many more options. Specify them in +a key-value form, as here. + + \begin{center} + \includegraphics[width=1in,angle=90]{lion} + \hspace{2em} + \includegraphics[angle=90,width=1in]{lion} + \end{center} + +The options are read left-to-right. So the first graphic above is made +one inch wide and then rotated, while the second is rotated and then +made one inch wide. Thus, unless the graphic is perfectly square, the +two will end with different widths and heights. + + There are many options. The primary ones are listed first. + + Note that a graphic is placed by LaTeX into a box, which is +traditionally referred to as its "bounding box" (distinct from the +PostScript BoundingBox described below). The graphic's printed area may +go beyond this box, or sit inside this box, but when LaTeX makes up a +page it puts together boxes and this is the box allocated for the +graphic. + +'width' + The graphic will be shown so its bounding box is this width. An + example is '\includegraphics[width=1in]{plot}'. You can use the + standard TeX dimensions (*note Units of length::) and also + convenient is '\linewidth', or in a two-column document, + '\columnwidth' (*note Page layout parameters::). An example is + that by using the 'calc' package you can make the graphic be 1 cm + narrower than the width of the text with + '\includegraphics[width=\linewidth-1.0cm]{hefferon.jpg}'. + +'height' + The graphic will be shown so its bounding box is this height. You + can use the standard TeX dimensions (*note Units of length::), and + also convenient are '\pageheight' and '\textheight' (*note Page + layout parameters::). For instance, the command + '\includegraphics[height=0.25\textheight]{godel}' will make the + graphic a quarter of the height of the text area. + +'totalheight' + The graphic will be shown so its bounding box has this height plus + depth. This differs from the height if the graphic was rotated. + For instance, if it has been rotated by -90 then it will have zero + height but a large depth. + +'keepaspectratio' + If set to 'true', or just specified as here + + \includegraphics[...,keepaspectratio,...]{...} + + and you give as options both 'width' and 'height' (or + 'totalheight'), then LaTeX will make the graphic is as large as + possible without distortion. That is, LaTeX will ensure that + neither is the graphic wider than 'width' nor taller than 'height' + (or 'totalheight'). + +'scale' + Factor by which to scale the graphic. 
To make a graphic twice its + nominal size, enter '\includegraphics[scale=2.0]{...}'. This + number may be any value; a number between 0 and 1 will shrink the + graphic and a negative number will reflect it. + +'angle' + Rotate the graphic. The angle is taken in degrees and + counterclockwise. The graphic is rotated about its 'origin'; see + that option. For a complete description of how rotated material is + typeset, *note \rotatebox::. + +'origin' + The point of the graphic about which the rotation happens. + Possible values are any string containing one or two of: 'l' for + left, 'r' for right, 'b' for bottom, 'c' for center, 't' for top, + and 'B' for baseline. Thus, entering the command + '\includegraphics[angle=180,origin=c]{moon}' will turn the picture + upside down about that picture's center, while the command + '\includegraphics[angle=180,origin=lB]{LeBateau}' will turn its + picture upside down about its left baseline. (The character 'c' + gives the horizontal center in 'bc' or 'tc', but gives the vertical + center in 'lc' or 'rc'.) The default is 'lB'. + + To rotate about an arbitrary point, *note \rotatebox::. + + These are lesser-used options. + +'viewport' + Pick out a subregion of the graphic to show. Takes four arguments, + separated by spaces and given in TeX dimensions, as with + '\includegraphics[.., viewport=0in 0in 1in 0.618in]{...}'. When + the unit is omitted, the dimensions default to big points, 'bp'. + They are taken relative to the origin specified by the bounding + box. See also the 'trim' option. + +'trim' + Gives parts of the graphic to not show. Takes four arguments, + separated by spaces, that are given in TeX dimensions, as with + '\includegraphics[.., trim= 0in 0.1in 0.2in 0.3in, ...]{...}'. + These give the amounts of the graphic not to show, that is, LaTeX + will crop the picture by 0 inches on the left, 0.1 inches on the + bottom, 0.2 inches on the right, and 0.3 inches on the top. See + also the 'viewport' option. + +'clip' + If set to 'true', or just specified as here + + \includegraphics[...,clip,...]{...} + + then the graphic is cropped to the bounding box. This is the same + as using the starred form of the command, + '\includegraphics*[...]{...}'. + +'page' + Give the page number of a multi-page PDF file. The default is + 'page=1'. + +'pagebox' + Specifies which bounding box to use for PDF files from among + 'mediabox', 'cropbox', 'bleedbox', 'trimbox', or 'artbox'. PDF + files do not have the BoundingBox that PostScript files have, but + may specify up to four predefined rectangles. The MediaBox gives + the boundaries of the physical medium. The CropBox is the region + to which the contents of the page are to be clipped when displayed. + The BleedBox is the region to which the contents of the page should + be clipped in production. The TrimBox is the intended dimensions + of the finished page. The ArtBox is the extent of the page's + meaningful content. The driver will set the image size based on + CropBox if present, otherwise it will not use one of the others, + with a driver-defined order of preference. MediaBox is always + present. + +'interpolate' + Enable or disable interpolation of raster images by the viewer. + Can be set with 'interpolate=true' or just specified as here. + + \includegraphics[...,interpolate,...]{...} + +'quiet' + Do not write information to the log. 
You can set it with + 'quiet=true' or just specified it with + '\includegraphics[...,quiet,...]{...}', + +'draft' + If you set it with 'draft=true' or just specify it with + + \includegraphics[...,draft,...]{...} + + then the graphic will not appear in the document, possibly saving + color printer ink. Instead, LaTeX will put an empty box of the + correct size with the filename printed in it. + + These options address the bounding box for Encapsulated PostScript +graphic files, which have a size specified with a line '%%BoundingBox' +that appears in the file. It has four values, giving the lower x +coordinate, lower y coordinate, upper x coordinate, and upper y +coordinate. The units are PostScript points, equivalent to TeX's big +points, 1/72 inch. For example, if an '.eps' file has the line +'%%BoundingBox 10 20 40 80' then its natural size is 30/72 inch wide by +60/72 inch tall. + +'bb' + Specify the bounding box of the displayed region. The argument is + four dimensions separated by spaces, as with '\includegraphics[.., + bb= 0in 0in 1in 0.618in]{...}'. Usually '\includegraphics' reads + the BoundingBox numbers from the EPS file automatically, so this + option is only useful if the bounding box is missing from that file + or if you want to change it. + +'bbllx, bblly, bburx, bbury' + Set the bounding box. These four are obsolete, but are retained + for compatibility with old packages. + +'natwidth, natheight' + An alternative for 'bb'. Setting + + \includegraphics[...,natwidth=1in,natheight=0.618in,...]{...} + + is the same as setting 'bb=0 0 1in 0.618in'. + +'hiresbb' + If set to 'true', or just specified as with + + \includegraphics[...,hiresbb,...]{...} + + then LaTeX will look for '%%HiResBoundingBox' lines instead of + '%%BoundingBox' lines. (The 'BoundingBox' lines use only natural + numbers while the 'HiResBoundingBox' lines use decimals; both use + units equivalent to TeX's big points, 1/72 inch.) To override a + prior setting of 'true', you can set it to 'false'. + + These following options allow a user to override LaTeX's method of +choosing the graphic type based on the filename extension. An example +is that '\includegraphics[type=png,ext=.xxx,read=.xxx]{lion}' will read +the file 'lion.xxx' as though it were 'lion.png'. For more on these, +*note \DeclareGraphicsRule::. + +'type' + Specify the graphics type. + +'ext' + Specify the graphics extension. Only use this in conjunction with + the option 'type'. + +'read' + Specify the file extension of the read file. Only use this in + conjunction with the option 'type'. + +'command' + Specify a command to be applied to this file. Only use this in + conjunction with the option 'type'. *Note Command line options::, + for a discussion of enabling the '\write18' functionality to run + external commands. + + +File: latex2e.info, Node: \rotatebox, Next: \scalebox, Prev: \includegraphics, Up: Commands for graphics + +22.3.2 '\rotatebox' +------------------- + +Synopsis if you use the 'graphics' package: + + \rotatebox{ANGLE}{MATERIAL} + + Synopses if you use the 'graphicx' package: + + \rotatebox{ANGLE}{MATERIAL} + \rotatebox[KEY-VALUE LIST]{ANGLE}{MATERIAL} + + Put MATERIAL in a box and rotate it ANGLE degrees counterclockwise. + + This example rotates the table column heads forty-five degrees. + + \begin{tabular}{ll} + \rotatebox{45}{Character} &\rotatebox{45}{NATO phonetic} \\ + A &AL-FAH \\ + B &BRAH-VOH + \end{tabular} + + The MATERIAL can be anything that goes in a box, including a graphic. 
+ + \rotatebox[origin=c]{45}{\includegraphics[width=1in]{lion}} + + To place the rotated material, the first step is that LaTeX sets +MATERIAL in a box, with a reference point on the left baseline. The +second step is the rotation, by default about the reference point. The +third step is that LaTeX computes a box to bound the rotated material. +Fourth, LaTeX moves this box horizontally so that the left edge of this +new bounding box coincides with the left edge of the box from the first +step (they need not coincide vertically). This new bounding box, in its +new position, is what LaTeX uses as the box when typesetting this +material. + + If you use the 'graphics' package then the rotation is about the +reference point of the box. If you use the 'graphicx' package then +these are the options that can go in the KEY-VALUE LIST, but note that +you can get the same effect without needing this package, except for the +'x' and 'y' options (*note \includegraphics::). + +'origin' + The point of the MATERIAL's box about which the rotation happens. + Possible value is any string containing one or two of: 'l' for + left, 'r' for right, 'b' for bottom, 'c' for center, 't' for top, + and 'B' for baseline. Thus, the first line here + + \rotatebox[origin=c]{180}{moon} + \rotatebox[origin=lB]{180}{LeBateau} + + will turn the picture upside down from the center while the second + will turn its picture upside down about its left baseline. (The + character 'c' gives the horizontal center in 'bc' or 'tc' but gives + the vertical center in 'lc' or 'rc', and gives both in 'c'.) The + default is 'lB'. + +'x, y' + Specify an arbitrary point of rotation with '\rotatebox[x=TeX + DIMENSION,y=TeX DIMENSION]{...}' (*note Units of length::). These + give the offset from the box's reference point. + +'units' + This key allows you to change the default of degrees + counterclockwise. Setting 'units=-360' changes the direction to + degrees clockwise and setting 'units=6.283185' changes to radians + counterclockwise. + + +File: latex2e.info, Node: \scalebox, Next: \resizebox, Prev: \rotatebox, Up: Commands for graphics + +22.3.3 '\scalebox' +------------------ + +Synopses: + + \scalebox{HORIZONTAL FACTOR}{MATERIAL} + \scalebox{HORIZONTAL FACTOR}[VERTICAL FACTOR]{MATERIAL} + \reflectbox{MATERIAL} + + Scale the MATERIAL. + + This example halves the size, both horizontally and vertically, of +the first text and doubles the size of the second. + + \scalebox{0.5}{DRINK ME} and \scalebox{2.0}{Eat Me} + + If you do not specify the optional VERTICAL FACTOR then it defaults +to the same value as the HORIZONTAL FACTOR. + + You can use this command to resize a graphic, as here. + + \scalebox{0.5}{\includegraphics{lion}} + +If you use the 'graphicx' package then you can accomplish the same thing +with optional arguments to '\includegraphics' (*note +\includegraphics::). + + The '\reflectbox' command abbreviates '\scalebox{-1}[1]{MATERIAL}'. +Thus, 'Able was I\reflectbox{Able was I}' will show the phrase 'Able was +I' immediately followed by its mirror reflection against a vertical +axis. + + +File: latex2e.info, Node: \resizebox, Prev: \scalebox, Up: Commands for graphics + +22.3.4 '\resizebox' +------------------- + +Synopses: + + \resizebox{HORIZONTAL LENGTH}{VERTICAL LENGTH}{MATERIAL} + \resizebox*{HORIZONTAL LENGTH}{VERTICAL LENGTH}{MATERIAL} + + Given a size, such as '3cm', transform MATERIAL to make it that size. +If either HORIZONTAL LENGTH or VERTICAL LENGTH is an exclamation +point '!' 
then the other argument is used to determine a scale factor +for both directions. + + This example makes the graphic be a half inch wide and scales it +vertically by the same factor to keep it from being distorted. + + \resizebox{0.5in}{!}{\includegraphics{lion}} + + The unstarred form '\resizebox' takes VERTICAL LENGTH to be the box's +height while the starred form '\resizebox*' takes it to be height+depth. +For instance, make the text have a height+depth of a quarter-inch with +'\resizebox*{!}{0.25in}{\parbox{3.5in}{This box has both height and +depth.}}'. + + You can use '\depth', '\height', '\totalheight', and '\width' to +refer to the original size of the box. Thus, make the text two inches +wide but keep the original height with '\resizebox{2in}{\height}{Two +inches}'. + + +File: latex2e.info, Node: Special insertions, Next: Splitting the input, Prev: Graphics, Up: Top + +23 Special insertions +********************* + +LaTeX provides commands for inserting characters that have a special +meaning do not correspond to simple characters you can type. + +* Menu: + +* Reserved characters:: Inserting '# $ % & { } _ ~ ^ \' +* Upper and lower case:: Make text upper or lower case. +* Symbols by font position:: Inserting font symbols by number. +* Text symbols:: Inserting other non-letter symbols in text. +* Accents:: Inserting accents. +* Additional Latin letters:: Inserting other non-English characters. +* inputenc package:: Set the input file text encoding. +* \rule:: Inserting lines and rectangles. +* \today:: Inserting today's date. + + +File: latex2e.info, Node: Reserved characters, Next: Upper and lower case, Up: Special insertions + +23.1 Reserved characters +======================== + +LaTeX sets aside the following characters for special purposes. For +example, the percent sign '%' is for comments. They are called +"reserved characters" or "special characters". They are all discussed +elsewhere in this manual. + + # $ % & { } _ ~ ^ \ + + If you want a reserved character to be printed as itself, in the text +body font, for all but the final three characters in that list simply +put a backslash '\' in front of the character. Thus, typing '\$1.23' +will produce '$1.23' in your output. + + As to the last three characters, to get a tilde in the text body font +use '\~{}' (omitting the curly braces would result in the next character +receiving a tilde accent). Similarly, to get a text body font +circumflex use '\^{}'. To get a backslash in the font of the text body, +enter '\textbackslash{}'. + + To produce the reserved characters in a typewriter font use '\verb!!' +as below (the double backslash '\\' is only there to split the lines in +the output). + + \begin{center} + \# \$ \% \& \{ \} \_ \~{} \^{} \textbackslash \\ + \verb!# $ % & { } _ ~ ^ \! + \end{center} + + +File: latex2e.info, Node: Upper and lower case, Next: Symbols by font position, Prev: Reserved characters, Up: Special insertions + +23.2 Upper and lower case +========================= + +Synopsis: + + \uppercase{TEXT} + \lowercase{TEXT} + \MakeUppercase{TEXT} + \MakeLowercase{TEXT} + + Change the case of characters. The TeX primitive commands +'\uppercase' and '\lowercase' are set up by default to work only with +the 26 letters a-z and A-Z. The LaTeX commands '\MakeUppercase' and +'\MakeLowercase' commands also change characters accessed by commands +such as '\ae' or '\aa'. The commands '\MakeUppercase' and +'\MakeLowercase' are robust but they have moving arguments (*note +\protect::). 
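   For example (a minimal sketch):

     \MakeUppercase{hello world}   % produces HELLO WORLD
     \MakeLowercase{GOODBYE}       % produces goodbye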
+ + These commands do not change the case of letters used in the name of +a command within TEXT. But they do change the case of every other Latin +letter inside the argument TEXT. Thus, '\MakeUppercase{Let $y=f(x)$'} +produces 'LET Y=F(X)'. Another example is that the name of an +environment will be changed, so that '\MakeUppercase{\begin{tabular} ... +\end{tabular}}' will produce an error because the first half is changed +to '\begin{TABULAR}'. + + LaTeX uses the same fixed table for changing case throughout a +document, The table used is designed for the font encoding T1; this +works well with the standard TeX fonts for all Latin alphabets but will +cause problems when using other alphabets. + + To change the case of text that results from a macro inside TEXT you +need to do expansion. Here the '\Schoolname' produces 'COLLEGE OF +MATHEMATICS'. + + \newcommand{\schoolname}{College of Mathematics} + \newcommand{\Schoolname}{\expandafter\MakeUppercase + \expandafter{\schoolname}} + + The 'textcase' package brings some of the missing feature of the +standard LaTeX commands '\MakeUppercase' and '\MakeLowerCase'. + + To uppercase only the first letter of words, you can use the package +'mfirstuc'. + + Handling all the casing rules specified by Unicode, e.g., for +non-Latin scripts, is a much bigger job than anything envisioned in the +original TeX and LaTeX. It has been implemented in the 'expl3' package +as of 2020. The article "Case changing: From TeX primitives to the +Unicode algorithm", (Joseph Wright, 'TUGboat' 41:1, +), gives a good +overview of the topic, past and present. + + +File: latex2e.info, Node: Symbols by font position, Next: Text symbols, Prev: Upper and lower case, Up: Special insertions + +23.3 Symbols by font position +============================= + +You can access any character of the current font using its number with +the '\symbol' command. For example, the visible space character used in +the '\verb*' command has the code decimal 32 in the standard Computer +Modern typewriter font, so it can be typed as '\symbol{32}'. + + You can also specify numbers in octal (base 8) by using a ''' prefix, +or hexadecimal (base 16) with a '"' prefix, so the visible space at 32 +decimal could also be written as '\symbol{'40}' or '\symbol{"20}'. + + +File: latex2e.info, Node: Text symbols, Next: Accents, Prev: Symbols by font position, Up: Special insertions + +23.4 Text symbols +================= + +LaTeX provides commands to generate a number of non-letter symbols in +running text. Some of these, especially the more obscure ones, are not +available in OT1. As of the LaTeX February 2020 release, all symbols +are available by default; before that, it was necessary to use the +'textcomp' package for some (technically, those in the 'TS1' font +encoding). + +'\copyright' +'\textcopyright' + (C) The copyright symbol. + +'\dag' + † The dagger symbol (in text). + +'\ddag' + ‡ The double dagger symbol (in text). + +'\LaTeX' + The LaTeX logo. + +'\LaTeXe' + The LaTeX2e logo. + +'\guillemotleft («)' +'\guillemotright (»)' +'\guilsinglleft (‹)' +'\guilsinglright (›)' + «, », ‹, › Double and single angle quotation marks, commonly used + in French. + +'\ldots' +'\dots' +'\textellipsis' + ... An ellipsis (three dots at the baseline): '\ldots' and '\dots' + also work in math mode. + +'\lq' + ' Left (opening) quote. + +'\P' +'\textparagraph' + ¶ Paragraph sign (pilcrow). + +'\pounds' +'\textsterling' + £ English pounds sterling. 
+ +'\quotedblbase („)' +'\quotesinglbase (‚)' + „ and ‚ Double and single quotation marks on the baseline. + +'\rq' + ' Right (closing) quote. + +'\S' +'\textsection' + § Section sign. + +'\TeX' + The TeX logo. + +'\textasciicircum' + ^ ASCII circumflex. + +'\textasciitilde' + ~ ASCII tilde. + +'\textasteriskcentered' + * Centered asterisk. + +'\textbackslash' + \ Backslash. + +'\textbar' + | Vertical bar. + +'\textbardbl' + ⏸ Double vertical bar. + +'\textbigcircle' + ◯, Big circle symbol. + +'\textbraceleft' + { Left brace. + +'\textbraceright' + } Right brace. + +'\textbullet' + * Bullet. + +'\textcircled{LETTER}' + Ⓐ, Circle around LETTER. + +'\textcompwordmark' +'\textcapitalcompwordmark' +'\textascendercompwordmark' + Used to separate letters that would normally ligature. For + example, 'f\textcompwordmark i' produces 'fi' without a ligature. + This is most useful in non-English languages. The + '\textcapitalcompwordmark' form has the cap height of the font + while the '\textascendercompwordmark' form has the ascender height. + +'\textdagger' + † Dagger. + +'\textdaggerdbl' + ‡ Double dagger. + +'\textdollar (or '\$')' + $ Dollar sign. + +'\textemdash (or '---')' + -- Em-dash. Used for punctuation, usually similar to commas or + parentheses, as in ''The playoffs---if you're lucky enough to make + the playoffs---are more like a sprint.'' Conventions for spacing + around em-dashes vary widely. + +'\textendash (or '--')' + - En-dash. Used for ranges, as in ''see pages 12--14''. + +'\texteuro' + + The Euro currency symbol: €. + + For an alternative glyph design, try the 'eurosym' package; also, + most fonts nowadays come with their own Euro symbol (Unicode + U+20AC). + +'\textexclamdown (or '!`')' + ¡ Upside down exclamation point. + +'\textfiguredash' + Dash used between numerals, Unicode U+2012. Defined in the June + 2021 release of LaTeX. When used in pdfTeX, approximated by an + en-dash; with a Unicode engine, either typesets the glyph if + available in the current font, or writes the usual "Missing + character" warning to the log file. + +'\textgreater' + > Greater than symbol. + +'\texthorizontalbar' + Horizontal bar character, Unicode U+2015. Defined in the June 2021 + release of LaTeX. Behavior as with '\textfiguredash' above; the + pdfTeX approximation is an em-dash. + +'\textless' + < Less than symbol. + +'\textleftarrow' + ←, Left arrow. + +'\textnonbreakinghyphen' + Non-breaking hyphen character, Unicode U+2011. Defined in the June + 2021 release of LaTeX. Behavior as with '\textfiguredash' above; + the pdfTeX approximation is a regular ASCII hyphen (with breaks + disallowed after). + +'\textordfeminine' +'\textordmasculine' + ª, º Feminine and masculine ordinal symbols. + +'\textperiodcentered' + · Centered period. + +'\textquestiondown (or '?`')' + ¿ Upside down question mark. + +'\textquotedblleft (or '``')' + " Double left quote. + +'\textquotedblright (or '''')' + " Double right quote. + +'\textquoteleft (or '`')' + ' Single left quote. + +'\textquoteright (or ''')' + ' Single right quote. + +'\textquotesingle' + ', Straight single quote. (From TS1 encoding.) + +'\textquotestraightbase' +'\textquotestraightdblbase' + Single and double straight quotes on the baseline. + +'\textregistered' + (R) Registered symbol. + +'\textrightarrow' + →, Right arrow. + +'\textthreequartersemdash' + ﹘, "Three-quarters" em-dash, between en-dash and em-dash. + +'\texttrademark' + ™ Trademark symbol. + +'\texttwelveudash' + ﹘, "Two-thirds" em-dash, between en-dash and em-dash. 
+ +'\textunderscore' + _ Underscore. + +'\textvisiblespace' + ␣, Visible space symbol. + + +File: latex2e.info, Node: Accents, Next: Additional Latin letters, Prev: Text symbols, Up: Special insertions + +23.5 Accents +============ + +LaTeX has wide support for many of the world's scripts and languages, +provided through the core 'babel' package, which supports pdfLaTeX, +XeLaTeX and LuaLaTeX. The 'polyglossia' package provides similar +support with the latter two engines. + + This section does not cover that support. It only lists the core +LaTeX commands for creating accented characters. The '\capital...' +commands shown here produce alternative forms for use with capital +letters. These are not available with OT1. + + Below, to make them easier to find, the accents are all illustrated +with lowercase 'o'. + + Note that '\i' produces a dotless i, and '\j' produces a dotless j. +These are often used in place of their dotted counterparts when they are +accented. + +'\"' +'\capitaldieresis' + ö Umlaut (dieresis). + +'\'' +'\capitalacute' + ó Acute accent. + +'\.' + ȯ Dot accent. + +'\=' +'\capitalmacron' + ō Macron (overbar) accent. + +'\^' +'\capitalcircumflex' + ô Circumflex (hat) accent. + +'\`' +'\capitalgrave' + ò Grave accent. + +'\~' +'\capitaltilde' + ñ Tilde accent. + +'\b' + o̲ Bar accent underneath. + + Related to this, '\underbar{TEXT}' produces a bar under TEXT. The + argument is always processed in LR mode (*note Modes::). The bar + is always a fixed position under the baseline, thus crossing + through descenders. See also '\underline' in *note Math + miscellany::. + +'\c' +'\capitalcedilla' + ç Cedilla accent underneath. + +'\d' +'\capitaldotaccent' + ọ Dot accent underneath. + +'\H' +'\capitalhungarumlaut' + ő Long Hungarian umlaut accent. + +'\k' +'\capitalogonek' + ǫ Ogonek. Not available in the OT1 encoding. + +'\r' +'\capitalring' + o̊ Ring accent. + +'\t' +'\capitaltie' +'\newtie' +'\capitalnewtie' + Tie-after accent (used for transliterating from Cyrillic, such as + in the ALA-LC romanization). It expects that the argument has two + characters. The '\newtie' form is centered in its box. + +'\u' +'\capitalbreve' + ŏ Breve accent. + +'\v' +'\capitalcaron' + ǒ Háček (check, caron) accent. + +* Menu: + +* \accent:: Low level command to produce an accented character. + + +File: latex2e.info, Node: \accent, Up: Accents + +23.5.1 '\accent' +---------------- + +Synopsis: + + \accent NUMBER CHARACTER + + A TeX primitive command used to generate accented characters from +accent marks and letters. The accent mark is selected by NUMBER, a +numeric argument, followed by a space and then a CHARACTER argument to +construct the accented character in the current font. + + These are accented 'e' characters. + + \accent18 e + \accent20 e + \accent21 e + \accent22 e + \accent23 e + +The first is a grave, the second a caron, the third a breve, the fourth +a macron, and the fifth a ring above. + + The position of the accent is determined by the font designer and so +the outcome of '\accent' use may differ between fonts. In LaTeX it is +desirable to have glyphs for accented characters rather than building +them using '\accent'. Using glyphs that already contain the accented +characters (as in T1 encoding) allows correct hyphenation whereas +'\accent' disables hyphenation (specifically with OT1 font encoding +where accented glyphs are absent). + + There can be an optional font change between NUMBER and CHARACTER. 
+Note also that this command sets the '\spacefactor' to 1000 (*note +\spacefactor::). + + An unavoidable characteristic of some Cyrillic letters and the +majority of accented Cyrillic letters is that they must be assembled +from multiple elements (accents, modifiers, etc.) while '\accent' +provides for a single accent mark and a single letter combination. +There are also cases where accents must appear between letters that +\accent does not support. Still other cases exist where the letters I +and J have dots above their lowercase counterparts that conflict with +dotted accent marks. The use of '\accent' in these cases will not work +as it cannot analyze upper/lower case. + + +File: latex2e.info, Node: Additional Latin letters, Next: inputenc package, Prev: Accents, Up: Special insertions + +23.6 Additional Latin letters +============================= + +Here are the basic LaTeX commands for inserting letters beyond A-Z that +extend the Latin alphabet, used primarily in languages other than +English. + +'\aa' +'\AA' + å and Å. + +'\ae' +'\AE' + æ and Æ. + +'\dh' +'\DH' + Icelandic letter eth: ð and Ð. Not available with OT1 encoding, you + need the 'fontenc' package to select an alternate font encoding, + such as T1. + +'\dj' +'\DJ' + Crossed d and D, a.k.a. capital and small letter d with stroke. + Not available with OT1 encoding, you need the 'fontenc' package to + select an alternate font encoding, such as T1. + +'\ij' +'\IJ' + ij and IJ (except somewhat closer together than appears here). + +'\l' +'\L' + ł and Ł. + +'\ng' +'\NG' + Lappish letter eng, also used in phonetics. + +'\o' +'\O' + ø and Ø. + +'\oe' +'\OE' + œ and Œ. + +'\ss' +'\SS' + ß and SS. + +'\th' +'\TH' + Icelandic letter thorn: þ and Þ. Not available with OT1 encoding, + you need the 'fontenc' package to select an alternate font + encoding, such as T1. + + +File: latex2e.info, Node: inputenc package, Next: \rule, Prev: Additional Latin letters, Up: Special insertions + +23.7 'inputenc' package +======================= + +Synopsis: + + \usepackage[ENCODING-NAME]{inputenc} + + Declare the input file's text encoding to be ENCODING-NAME. The +default, if this package is not loaded, is UTF-8. Technically, +specifying the encoding name is optional, but in practice it is not +useful to omit it. + + In a computer file, the characters are stored according to a scheme +called the "encoding". There are many different encodings. The +simplest is ASCII, which supports 95 printable characters, not enough +for most of the world's languages. For instance, to typeset the +a-umlaut character 'ä' in an ASCII-encoded LaTeX source file, the +sequence '\"a' is used. This would make source files for anything but +English hard to read; even for English, often a more extensive encoding +is more convenient. + + The modern encoding standard, in some ways a union of the others, is +UTF-8, one of the representations of Unicode. This is the default for +LaTeX since 2018. + + The 'inputenc' package is how LaTeX knows what encoding is used. For +instance, the following command explicitly says that the input file is +UTF-8 (note the lack of a dash). + + \usepackage[utf8]{inputenc} + + Caution: use 'inputenc' only with the pdfTeX engine (*note TeX +engines::). (The XeTeX and LuaTeX engines assume that the input file is +UTF-8 encoded.) 
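   For instance, an older document saved in the Latin-1 encoding and
compiled with pdfLaTeX might declare this (a sketch; 'latin1' is just
an illustration of a non-default encoding):

     \usepackage[latin1]{inputenc}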
If you invoke LaTeX with either the 'xelatex' command +or the 'lualatex' command, and try to declare a non-UTF-8 encoding with +'inputenc', such as 'latin1', then you will get the error 'inputenc is +not designed for xetex or luatex'. + + An 'inputenc' package error such as 'Invalid UTF-8 byte "96' means +that some of the material in the input file does not follow the encoding +scheme. Often these errors come from copying material from a document +that uses a different encoding than the input file; this one is a left +single quote from a web page using 'latin1' inside a LaTeX input file +that uses UTF-8. The simplest solution is to replace the non-UTF-8 +character with its UTF-8 equivalent, or use a LaTeX equivalent command +or character. + + In some documents, such as a collection of journal articles from a +variety of authors, changing the encoding in mid-document may be +necessary. Use the command '\inputencoding{ENCODING-NAME}'. The most +common values for ENCODING-NAME are: 'ascii', 'latin1', 'latin2', +'latin3', 'latin4', 'latin5', 'latin9', 'latin10', and 'utf8'. + + +File: latex2e.info, Node: \rule, Next: \today, Prev: inputenc package, Up: Special insertions + +23.8 '\rule' +============ + +Synopsis, one of: + + \rule{WIDTH}{THICKNESS} + \rule[RAISE]{WIDTH}{THICKNESS} + + Produce a "rule", a filled-in rectangle. + + This example produces a rectangular blob, sometimes called a Halmos +symbol, or just "qed", often used to mark the end of a proof: + + \newcommand{\qedsymbol}{\rule{0.4em}{2ex}} + +The 'amsthm' package includes this command, with a somewhat +different-looking symbol. + + The mandatory arguments give the horizontal WIDTH and vertical +THICKNESS of the rectangle. They are rigid lengths (*note Lengths::). +The optional argument RAISE is also a rigid length, and tells LaTeX how +much to raise the rule above the baseline, or lower it if the length is +negative. + + This produces a line, a rectangle that is wide but not tall. + + \noindent\rule{\textwidth}{0.4pt} + +The line is the width of the page and 0.4 points tall. This line +thickness is common in LaTeX. + + A rule that has zero width, or zero thickness, will not show up in +the output, but can cause LaTeX to change the output around it. *Note +\strut::, for examples. + + +File: latex2e.info, Node: \today, Prev: \rule, Up: Special insertions + +23.9 '\today' +============= + +Synopsis: + + \today + + Produce today's date in the format 'MONTH DD, YYYY'. An example of a +date in that format is 'July 4, 1976'. + + Multilingual packages such as 'babel' or 'polyglossia', or classes +such as 'lettre', will localize '\today'. For example, the following +will output '4 juillet 1976': + + \year=1976 \month=7 \day=4 + \documentclass{minimal} + \usepackage[french]{babel} + \begin{document} + \today + \end{document} + +'\today' uses the counters '\day', '\month', and '\year' (*note \day & +\month & \year::). + + A number of package on CTAN work with dates. One is 'datetime' +package which can produce a wide variety of date formats, including ISO +standards. + + The date is not updated as the LaTeX process runs, so in principle +the date could be incorrect by the time the program finishes. + + +File: latex2e.info, Node: Splitting the input, Next: Front/back matter, Prev: Special insertions, Up: Top + +24 Splitting the input +********************** + +LaTeX lets you split a large document into several smaller ones. This +can simplify editing or allow multiple authors to work on the document. +It can also speed processing. 
+ + Regardless of how many separate files you use, there is always one +"root file", on which LaTeX compilation starts. This shows such a file +with five included files. + + \documentclass{book} + \includeonly{ % comment out lines below to omit compiling + pref, + chap1, + chap2, + append, + bib + } + \begin{document} + \frontmatter + \include{pref} + \mainmatter + \include{chap1} + \include{chap2} + \appendix + \include{append} + \backmatter + \include{bib} + \end{document} + +This will bring in material from 'pref.tex', 'chap1.tex', 'chap2.tex', +'append.tex', and 'bib.tex'. If you compile this file, and then comment +out all of the lines inside '\includeonly{...}' except for 'chap1', and +compile again, then LaTeX will only process the material in the first +chapter. Thus, your output will appear more quickly and be shorter to +print. However, the advantage of the '\includeonly' command is that +LaTeX will retain the page numbers and all of the cross reference +information from the other parts of the document so these will appear in +your output correctly. + + *Note Larger book template::, for another example of '\includeonly'. + +* Menu: + +* \endinput:: Stop including material from a file. +* \include & \includeonly:: Conditionally include files. +* \input:: Unconditionally include a file. + + +File: latex2e.info, Node: \endinput, Next: \include & \includeonly, Up: Splitting the input + +24.1 '\endinput' +================ + +Synopsis: + + \endinput + + When you '\include{filename}', inside 'filename.tex' the material +after '\endinput' will not be included. This command is optional; if +'filename.tex' has no '\endinput' then LaTeX will read all of the file. + + For example, suppose that a document's root file has '\input{chap1}' +and this is 'chap1.tex'. + + \chapter{One} + This material will appear in the document. + \endinput + This will not appear. + + This can be useful for putting documentation or comments at the end +of a file, or for avoiding junk characters that can be added if the file +is transmitted in the body of an email. It is also useful for +debugging: one strategy to localize errors is to put '\endinput' halfway +through the included file and see if the error disappears. Now, knowing +which half contains the error, moving '\endinput' to halfway through +that area further narrows down the location. This process rapidly finds +the offending line. + + After reading '\endinput', LaTeX continues to read to the end of the +line, so something can follow this command and be read nonetheless. +This allows you, for instance, to close an '\if...' with a '\fi'. + + +File: latex2e.info, Node: \include & \includeonly, Next: \input, Prev: \endinput, Up: Splitting the input + +24.2 '\include' & '\includeonly' +================================ + +Synopsis: + + \includeonly{ % in document preamble + ... + FILENAME, + ... + } + ... + \include{FILENAME} % in document body + + Bring material from the external file 'FILENAME.tex' into a LaTeX +document. + + The '\include' command does three things: it executes '\clearpage' +(*note \clearpage & \cleardoublepage::), then it inputs the material +from 'FILENAME.tex' into the document, and then it does another +'\clearpage'. This command can only appear in the document body. + + The '\includeonly' command controls which files will be read by LaTeX +under subsequent '\include' commands. Its list of filenames is +comma-separated. It must appear in the preamble or even earlier, e.g., +the command line; it can't appear in the document body. 
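   For instance, you can give the '\includeonly' list on the command
line, selecting which parts to compile without editing the document (a
sketch; 'main' and 'chap1' are placeholder names, and the exact quoting
depends on your shell):

     pdflatex '\includeonly{chap1}\input{main}'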
+ + This example root document, 'constitution.tex', brings in three +files, 'preamble.tex', 'articles.tex', and 'amendments.tex'. + + \documentclass{book} + \includeonly{ + preamble, + articles, + amendments + } + \begin{document} + \include{preamble} + \include{articles} + \include{amendments} + \end{document} + +The file 'preamble.tex' contains no special code; you have just +excerpted the chapter from 'consitution.tex' and put it in a separate +file just for editing convenience. + + \chapter{Preamble} + We the People of the United States, + in Order to form a more perfect Union, ... + +Running LaTeX on 'constitution.tex' makes the material from the three +files appear in the document but also generates the auxiliary files +'preamble.aux', 'articles.aux', and 'amendments.aux'. These contain +information such as page numbers and cross-references (*note Cross +references::). If you now comment out '\includeonly''s lines with +'preamble' and 'amendments' and run LaTeX again then the resulting +document shows only the material from 'articles.tex', not the material +from 'preamble.tex' or 'amendments.tex'. Nonetheless, all of the +auxiliary information from the omitted files is still there, including +the starting page number of the chapter. + + If the document preamble does not have '\includeonly' then LaTeX will +include all the files you call for with '\include' commands. + + The '\include' command makes a new page. To avoid that, see *note +\input:: (which, however, does not retain the auxiliary information). + + *Note Larger book template::, for another example using '\include' +and '\includeonly'. That example also uses '\input' for some material +that will not necessarily start on a new page. + + File names can involve paths. + + \documentclass{book} + \includeonly{ + chapters/chap1, + } + \begin{document} + \include{chapters/chap1} + \end{document} + + To make your document portable across distributions and platforms you +should avoid spaces in the file names. The tradition is to instead use +dashes or underscores. Nevertheless, for the name 'amo amas amat', this +works under TeX Live on GNU/Linux: + + \documentclass{book} + \includeonly{ + "amo\space amas\space amat" + } + \begin{document} + \include{"amo\space amas\space amat"} + \end{document} + + and this works under MiKTeX on Windows: + + \documentclass{book} + \includeonly{ + {"amo amas amat"} + } + \begin{document} + \include{{"amo amas amat"}} + \end{document} + + You cannot use '\include' inside a file that is being included or you +get 'LaTeX Error: \include cannot be nested.' The '\include' command +cannot appear in the document preamble; you will get 'LaTeX Error: +Missing \begin{document}'. + + If a file that you '\include' does not exist, for instance if you +'\include{athiesm}' but you meant '\include{atheism}', then LaTeX does +not give you an error but will warn you 'No file athiesm.tex.' (It will +also create 'athiesm.aux'.) + + If you '\include' the root file in itself then you first get 'LaTeX +Error: Can be used only in preamble.' Later runs get 'TeX capacity +exceeded, sorry [text input levels=15]'. To fix this, you must remove +the inclusion '\include{ROOT}' but also delete the file 'ROOT.aux' and +rerun LaTeX. + + +File: latex2e.info, Node: \input, Prev: \include & \includeonly, Up: Splitting the input + +24.3 '\input' +============= + +Synopsis: + + \input{FILENAME} + + LaTeX processes the file as if its contents were inserted in the +current file. 
For a more sophisticated inclusion mechanism see *note +\include & \includeonly::. + + If FILENAME does not end in '.tex' then LaTeX first tries the +filename with that extension; this is the usual case. If FILENAME ends +with '.tex' then LaTeX looks for the filename as it is. + + For example, this + + \input{macros} + +will cause LaTeX to first look for 'macros.tex'. If it finds that file +then it processes its contents as thought they had been copy-pasted in. +If there is no file of the name 'macros.tex' then LaTeX tries the name +'macros', without an extension. (This may vary by distribution.) + + To make your document portable across distributions and platforms you +should avoid spaces in the file names. The tradition is to instead use +dashes or underscores. Nevertheless, for the name 'amo amas amat', this +works under TeX Live on GNU/Linux: + + \input{"amo\space amas\space amat"} + + and this works under MiKTeX on Windows: + + \input{{"amo amas amat"}} + + +File: latex2e.info, Node: Front/back matter, Next: Letters, Prev: Splitting the input, Up: Top + +25 Front/back matter +******************** + +* Menu: + +* Table of contents etc.:: Table of contents, list of figures, list of tables. +* Indexes:: Generate an index. +* Glossaries:: Generate a glossary. + + +File: latex2e.info, Node: Table of contents etc., Next: Indexes, Up: Front/back matter + +25.1 Table of contents, list of figures, list of tables +======================================================= + +Synopsis, one of: + + \tableofcontents + \listoffigures + \listoftables + + Produce a table of contents, or list of figures, or list of tables. +Put the command in the input file where you want the table or list to +go. You do not type the entries; for example, typically the table of +contents entries are automatically generated from the sectioning +commands '\chapter', etc. + + This example illustrates the first command, '\tableofcontents'. +LaTeX will produce a table of contents on the book's first page. + + \documentclass{book} + % \setcounter{tocdepth}{1} + \begin{document} + \tableofcontents\newpage + ... + \chapter{...} + ... + \section{...} + ... + \subsection{...} + ... + \end{document} + +Uncommenting the second line would cause that table to contain chapter +and section listings but not subsection listings, because the '\section' +command has level 1. *Note Sectioning::, for level numbers of the +sectioning units. For more on the 'tocdepth' *note +Sectioning/tocdepth::. + + Another example of the use of '\tableofcontents' is in *note Larger +book template::. + + If you want a page break after the table of contents, write a +'\newpage' command after the '\tableofcontents' command, as above. + + To make the table of contents, LaTeX stores the information in an +auxiliary file named 'ROOT-FILE.toc' (*note Splitting the input::). For +example, this LaTeX file 'test.tex' + + \documentclass{article} + \begin{document} + \tableofcontents\newpage + \section{First section} + \subsection{First subsection} + ... + +writes these lines to 'test.toc'. + + \contentsline {section}{\numberline {1}First section}{2} + \contentsline {subsection}{\numberline {1.1}First subsection}{2} + +Each line contains a single command, '\contentsline' (*note +\contentsline::). The first argument, the 'section' or 'subsection', is +the sectioning unit. The second argument has two components. The hook +'\numberline' determines how the sectioning number, '1' or '1.1', +appears in the table of contents (*note \numberline::). 
The remainder +of the second argument of '\contentsline', 'First section' or 'First +subsection', is the sectioning title text. Finally, the third argument, +'2', is the page number on which this sectioning unit starts. + + To typeset these lines, the document class provides '\l@SECTION-UNIT' +commands such as '\l@section{TEXT}{PAGENUMBER}' and +'\l@subsection{TEXT}{PAGENUMBER}'. These commands often use the +'\@dottedtocline' command (*note \@dottedtocline::). + + A consequence of LaTeX's strategy of using auxiliary files is that to +get the correct information in the document you must run LaTeX twice, +once to store the information and the second time to retrieve it. In +the ordinary course of writing a document authors run LaTeX a number of +times, but you may notice that the first time that you compile a new +document, the table of contents page will be empty except for its +'Contents' header. Just run LaTeX again. + + The commands '\listoffigures' and '\listoftables' produce a list of +figures and a list of tables. Their information is stored in files with +extension '.lof' and '.lot'. They work the same way as +'\tableofcontents' but the latter is more common, so we use it for most +examples. + + You can manually add material to the table of contents, the list of +figures, and the list of tables. For instance, add a line about a +section to the table of contents with +'\addcontentsline{toc}{section}{TEXT}'. (*note \addcontentsline::). +Add arbitrary material, that is, non-line material, with +'\addtocontents', as with the command +'\addtocontents{lof}{\protect\vspace{2ex}}', which adds vertical space +to the list of figures (*note \addtocontents::). + + Lines in the table of contents, the list of figures, and the list of +tables, have four parts. First is an indent. Next is a box into which +sectioning numbers are placed, and then the third box holds the title +text, such as 'First section'. Finally there is a box up against the +right margin, inside of which LaTeX puts the page number box. For the +indent and the width of the number box, *note \@dottedtocline::. The +right margin box has width '\@tocrmarg' and the page number is flush +right in that space, inside a box of width '\@pnumwidth'. By default +'\@tocrmarg' is '2.55em' and '\@pnumwidth' is '1.55em'. Change these as +with '\renewcommand{\@tocrmarg}{3.5em}'. + + CTAN has many packages for the table of contents and lists of figures +and tables (*note CTAN::). The package 'tocloft' is convenient for +adjusting some aspects of the default such as spacing. And, +'tocbibbind' will automatically add the bibliography, index, etc. to +the table of contents. + + To change the header for the table of contents page, do something +like these commands before you call '\tableofcontents', etc. + + \renewcommand{\contentsname}{Table of Contents} + \renewcommand{\listfigurename}{Plots} + \renewcommand{\listtablename}{Specifications} + + + Internationalization packages such as 'babel' or 'polyglossia' will +change these headers depending on the chosen base language. + +* Menu: + +* \@dottedtocline:: Format entry line in table of contents, etc. +* \addcontentsline:: Add an entry to table of contents, etc. +* \addtocontents:: Add text directly to table of contents file, etc. +* \contentsline:: Set line in table of contents, etc. +* \nofiles:: Prevent writing to auxiliary files. +* \numberline:: Put its number argument flush left in a box. + + +File: latex2e.info, Node: \@dottedtocline, Next: \addcontentsline, Up: Table of contents etc. 
+ +25.1.1 '\@dottedtocline' +------------------------ + +Synopsis: + + \@dottedtocline{SECTION-LEVEL-NUM}{INDENT}{NUMWIDTH}{TEXT}{PAGENUMBER} + + Used internally by LaTeX to format an entry line in the table of +contents, list of figures, or list of tables. Authors do not directly +enter '\@dottedtocline' commands. + + This command is typically used by '\l@section', '\l@subsection', +etc., to format the content lines. For example, the 'article.cls' file +contains these definitions: + + \newcommand*\l@section{\@dottedtocline{1}{1.5em}{2.3em}} + \newcommand*\l@subsection{\@dottedtocline{2}{3.8em}{3.2em}} + \newcommand*\l@subsubsection{\@dottedtocline{3}{7.0em}{4.1em}} + +In this example, '\@dottedcline' appears to have been given only three +arguments. But tracing the internal code shows that it picks up the +final TEXT and PAGENUMBER arguments in the synopsis from a call to +'\contentsline' (*note \contentsline::). + + Between the box for the title text of a section and the right margin +box, these '\@dottedtocline' commands insert "leaders", that is, +evenly-spaced dots. The dot-to-dot space is given by the command +'\@dotsep'. By default it is 4.5 (it is in math units, aka. 'mu', which +are '1/18' em. Change it using '\renewcommand', as in +'\renewcommand{\@dotsep}{3.5}'. + + In the standard 'book' class, LaTeX does not use dotted leaders for +the Part and Chapter table entries, and in the standard 'article' class +it does not use dotted leaders for Section entries. + + +File: latex2e.info, Node: \addcontentsline, Next: \addtocontents, Prev: \@dottedtocline, Up: Table of contents etc. + +25.1.2 '\addcontentsline' +------------------------- + +Synopsis: + + \addcontentsline{EXT}{UNIT}{TEXT} + + Add an entry to the auxiliary file with extension EXT. + + The following will result in an 'Appendices' line in the table of +contents. + + \addcontentsline{toc}{section}{\protect\textbf{Appendices}} + +It will appear at the same indentation level as the sections, will be in +boldface, and will be assigned the page number associated with the point +where the command appears in the input file. + + The '\addcontentsline' command writes information to the file +'ROOT-NAME.EXT', where ROOT-NAME is the file name of the root file +(*note Splitting the input::). It writes that information as the text +of the command '\contentsline{UNIT}{TEXT}{NUM}', where 'NUM' is the +current value of counter 'UNIT' (*note \contentsline::). The most +common case is the table of contents and there NUM is the page number of +the first page of UNIT. + + This command is invoked by the sectioning commands '\chapter', etc. +(*note Sectioning::), and also by '\caption' inside a float environment +(*note Floats::). But it is also directly used by authors. For +example, an author writing a book whose style is to have an unnumbered +preface may use the starred '\chapter*'. But that command leaves out +table of contents information, which can be entered manually, as here. + + \chapter*{Preface} + \addcontentsline{toc}{chapter}{\protect\numberline{}Preface} + +In the 'ROOT-NAME.toc' file LaTeX will put the line '\contentsline +{chapter}{\numberline {}Preface}{3}'; note that the page number '3' is +automatically generated by the system, not entered manually. + + All of the arguments for '\addcontentsline' are required. + +EXT + Typically one of the strings 'toc' for the table of contents, 'lof' + for the list of figures, or 'lot' for the list of tables. The + filename extension of the information file. 
+ +UNIT + A string that depends on the value of the EXT argument, typically + one of: + + 'toc' + For the table of contents, this is the name of a sectional + unit: 'part', 'chapter', 'section', 'subsection', etc. + + 'lof' + For the list of figures: 'figure'. + + 'lot' + For the list of tables: 'table'. + +TEXT + The text of the entry. You must '\protect' any fragile commands + (*note \protect::) used in it. + + The '\addcontentsline' command has an interaction with '\include' +(*note \include & \includeonly::). If you use them at the same level, +as with '\addcontentsline{...}{...}{...}\include{...}' then lines in the +table of contents can come out in the wrong order. The solution is to +move '\addcontentsline' into the file being included. + + If you use a UNIT that LaTeX does not recognize, as with the typo +here + + \addcontentsline{toc}{setcion}{\protect\textbf{Appendices}} + +then you don't get an error but the formatting in the table of contents +will not make sense. + + +File: latex2e.info, Node: \addtocontents, Next: \contentsline, Prev: \addcontentsline, Up: Table of contents etc. + +25.1.3 '\addtocontents' +----------------------- + +Synopsis: + + \addtocontents{EXT}{TEXT} + + Add TEXT, which may be text or formatting commands, directly to the +auxiliary file with extension EXT. This is most commonly used for the +table of contents so that is the discussion here, but it also applies to +the list of figures and list of tables. + + This will put some vertical space in the table of contents after the +'Contents' header. + + \tableofcontents\newpage + \addtocontents{toc}{\protect\vspace*{3ex}} + +This puts the word 'Page', in boldface, above the column of page numbers +and after the header. + + \tableofcontents + \addtocontents{toc}{~\hfill\textbf{Page}\par} + \chapter{...} + +This adds a line announcing work by a new author. + + \addtocontents{toc}{% + \protect\vspace{2ex} + \textbf{Chapters by N. Other Author}\par} + + The difference between '\addtocontents' and '\addcontentsline' is +that the latter is strictly for lines, such as with a line giving the +page number for the start of a new subset of the chapters. As the above +examples show, '\addtocontents' is for material such as spacing. + + The '\addtocontents' command has two arguments. Both are required. + +EXT + Typically one of: 'toc' for the table of contents, 'lof' for the + list of figures, or 'lot' for the list of tables. The extension of + the file holding the information. + +TEXT + The text, and possibly commands, to be written. + + The sectioning commands such as '\chapter' use the '\addcontentsline' +command to store information. This command creates lines in the '.toc' +auxiliary file containing the '\contentsline' command (*note +\addcontentsline::). In contrast, the command '\addtocontents' puts +material directly in that file. + + The '\addtocontents' command has an interaction with '\include' +(*note \include & \includeonly::). If you use them at the same level, +as with '\addtocontents{...}{...}\include{...}' then lines in the table +of contents can come out in the wrong order. The solution is to move +'\addtocontents' into the file being included. + + +File: latex2e.info, Node: \contentsline, Next: \nofiles, Prev: \addtocontents, Up: Table of contents etc. + +25.1.4 '\contentsline' +---------------------- + +Synopsis: + + \contentsline{UNIT}{TEXT}{PAGENUMBER} + + Used internally by LaTeX to typeset an entry of the table of +contents, list of figures, or list of tables (*note Table of contents +etc.::). 
Authors do not directly enter '\contentsline' commands. + + Usually adding material to these lists is done automatically by the +commands '\chapter', '\section', etc. for the table of contents, or by +the '\caption' command inside of a '\figure' or '\table' environment +(*note figure:: and *note table::). Thus, where the root file is +'thesis.tex', and contains the declaration '\tableofcontents', the +command '\chapter{Chapter One}' produces something like this in the file +'thesis.toc'. + + \contentsline {chapter}{\numberline {1}Chapter One}{3} + + If the file contains the declaration '\listoffigures' then a figure +environment involving '\caption{Test}' will produce something like this +in 'thesis.lof'. + + \contentsline {figure}{\numberline {1.1}{\ignorespaces Test}}{6} + + To manually add material, use +'\addcontentsline{FILETYPE}{UNIT}{TEXT}', where FILETYPE is 'toc', +'lof', or 'lot' (*note \addcontentsline::). + + For manipulating how the '\contentline' material is typeset, see the +'tocloft' package. + + Note that the 'hyperref' package changes the definition of +'\contentsline' (and '\addcontentsline') to add more arguments, to make +hyperlinks. This is the source of the error 'Argument of \contentsline +has an extra }' when one adds/remove the use of package 'hyperref' and a +compilation was already run. Fix this error by deleting the '.toc' or +'.lof' or '.lot' file, and running LaTeX again. + + +File: latex2e.info, Node: \nofiles, Next: \numberline, Prev: \contentsline, Up: Table of contents etc. + +25.1.5 '\nofiles' +----------------- + +Synopsis: + + \nofiles + + Prevent LaTeX from writing any auxiliary files. The only output will +be the '.log' and '.pdf' (or '.dvi') files. This command must go in the +preamble. + + Because of the '\nofiles' command this example will not produce a +'.toc' file. + + \documentclass{book} + \nofiles + \begin{document} + \tableofcontents\newpage + \chapter{...} + ... + +LaTeX will not erase any existing auxiliary files, so if you insert the +'\nofiles' command after you have run the file and gotten a '.toc' then +the table of contents page will continue to show the old information. + + +File: latex2e.info, Node: \numberline, Prev: \nofiles, Up: Table of contents etc. + +25.1.6 '\numberline' +-------------------- + +Synopsis: + + \numberline{NUMBER} + + Typeset its argument flush left in a box. This is used in a +'\contentsline' command to typeset the section number (*note +\contentsline::). + + For example, this line in a '.toc' file causes the '1.1' to be +typeset flush left. + + \contentsline {subsection}{\numberline {1.1}Motivation}{2} + + By default, LaTeX typesets the section numbers in a box of length +'\@tempdima'. That length is set by the commands '\l@section', +'\l@subsection', etc. Put section numbers inside a natural-width box +with '\renewcommand{\numberline}[1]{#1~}' before '\tableofcontents'. + + This command is fragile so you may need to precede it with '\protect' +(*note \protect::). An example is the use of '\protect' in this +command, + + \addcontentsline{toc}{section}{\protect\numberline{}Summary} + +to get the '\numberline' into the '\contentsline' command in the '.toc' +file: '\contentsline {section}{\numberline {}Summary}{6}' (the page +number '6' is automatically added by LaTeX; *note \addcontentsline::). 
+ + +File: latex2e.info, Node: Indexes, Next: Glossaries, Prev: Table of contents etc., Up: Front/back matter + +25.2 Indexes +============ + +If you tell LaTeX what terms you want to appear in an index then it can +produce that index, alphabetized and with the page numbers automatically +maintained. This illustrates the basics. + + \documentclass{article} + \usepackage{makeidx} % Provide indexing commands + \makeindex + % \usepackage{showidx} % Show marginal notes of index entries + ... + \begin{document} + ... + Wilson's Theorem\index{Wilson's Theorem} + says that a number $n>1$ is prime if and only if the factorial + of $n-1$ is congruent to $-1$ + modulo~$n$.\index{congruence!and Wilson's Theorem} + ... + \printindex + \end{document} + +As that shows, declare index entries with the '\index' command (*note +\index::). When you run LaTeX, the '\index' writes its information, +such as 'Wilson's Theorem' and the page number, to an auxiliary file +whose name ends in '.idx'. Next, to alphabetize and do other +manipulations, run an external command, typically 'makeindex' (*note +makeindex::), which writes a file whose name ends in '.ind'. Finally, +'\printindex' brings this manipulated information into the output (*note +\printindex::). + + Thus, if the above example is in the file 'numth.tex' then running +'pdflatex numth' will save index entry and page number information to +'numth.idx'. Then running 'makeindex numth' will alphabetize and save +the results to 'numth.ind'. Finally, again running 'pdflatex numth' +will show the desired index, at the place where the '\printindex' +command is in the source file. + + There are many options for the output. An example is that the +exclamation point in '\index{congruence!and Wilson's Theorem}' produces +a main entry of 'congruence' with a subentry of 'and Wilson's Theorem'. +For more, *note makeindex::. + + The '\makeindex' and '\printindex' commands are independent. Leaving +out the '\makeindex' will stop LaTeX from saving the index entries to +the auxiliary file. Leaving out the '\printindex' will cause LaTeX to +not show the index in the document output. + + There are many packages in the area of indexing. The 'showidx' +package causes each index entries to be shown in the margin on the page +where the '\index' appears. This can help in preparing the index. The +'multind' package, among others, supports multiple indexes. See also +the TeX FAQ entry on this topic, , +and the CTAN topic, . + +* Menu: + +* Produce the index manually:: Alphabetize entries yourself. +* \index:: Declare an index entry. +* makeindex:: Alphabetize index entries automatically. +* \printindex:: Put the index here. + + +File: latex2e.info, Node: Produce the index manually, Next: \index, Up: Indexes + +25.2.1 Produce the index manually +--------------------------------- + +Documents that are small and static can have a manually produced index. +This will make a separate page labeled 'Index', in twocolumn format. + + \begin{theindex} + \item acorn squash, 1 + \subitem maple baked, 2 + \indexspace + \item bacon, 3 + \subitem maple baked, 4 + \end{theindex} + + Note that the author must enter the page numbers, which is tedious +and which will result in wrong numbers if the document changes. This is +why in most cases automated methods such as 'makeindex' are best. *Note +Indexes::. + + However we cover the commands for completeness, and because the +automated methods are based on these commands. There are three levels +of entries. 
As the example shows, a main entry uses '\item', subentries +use '\subitem', and the lowest level uses '\subsubitem'. Blank lines +between entries have no effect. The example above includes +'\indexspace' to produce vertical space in the output that some index +styles use before the first entry starting with a new letter. + + +File: latex2e.info, Node: \index, Next: makeindex, Prev: Produce the index manually, Up: Indexes + +25.2.2 '\index' +--------------- + +Synopsis: + + \index{INDEX-ENTRY-STRING} + + Declare an entry in the index. This command is fragile (*note +\protect::). + + For example, as described in *note Indexes::, one way to get an index +from what's below is to compile the document with 'pdflatex test', then +process the index entries with 'makeindex test', and then compile again +with 'pdflatex test'. + + % file test.tex + ... + W~Ackermann (1896--1962).\index{Ackermann} + ... + Ackermann function\index{Ackermann!function} + ... + rate of growth\index{Ackermann!function!growth rate} + +All three index entries will get a page number, such as 'Ackermann, 22'. +LaTeX will format the second as a subitem of the first, on the line +below it and indented, and the third as a subitem of the second. Three +levels deep is as far as you can nest subentries. (If you add +'\index{Ackermann!function!growth rate!comparison}' then 'makeindex' +says 'Scanning input file test.idx....done (4 entries accepted, 1 +rejected)' and the fourth level is silently missing from the index.) + + If you enter a second '\index' with the same INDEX-ENTRY-STRING then +you will get a single index entry with two page numbers (unless they +happen to fall on the same page). Thus, adding 'as for +Ackermann.\index{Ackermann}' later in the same document as above will +give an index entry like 'Ackermann, 22, 151'. Also, you can enter the +index entries in any order, so for instance '\index{Ackermann!function}' +could come before '\index{Ackermann}'. + + Get a page range in the output, like 'Hilbert, 23--27', as here. + + W~Ackermann (1896--1962).\index{Ackermann} + ... + D~Hilbert (1862--1943)\index{Ackermann!Hilbert|(} + ... + disapproved of his marriage.\index{Ackermann!Hilbert|)} + +If the beginning and ending of the page range are equal then the system +just gives a single page number, not a range. + + If you index subentries but not a main entry, as with +'\index{Jones!program}' and '\index{Jones!results}', then the output is +the item 'Jones' with no comma or page number, followed by two subitems, +like 'program, 50' and 'results, 51'. + + Generate a index entry that says 'see' by using a vertical bar +character: '\index{Ackermann!function|see{P\'eter's function}}'. You +can instead get 'see also' with 'seealso'. (The text 'see' is defined +by '\seename', and 'see also' by '\alsoname'. You can redefine these +either by using an internationalization package such as 'babel' or +'polyglossia', or directly as with '\renewcommand{\alsoname}{Also +see}'.) + + The 'see' feature is part of a more general functionality. After the +vertical bar you can put the name of a one-input command, as in +'\index{group|textit}' (note the missing backslash on the '\textit' +command) and the system will apply that command to the page number, here +giving something like '\textit{7}'. You can define your own one-input +commands, such as '\newcommand{\definedpage}[1]{{\color{blue}#1}}' and +then '\index{Ackermann!function|definedpage}' will give a blue page +number (*note Color::). 
Another, less practical, example is this, + + \newcommand\indexownpage[1]{#1, \thepage} + ... Epimenides.\index{self-reference|indexownpage} + +which creates an entry citing the page number of its own index listing. + + The two functions just described combine, as here + + \index{Ackermann!function|(definedpage} + ... + \index{Ackermann!function|)} + +which outputs an index entry like 'function, 23--27' where the page +number range is in blue. + + Consider an index entry such as 'α-ring'. Entering it as +'$\alpha$-ring' will cause it to be alphabetized according to the dollar +sign. You can instead enter it using an at-sign, as +'\index{alpha-ring@$\alpha$-ring}'. If you specify an entry with an +at-sign separating two strings, 'POS@TEXT', then POS gives the +alphabetical position of the entry while TEXT produces the text of the +entry. Another example is that '\index{Saint Michael's College@SMC}' +produces an index entry 'SMC' alphabetized into a different location +than its spelling would naturally give it. + + To put a '!', or '@', or '|', or '"' character in an index entry, +escape it by preceding it with a double quote, '"'. (The double quote +gets deleted before alphabetization.) + + A number of packages on CTAN have additional functionality beyond +that provided by 'makeidx'. One is 'index', which allows for multiple +indices and contains a command '\index*{INDEX-ENTRY-STRING}' that prints +the INDEX-ENTRY-STRING as well as indexing it. + + The '\index' command writes the indexing information to the file +'ROOT-NAME.idx' file. Specifically, it writes text of the command +'\indexentry{INDEX-ENTRY-STRING}{PAGE-NUM}', where PAGE-NUM is the value +of the '\thepage' counter. On occasion, when the '\printindex' command +is confused, you have to delete this file to start with a fresh slate. + + If you omit the closing brace of an '\index' command then you get a +message like this. + + Runaway argument? {Ackermann!function + ! Paragraph ended before \@wrindex was complete. + + +File: latex2e.info, Node: makeindex, Next: \printindex, Prev: \index, Up: Indexes + +25.2.3 'makeindex' +------------------ + +Synopsis, one of: + + makeindex FILENAME + makeindex -s STYLE-FILE FILENAME + makeindex OPTIONS FILENAME0 ... + + Sort, and otherwise process, the index information in the auxiliary +file FILENAME. This is a command line program. It takes one or more +raw index files, 'FILENAME.idx' files, and produces the actual index +file, the 'FILENAME.ind' file that is input by '\printindex' (*note +\printindex::). + + The first form of the command suffices for many uses. The second +allows you to format the index by using an "index style file", a '.isty' +file. The third form is the most general; see the full documentation on +CTAN. + + This is a simple '.isty' file. + + % book.isty + % $ makeindex -s book.isty -p odd book.idx + % creates the index as book.ind, starting on an odd page. + preamble + "\\pagestyle{empty} + \\small + \\begin{theindex} + \\thispagestyle{empty}" + + postamble + "\n + \\end{theindex}" + + The description here covers only some of the index formatting +possibilities in STYLE-FILE. For a full list see the documentation on +CTAN. + + A style file consists of a list of pairs: SPECIFIER and ATTRIBUTE. +These can appear in the file in any order. All of the ATTRIBUTEs are +strings, except where noted. Strings are surrounded with double quotes, +'"', and the maximum length of a string is 144 characters. The '\n' is +for a newline and '\t' is for a tab. 
Backslashes are escaped with +another backslash, '\\'. If a line begins with a percent sign, '%', +then it is a comment. + +'preamble' + Preamble of the output index file. Defines the context in which + the index is formatted. Default: '"\\begin{theindex}\n"'. + +'postamble' + Postamble of the output index file. Default: + '"\n\n\\end{theindex}\n"'. + +'group_skip' + Traditionally index items are broken into groups, typically a group + for entries starting with letter 'a', etc. This specifier gives + what is inserted when a new group begins. Default: '"\n\n + \\indexspace\n"' ('\indexspace' is a command inserting a rubber + length, by default '10pt plus5pt minus3pt'). + +'lethead_flag' + An integer. It governs what is inserted for a new group or letter. + If it is 0 (which is the default) then other than 'group_skip' + nothing will be inserted before the group. If it is positive then + at a new letter the 'lethead_prefix' and 'lethead_suffix' will be + inserted, with that letter in uppercase between them. If it is + negative then what will be inserted is the letter in lowercase. + The default is 0. + +'lethead_prefix' + If a new group begins with a different letter then this is the + prefix inserted before the new letter header. Default: '""' + +'lethead_suffix' + If a group begins with a different letter then this is the suffix + inserted after the new letter header. Default: '""'. + +'item_0' + What is put between two level 0 items. Default: '"\n \\item "'. + +'item_1' + Put between two level 1 items. Default: '"\n \\subitem "'. + +'item_2' + put between two level 2 items. Default: '"\n \\subsubitem "'. + +'item_01' + What is put between a level 0 item and a level 1 item. Default: + '"\n \\subitem "'. + +'item_x1' + What is put between a level 0 item and a level 1 item in the case + that the level 0 item doesn't have any page numbers (as in + '\index{aaa|see{bbb}}'). Default: '"\n \\subitem "'. + +'item_12' + What is put between a level 1 item and a level 2 item. Default: + '"\n \\subsubitem "'. + +'item_x2' + What is put between a level 1 item and a level 2 item, if the + level 1 item doesn't have page numbers. Default: '"\n \\subsubitem + "'. + +'delim_0' + Delimiter put between a level 0 key and its first page number. + Default: a comma followed by a blank, '", "'. + +'delim_1' + Delimiter put between a level 1 key and its first page number. + Default: a comma followed by a blank, '", "'. + +'delim_2' + Delimiter between a level 2 key and its first page number. + Default: a comma followed by a blank, '", "'. + +'delim_n' + Delimiter between two page numbers for the same key (at any level). + Default: a comma followed by a blank, '", "'. + +'delim_r' + What is put between the starting and ending page numbers of a + range. Default: '"--"'. + +'line_max' + An integer. Maximum length of an index entry's line in the output, + beyond which the line wraps. Default: '72'. + +'indent_space' + What is inserted at the start of a wrapped line. Default: + '"\t\t"'. + +'indent_length' + A number. The length of the wrapped line indentation. The default + 'indent_space' is two tabs and each tab is eight spaces so the + default here is '16'. + +'page_precedence' + A document may have pages numbered in different ways. For example, + a book may have front matter pages numbered in lowercase roman + while main matter pages are in arabic. This string specifies the + order in which they will appear in the index. 
The 'makeindex' + command supports five different types of numerals: lowercase roman + 'r', and numeric or arabic 'n', and lowercase alphabetic 'a', and + uppercase roman 'R', and uppercase alphabetic 'A'. Default: + '"rnaRA"'. + + There are a number of other programs that do the job 'makeindex' +does. One is 'xindy' (), which does +internationalization and can process indexes for documents marked up +using LaTeX and a number of other languages. It is written in Lisp, +highly configurable, both in markup terms and in terms of the collating +order of the text, as described in its documentation. + + A more recent indexing program supporting Unicode is 'xindex', +written in Lua (). + + +File: latex2e.info, Node: \printindex, Prev: makeindex, Up: Indexes + +25.2.4 '\printindex' +-------------------- + +Synopsis: + + \printindex + + Place the index into the output. + + To get an index you must first include +'\usepackage{makeidx}\makeindex' in the document preamble and compile +the document, then run the system command 'makeindex', and then compile +the document again. *Note Indexes::, for further discussion and an +example of the use of '\printindex'. + + +File: latex2e.info, Node: Glossaries, Prev: Indexes, Up: Front/back matter + +25.3 Glossaries +=============== + +Synopsis: + + \usepackage{glossaries} \makeglossaries + ... + \newglossaryentry{LABEL}{SETTINGS} + ... + \gls{LABEL}. + ... + \printglossaries + + The 'glossaries' package allows you to make glossaries, including +multiple glossaries, as well as lists of acronyms. + + To get the output from this example, compile the document (for +instance with 'pdflatex filename'), then run the command line command +'makeglossaries filename', and then compile the document again. + + \documentclass{...} + \usepackage{glossaries} \makeglossaries + \newglossaryentry{tm}{% + name={Turing machine}, + description={A model of a machine that computes. The model is simple + but can compute anything any existing device can compute. + It is the standard model used in Computer Science.}, + } + \begin{document} + Everything begins with the definition of a \gls{tm}. + ... + \printglossaries + \end{document} + +That gives two things. In the main text it outputs '... definition of a +Turing machine'. In addition, in a separate sectional unit headed +'Glossary' there appears a description list. In boldface it says +'Turing machine' and the rest of the item says in normal type 'A model +of a machine ... Computer Science'. + + The command '\makeglossary' opens the file that will contain the +entry information, 'ROOT-FILE.glo'. Put the '\printglossaries' command +where you want the glossaries to appear in your document. + + The 'glossaries' package is very powerful. For instance, besides the +commands '\newglossaryentry' and '\gls', there are similar commands for +a list of acronyms. See the package documentations on CTAN. + +* Menu: + +* \newglossaryentry:: Declare the content of a glossary entry. +* \gls:: Give a page reference for a glossary entry. + + +File: latex2e.info, Node: \newglossaryentry, Next: \gls, Up: Glossaries + +25.3.1 '\newglossaryentry' +-------------------------- + +Synopsis, one of: + + \newglossaryentry{LABEL} + { + name={NAME}, + description={DESCRIPTION}, + OTHER OPTIONS, ... + } + + or + + \longnewglossaryentry{LABEL} + { + name={NAME}, + OTHER OPTIONS ..., + } + {DESCRIPTION} + + Declare a new entry for a glossary. The LABEL must be unique for the +document. The settings associated with the label are pairs: +'KEY=VALUE'. 
+ + This puts the blackboard bold symbol for the real numbers ℝ, in the +glossary. + + \newglossaryentry{R} + { + name={\ensuremath{\mathbb{R}}}, + description={the real numbers}, + } + + Use the second command form if the DESCRIPTION spans more than one +paragraph. + + For a full list of KEYs see the package documentation on CTAN but +here are a few. + +'name' + (Required.) The word, phrase, or symbol that you are defining. + +'description' + (Required.) The description that will appear in the glossary. If + this has more than one paragraph then you must use the second + command form given in the synopsis. + +'plural' + The plural form of NAME. Refer to the plural form using '\glspl' + or '\Glspl' (*note \gls::). + +'sort' + How to place this entry in the list of entries that the glossary + holds. + +'symbol' + A symbol, such as a mathematical symbol, besides the name. + + +File: latex2e.info, Node: \gls, Prev: \newglossaryentry, Up: Glossaries + +25.3.2 '\gls' +------------- + +Synopsis, one of: + + \gls{LABEL} + \glspl{LABEL} + \Gls{LABEL} + \Glspl{LABEL} + + Refer to a glossary entry. The entries are declared with +'\newglossaryentry' (*note \newglossaryentry::). + + This + + \newglossaryentry{N}{% + name={the natural numbers}, + description={The numbers $0$, $1$, $2$, $\ldots$\@}, + symbol={\ensuremath{\mathbb{N}}}, + } + ... + Consider \gls{N}. + +gives the output 'Consider the natural numbers'. + + The second command form '\glspl{LABEL}' produces the plural of NAME +(by default it tries adding an 's'). The third form capitalizes the +first letter of NAME, as does the fourth form, which also takes the +plural. + + +File: latex2e.info, Node: Letters, Next: Input/output, Prev: Front/back matter, Up: Top + +26 Letters +********** + +Synopsis: + + \documentclass{letter} + \address{SENDERS ADDRESS} % return address + \signature{SENDER NAME} + \begin{document} + \begin{letter}{RECIPIENT ADDRESS} + \opening{SALUTATION} + LETTER BODY + \closing{CLOSING TEXT} + \end{letter} + ... + \end{document} + + Produce one or more letters. + + Each letter is in a separate 'letter' environment, whose argument +RECIPIENT ADDRESS often contains multiple lines separated with a double +backslash, ('\\'). For example, you might have: + + \begin{letter}{Ninon de l'Enclos \\ + l'h\^otel Sagonne} + ... + \end{letter} + + The start of the 'letter' environment resets the page number to 1, +and the footnote number to 1 also. + + The SENDER ADDRESS and SENDER NAME are common to all of the letters, +whether there is one or more, so these are best put in the preamble. As +with the recipient address, often SENDER ADDRESS contains multiple lines +separated by a double backslash ('\\'). LaTeX will put the SENDER NAME +under the closing, after a vertical space for the traditional +hand-written signature. + + Each 'letter' environment body begins with a required '\opening' +command such as '\opening{Dear Madam or Sir:}'. The LETTER BODY text is +ordinary LaTeX so it can contain everything from enumerated lists to +displayed math, except that commands such as '\chapter' that make no +sense in a letter are turned off. Each 'letter' environment body +typically ends with a '\closing' command such as '\closing{Yours,}'. + + Additional material may come after the '\closing'. You can say who +is receiving a copy of the letter with a command like '\cc{the Boss \\ +the Boss's Boss}'. There's a similar '\encl' command for a list of +enclosures. And, you can add a postscript with '\ps'. 
+ + LaTeX's default is to indent the sender name and the closing above it +by a length of '\longindentation'. By default this is '0.5\textwidth'. +To make them flush left, put '\setlength{\longindentation}{0em}' in your +preamble. + + To set a fixed date use something like +'\renewcommand{\today}{1958-Oct-12}'. If put in your preamble then it +will apply to all the letters. + + This example shows only one 'letter' environment. The three lines +marked as optional are typically omitted. + + \documentclass{letter} + \address{Sender's street \\ Sender's town} + \signature{Sender's name \\ Sender's title} + % optional: \location{Mailbox 13} + % optional: \telephone{(102) 555-0101} + \begin{document} + \begin{letter}{Recipient's name \\ Recipient's address} + \opening{Sir:} + % optional: \thispagestyle{firstpage} + I am not interested in entering a business arrangement with you. + \closing{Your most humble, etc.,} + \end{letter} + \end{document} + + These commands are used with the 'letter' class. + +* Menu: + +* \address:: Sender's return address. +* \cc:: Carbon copy list. +* \closing:: Saying goodbye. +* \encl:: List of enclosed material. +* \location:: Sender's organizational location. +* \makelabels:: Make address labels. +* \name:: Sender's name, for the return address. +* \opening:: Saying hello. +* \ps:: Adding a postscript. +* \signature:: Sender's signature. +* \telephone:: Sender's phone number. + + +File: latex2e.info, Node: \address, Next: \cc, Up: Letters + +26.1 '\address' +=============== + +Synopsis: + + \address{SENDERS ADDRESS} + + Specify the return address, as it appears on the letter and on the +envelope. Separate multiple lines in SENDERS ADDRESS with a double +backslash, '\\'. + + Because it can apply to multiple letters this declaration is often +put in the preamble. However, it can go anywhere, including inside an +individual 'letter' environment. + + This command is optional: if you do not use it then the letter is +formatted with some blank space on top, for copying onto pre-printed +letterhead paper. If you do use the '\address' declaration then it is +formatted as a personal letter. + + Here is an example. + + \address{Stephen Maturin \\ + The Grapes of the Savoy} + + +File: latex2e.info, Node: \cc, Next: \closing, Prev: \address, Up: Letters + +26.2 '\cc' +========== + +Synopsis: + + \cc{NAME0 \\ + ... } + + Produce a list of names to which copies of the letter were sent. +This command is optional. If it appears then typically it comes after +'\closing'. Put the names on different lines by separating them with a +double backslash, '\\', as in: + + \cc{President \\ + Vice President} + + +File: latex2e.info, Node: \closing, Next: \encl, Prev: \cc, Up: Letters + +26.3 '\closing' +=============== + +Synopsis: + + \closing{TEXT} + + Produce the letter's closing. This is optional, but usual. It +appears at the end of a letter, above a handwritten signature. For +example: + + \closing{Regards,} + + +File: latex2e.info, Node: \encl, Next: \location, Prev: \closing, Up: Letters + +26.4 '\encl' +============ + +Synopsis: + + \encl{FIRST ENCLOSED OBJECT \\ + ... } + + Produce a list of things included with the letter. This command is +optional; when it is used, it typically is put after '\closing'. +Separate multiple lines with a double backslash, '\\'. 
+ + \encl{License \\ + Passport} + + +File: latex2e.info, Node: \location, Next: \makelabels, Prev: \encl, Up: Letters + +26.5 '\location' +================ + +Synopsis: + + \location{TEXT} + + The TEXT appears centered at the bottom of the page. It only appears +if the page style is 'firstpage'. + + +File: latex2e.info, Node: \makelabels, Next: \name, Prev: \location, Up: Letters + +26.6 '\makelabels' +================== + +Synopsis: + + \makelabels % in preamble + + Optional, for a document that contains 'letter' environments. If you +just put '\makelabels' in the preamble then at the end of the document +you will get a sheet with labels for all the recipients, one for each +letter environment, that you can copy to a sheet of peel-off address +labels. + + Customize the labels by redefining the commands '\startlabels', +'\mlabel', and '\returnaddress' (and perhaps '\name') in the preamble. +The command '\startlabels' sets the width, height, number of columns, +etc., of the page onto which the labels are printed. The command +'\mlabel{RETURN ADDRESS}{RECIPIENT ADDRESS}' produces the two labels (or +one, if you choose to ignore the RETURN ADDRESS) for each letter +environment. The first argument, RETURN ADDRESS, is the value returned +by the macro '\returnaddress'. The second argument, RECIPIENT ADDRESS, +is the value passed in the argument to the 'letter' environment. By +default '\mlabel' ignores the first argument, the RETURN ADDRESS, +causing the default behavior described in the prior paragraph. + + This illustrates customization. Its output includes a page with two +columns having two labels each. + + \documentclass{letter} + \renewcommand*{\returnaddress}{Fred McGuilicuddy \\ + Oshkosh, Mineola 12305} + \newcommand*\originalMlabel{} + \let\originalMlabel\mlabel + \def\mlabel#1#2{\originalMlabel{}{#1}\originalMlabel{}{#2}} + \makelabels + ... + \begin{document} + \begin{letter}{A Einstein \\ + 112 Mercer Street \\ + Princeton, New Jersey, USA 08540} + ... + \end{letter} + \begin{letter}{K G\"odel \\ + 145 Linden Lane \\ + Princeton, New Jersey, USA 08540} + ... + \end{letter} + \end{document} + +The first column contains the return address twice. The second column +contains the address for each recipient. + + The package 'envlab' makes formatting the labels easier, with +standard sizes already provided. The preamble lines +'\usepackage[personalenvelope]{envlab}' and '\makelabels' are all that +you need to print envelopes. + + +File: latex2e.info, Node: \name, Next: \opening, Prev: \makelabels, Up: Letters + +26.7 '\name' +============ + +Synopsis: + + \name{NAME} + + Optional. Sender's name, used for printing on the envelope together +with the return address. + + +File: latex2e.info, Node: \opening, Next: \ps, Prev: \name, Up: Letters + +26.8 '\opening' +=============== + +Synopsis: + + \opening{SALUTATION} + + Required. Follows the '\begin{letter}{...}'. The argument +SALUTATION is mandatory. For instance: + + \opening{Dear John:} + + +File: latex2e.info, Node: \ps, Next: \signature, Prev: \opening, Up: Letters + +26.9 '\ps' +========== + +Synopsis: + + \ps{TEXT} + + Add a postscript. This command is optional and usually is used after +'\closing'. + + \ps{P.S. After you have read this letter, burn it. Or eat it.} + + +File: latex2e.info, Node: \signature, Next: \telephone, Prev: \ps, Up: Letters + +26.10 '\signature' +================== + +Synopsis: + + \signature{FIRST LINE \\ + ... } + + The sender's name. This command is optional, although its inclusion +is usual. 
+ + The argument text appears at the end of the letter, after the +closing. LaTeX leaves some vertical space for a handwritten signature. +Separate multiple lines with a double backslash, '\\'. For example: + + \signature{J Fred Muggs \\ + White House} + + LaTeX's default for the vertical space from the '\closing' text down +to the '\signature' text is '6\medskipamount', which is six times +'\medskipamount' (where '\medskipamount' is equal to a '\parskip', which +in turn is defined by default here to 0.7em). + + This command is usually in the preamble, to apply to all the letters +in the document. To have it apply to one letter only, put it inside a +'letter' environment and before the '\closing'. + + You can include a graphic in the signature as here. + + \signature{\vspace{-6\medskipamount}\includegraphics{sig.png}\\ + My name} + +For this you must put '\usepackage{graphicx}' in the preamble (*note +Graphics::). + + +File: latex2e.info, Node: \telephone, Prev: \signature, Up: Letters + +26.11 '\telephone' +================== + +Synopsis: + + \telephone{NUMBER} + + The sender's telephone number. This is typically in the preamble, +where it applies to all letters. This only appears if the 'firstpage' +pagestyle is selected. If so, it appears on the lower right of the +page. + + +File: latex2e.info, Node: Input/output, Next: Command line interface, Prev: Letters, Up: Top + +27 Input/output +*************** + +LaTeX uses the ability to write to a file and later read it back in to +build document components such as a table of contents or index. You can +also read a file that other programs written, or write a file for others +to read. You can communicate with users through the terminal. And, you +can issue instructions for the operating system. + +* Menu: + +* \openin & \openout:: Open a file. +* \read:: Read text from a file. +* \typein:: Read text from the terminal. +* \typeout:: Write text to the terminal. +* \write:: Write text to a file or terminal. + + +File: latex2e.info, Node: \openin & \openout, Next: \read, Up: Input/output + +27.1 '\openin' & '\openout' +=========================== + +Synopsis: + + \openin NUMBER=FILENAME + + or: + + \openout NUMBER=FILENAME + + Open a file for reading material, or for writing it. In most +engines, the NUMBER must be between 0 and 15, as in '\openin3'; in +LuaLaTeX, NUMBER can be between 0 and 127. + + Here TeX opens the file 'presidents.tex' for reading. + + \newread\presidentsfile + \openin\presidentsfile=presidents + \typeout{presidentsfile is \the\presidentsfile} + \read\presidentsfile to\presidentline + \typeout{\presidentline} + +The '\newread' command allocates input stream numbers from 0 to 15 +(there is also a '\newwrite'). The '\presidentsfile' is more memorable +but under the hood it is still a number; the first '\typeout' gives +something like 'presidentsfile is 1'. In addition, '\newread' keeps +track of the allocation so that if you use too many then you get an +error like '! No room for a new \read'. The second '\typeout' gives the +first line of the file, something like '1 Washington, George'. + + Ordinarily TeX will not try to open the file until the next page +shipout. To change this, use '\immediate\openin NUMBER=FILENAME' or +'\immediate\openout NUMBER=FILENAME'. + + Close files with '\closein NUMBER' and '\closeout NUMBER'. + + How LaTeX handles filenames varies among distributions, and even can +vary among versions of a distribution. If the file does not have an +extension then TeX will add a '.tex'. 
This creates 'presidents.tex', +writes one line to it, and closes it. + + \newwrite\presidentsfile + \openout\presidentsfile=presidents + \write\presidentsfile{1 Washington, George} + \closeout\presidentsfile + +But filenames with a period can cause trouble: if TeX finds a FILENAME +of 'presidents.dat' it could look first for 'presidents.dat.tex' and +later for 'presidents.dat', or it could do the opposite. Your +distribution's documentation should say more, and if you find something +that works for you then you are good, but to ensure complete portability +the best thing is to use file names containing only the twenty six ASCII +letters (not case-sensitive) and the ten digits, along with underscore +and dash, and in particular with no dot or space. + + For '\openin', if TeX cannot find the file then it does not give an +error. It just considers that the stream is not open (test for this +with '\ifeof'; one recourse is the command '\InputIfFileExists', *note +Class and package commands::). If you try to use the same number twice, +LaTeX won't give you an error. If you try to use a bad number then you +get an error message like '! Bad number (16). = l.30 +\openin16=test.jh'. + + +File: latex2e.info, Node: \read, Next: \typein, Prev: \openin & \openout, Up: Input/output + +27.2 '\read' +============ + +Synopsis: + + \read NUMBER toMACRO + + Make the command MACRO contain the next line of input from text +stream NUMBER, as in '\read5 to\data'. + + This opens the file 'email.tex' for reading, puts the contents of the +first line into the command '\email', and then closes the file. + + \newread\recipientfile + \openin\recipientfile=email + \read\recipientfile to\email + \typeout{Email address: \email} + \closein\recipientfile + + If NUMBER is outside the range from 0 to 15 or if no file of that +number is open, or if the file has ended, then '\read' will take input +from the terminal (or exit if interaction is not allowed, e.g., +'\nonstopmode'; *note interaction modes::). (However, the natural way +in LaTeX to take input from the terminal is '\typein' (*note \typein::.) + + To read an entire file as additional LaTeX source, use '\input' +(*note \input::) or '\include' (*note \include & \includeonly::). + + A common reason to want to read from a data file is to do mail +merges. CTAN has a number of packages for that; one is 'datatool'. + + +File: latex2e.info, Node: \typein, Next: \typeout, Prev: \read, Up: Input/output + +27.3 '\typein' +============== + +Synopsis, one of: + + \typein{PROMPT-MSG} + \typein[CMD]{PROMPT-MSG} + + Print PROMPT-MSG on the terminal and cause LaTeX to stop and wait for +you to type a line of input. This line of input ends when you hit the +return key. + + For example, this + + As long as I live I shall never forget \typein{Enter student name:} + +coupled with this command line interaction + + Enter student name: + + \@typein=Aphra Behn + +gives the output '... never forget Aphra Behn'. + + The first command version, '\typein{PROMPT-MSG}', causes the input +you typed to be processed as if it had been included in the input file +in place of the '\typein' command. + + In the second command version the optional argument 'CMD' argument +must be a command name, that is, it must begin with a backslash, \. +This command name is then defined or redefined to be the input that you +typed. 
For example, this + + \typein[\student]{Enter student name:} + \typeout{Recommendation for \student .} + +gives this output on the command line, + + Enter student name: + + \student=John Dee + Recommendation for John Dee. + +where the user has entered 'John Dee.' + + +File: latex2e.info, Node: \typeout, Next: \write, Prev: \typein, Up: Input/output + +27.4 '\typeout' +=============== + +Synopsis: + + \typeout{MSG} + + Print 'msg' on the terminal and in the 'log' file. + + This + + \newcommand{\student}{John Dee} + \typeout{Recommendation for \student .} + +outputs 'Recommendation for John Dee'. Like what happens here with +'\student', commands that are defined with '\newcommand' or +'\renewcommand' (among others) are replaced by their definitions before +being printed. + + LaTeX's usual rules for treating multiple spaces as a single space +and ignoring spaces after a command name apply to 'msg'. Use the +command '\space' to get a single space, independent of surrounding +spaces. Use '^^J' to get a newline. Get a percent character with +'\csname @percentchar\endcsname'. + + This command can be useful for simple debugging, as here: + + \newlength{\jhlength} + \setlength{\jhlength}{5pt} + \typeout{The length is \the\jhlength.} + +produces on the command line 'The length is 5.0pt'. + + +File: latex2e.info, Node: \write, Prev: \typeout, Up: Input/output + +27.5 '\write' +============= + +Synopsis: + + \write NUMBER{STRING} + + Write STRING to the log file, to the terminal, or to a file opened by +'\openout'. For instance, '\write6' writes to text stream number 6. + + If the following appears in 'BASEFILE.tex' then it opens +'BASEFILE.jh', writes 'Hello World!' and a newline to it, and closes +that file. + + \newwrite\myfile + \immediate\openout\myfile=\jobname.jh % \jobname is root file basename + ... + \immediate\write\myfile{Hello world!} + ... + \immediate\closeout\myfile + +The '\newwrite' allocates a stream number, giving it a symbolic name to +make life easier, so that 'stream \newwrite\myfile\the\myfile' produces +something like 'stream 3'. Then '\openout' associates the stream number +with the given file name. TeX ultimately executed '\write3' which puts +the string in the file. + + Typically NUMBER is between 0 and 15, because typically LaTeX authors +follow the prior example and the number is allocated by the system. If +NUMBER is outside the range from 0 to 15 or if it is not associated with +an open file then LaTeX writes STRING to the log file. If NUMBER is +positive then in addition LaTeX writes STRING to the terminal. + + Thus, 'test \write-1{Hello World!}' puts 'Hello World!' followed by a +newline in the log file. (This is what the '\wlog' command does; *note +\wlog::). And '\write100{Hello World!}' puts the same in the log file +but also puts 'Hello World!' followed by a newline in the terminal +output. (But 16, 17, and 18 are special as NUMBER; see below.) + + In LuaTeX, instead of 16 output streams there are 256 (*note TeX +engines::). + + Use '\write\@auxout{STRING}' to write to the current '.aux' file, +which is associated with either the root file or with the current +include file; and use '\write\@mainaux{STRING}' to write to the main +'.aux'. These symbolic names are defined by LaTeX. + + By default LaTeX does not write STRING to the file right away. This +is because, for example, you may need '\write' to save the current page +number, but when TeX comes across a '\write' it typically does not know +what the page number is, since it has not yet done the page breaking. 
+So, you use '\write' in one of three contexts: + + \immediate\write\@auxout{STRING} %1 + \write\@auxout{STRING} %2 + \protected@write\@auxout{}{STRING} %3 + + 1. With the first, LaTeX writes STRING to the file immediately. Any + macros in STRING are fully expanded (just as in '\edef') so to + prevent expansion you must use '\noexpand', 'toks', etc., except + that you should use '#' instead of '##'). + + 2. With the second, STRING is stored on the current list of things (as + a TeX "whatsit" item) and kept until the page is shipped out and + likewise the macros are unexpanded until '\shipout'. At + '\shipout', STRING is fully expanded. + + 3. The third, '\protected@write', is like the second except that you + can use '\protect' to avoid expansion. The extra first argument + allows you to locally insert extra definitions to make more macros + protected or to have some other special definition for the write. + + As a simple example of expansion with '\write', STRING here contains +a control sequence '\triplex' which we've defined to be the text 'XYZ': + + \newwrite\jhfile + \openout\jhfile=test.jh + \newcommand{\triplex}{XYZ} + \write\jhfile{test \triplex test} + +This results in the file 'test.jh' containing the text 'test XYZtest' +followed by a newline. + + The cases where NUMBER is 16, 17, or 18 are special. Because of +'\write''s behavior when NUMBER is outside the range from 0 to 15 +described above, in Plain TeX '\write16' and '\write17' were sometimes +used to write to the log file and the terminal; however, in LaTeX, the +natural way to do that is with '\typeout' (*note \typeout::). The +'\write18' command is even more special; modern TeX systems use it for +giving commands to the operating system (*note \write18::). + + Ordinarily '\write' outputs a single line. You can include a newline +with '^^J'. Thus, this produces two lines in the log file: + + \wlog{Parallel lines have a lot in common.^^JBut they never meet.} + + A common case where authors need to write their own file is for +answers to exercises, or another situation where you want to write out +verbatim, without expanding the macros. CTAN has a number of packages +for this; one is 'answers'. + +* Menu: + +* \write and security:: Security. +* \message:: Write text to the log file and terminal. +* \wlog:: Write text to the log file. +* \write18:: Run an operating system command. + + +File: latex2e.info, Node: \write and security, Next: \message, Up: \write + +27.5.1 '\write' and security +---------------------------- + +The ability to write files raises security issues. If you compiled a +downloaded LaTeX file and it overwrote your password file then you would +be justifiably troubled. + + Thus, by default TeX systems only allow you to open files for writing +that are in the current directory or output directory, if specified +(*note output directory::), or in a subdirectory of those. So, this +code + + \newwrite\jhfile + \openout\jhfile=../test.jh + +gives an error like: + Not writing to ../test.jh (openout_any = p). + ! I can't write on file `../test.jh' + + You can get just such an error when using commands such as +'\include{../filename}' because LaTeX will try to open +'../filename.aux'. The simplest solution is to put the included files +in the same directory as the root file, or in subdirectories. + + +File: latex2e.info, Node: \message, Next: \wlog, Prev: \write and security, Up: \write + +27.5.2 '\message' +----------------- + +Synopsis: + + \message{STRING} + + Write STRING to the log file and the terminal. 
+ + Typically, LaTeX authors use '\typeout' (*note \typeout::). It +allows you to use '\protect' on any fragile commands in STRING (*note +\protect::). But '\typeout' always inserts a newline at the end of +STRING while '\message' does not, so the latter can be useful. + + With this example document body. + + before\message{One Two}\message{Three}\message{Four^^JI} + \message{declare a thumb war.}After + +under some circumstances (see below) LaTeX writes the following to both +the terminal and the log file. + + One Two Three Four + I declare a thumb war. + +The '^^J' produces a newline. Also, in the output document, between +'before' and 'After' will be a single space (from the end of line +following 'I}'). + + While '\message' allows you more control over formatting, a gotcha is +that LaTeX may mess up that formatting because it inserts line breaks +depending on what it has already written. Contrast this document body, +where the 'Two' has moved, to the one given above. + + before\message{One}\message{Two Three}\message{Four^^JI} + \message{declare a thumb war.}After + + This can happen: when LaTeX is outputting the messages to the +terminal, now the message with 'One' is shorter and it fits at the end +of the output terminal line, and so LaTeX breaks the line between it and +the 'Two Three'. That line break appears also in the log file. This +line break insertion can depend on, for instance, the length of the full +path names of included files. So producing finely-formatted lines in a +way that is portable is hard, likely requiring starting your message at +the beginning of a line. + + +File: latex2e.info, Node: \wlog, Next: \write18, Prev: \message, Up: \write + +27.5.3 '\wlog' +-------------- + +Synopsis: + + \wlog{STRING} + + Write STRING to the log file. + + \wlog{Did you hear about the mathematician who hates negatives?} + \wlog{He'll stop at nothing to avoid them.} + + Ordinarily STRING appears in a single separate line. Use '^^J' to +insert a newline. + + \wlog{Helvetica and Times Roman walk into a bar.} + \wlog{The barman says,^^JWe don't serve your type.} + + +File: latex2e.info, Node: \write18, Prev: \wlog, Up: \write + +27.5.4 '\write18' +----------------- + +Synopsis: + + \write18{SHELL_COMMAND} + + Issue a command to the operating system shell. The operating system +runs the command and LaTeX's execution is blocked until that finishes. + + This sequence (on Unix) + + \usepackage{graphicx} % in preamble + ... + \newcommand{\fignum}{1} + \immediate\write18{cd pix && asy figure\fignum} + \includegraphics{pix\figure\fignum.pdf} + +will run Asymptote (the 'asy' program) on 'pix/figure1.asy', so that the +document can later read in the resulting graphic (*note +\includegraphics::). Like any '\write', here LaTeX expands macros in +SHELL_COMMAND so that '\fignum' is replaced by '1'. + + Another example is that you can automatically run BibTeX at the start +of each LaTeX run (*note Using BibTeX::) by including +'\immediate\write18{bibtex8 \jobname}' as the first line of the file. +Note that '\jobname' expands to the basename of the root file unless the +'--jobname' option is passed on the command line, in which case this is +the option argument. + + You sometimes need to do a multi-step process to get the information +that you want. 
+
+
+File: latex2e.info, Node: \message, Next: \wlog, Prev: \write and security, Up: \write
+
+27.5.2 '\message'
+-----------------
+
+Synopsis:
+
+     \message{STRING}
+
+   Write STRING to the log file and the terminal.
+
+   Typically, LaTeX authors use '\typeout' (*note \typeout::).  It
+allows you to use '\protect' on any fragile commands in STRING (*note
+\protect::).  But '\typeout' always inserts a newline at the end of
+STRING while '\message' does not, so the latter can be useful.
+
+   With this example document body,
+
+     before\message{One Two}\message{Three}\message{Four^^JI}
+     \message{declare a thumb war.}After
+
+under some circumstances (see below) LaTeX writes the following to both
+the terminal and the log file.
+
+     One Two Three Four
+     I declare a thumb war.
+
+The '^^J' produces a newline.  Also, in the output document, between
+'before' and 'After' will be a single space (from the end of line
+following 'I}').
+
+   While '\message' allows you more control over formatting, a gotcha is
+that LaTeX may mess up that formatting because it inserts line breaks
+depending on what it has already written.  Contrast this document body,
+where the 'Two' has moved, to the one given above.
+
+     before\message{One}\message{Two Three}\message{Four^^JI}
+     \message{declare a thumb war.}After
+
+   This can happen: when LaTeX is outputting the messages to the
+terminal, now the message with 'One' is shorter and it fits at the end
+of the output terminal line, and so LaTeX breaks the line between it and
+the 'Two Three'.  That line break appears also in the log file.  This
+line break insertion can depend on, for instance, the length of the full
+path names of included files.  So producing finely-formatted lines in a
+way that is portable is hard, likely requiring starting your message at
+the beginning of a line.
+
+
+File: latex2e.info, Node: \wlog, Next: \write18, Prev: \message, Up: \write
+
+27.5.3 '\wlog'
+--------------
+
+Synopsis:
+
+     \wlog{STRING}
+
+   Write STRING to the log file.
+
+     \wlog{Did you hear about the mathematician who hates negatives?}
+     \wlog{He'll stop at nothing to avoid them.}
+
+   Ordinarily STRING appears on a single separate line.  Use '^^J' to
+insert a newline.
+
+     \wlog{Helvetica and Times Roman walk into a bar.}
+     \wlog{The barman says,^^JWe don't serve your type.}
+
+
+File: latex2e.info, Node: \write18, Prev: \wlog, Up: \write
+
+27.5.4 '\write18'
+-----------------
+
+Synopsis:
+
+     \write18{SHELL_COMMAND}
+
+   Issue a command to the operating system shell.  The operating system
+runs the command and LaTeX's execution is blocked until that finishes.
+
+   This sequence (on Unix)
+
+     \usepackage{graphicx}  % in preamble
+     ...
+     \newcommand{\fignum}{1}
+     \immediate\write18{cd pix && asy figure\fignum}
+     \includegraphics{pix/figure\fignum.pdf}
+
+will run Asymptote (the 'asy' program) on 'pix/figure1.asy', so that the
+document can later read in the resulting graphic (*note
+\includegraphics::).  Like any '\write', here LaTeX expands macros in
+SHELL_COMMAND so that '\fignum' is replaced by '1'.
+
+   Another example is that you can automatically run BibTeX at the start
+of each LaTeX run (*note Using BibTeX::) by including
+'\immediate\write18{bibtex8 \jobname}' as the first line of the file.
+Note that '\jobname' expands to the basename of the root file unless the
+'--jobname' option is passed on the command line, in which case this is
+the option argument.
+
+   You sometimes need to do a multi-step process to get the information
+that you want.  This will insert into the input a list of all PDF files
+in the current directory (but see 'texosquery' below):
+
+     \immediate\write18{ls *.pdf > tmp.dat}
+     \input{tmp.dat}
+
+   The standard behavior of any '\write' is to wait until a page is
+being shipped out before expanding the macros or writing to the stream
+(*note \write::).  But sometimes you want it done now.  For this, use
+'\immediate\write18{SHELL_COMMAND}'.
+
+   There are obvious security issues with allowing system commands
+inside a LaTeX file.  If you download a file off the net and it contains
+commands to delete all your files then you would be unhappy.  The
+standard settings in modern distributions turn off full shell access.
+You can turn it on, if you are sure the shell commands are safe, by
+compiling with 'latex --enable-write18 FILENAME' (*note Command line
+options::).  (The '--shell-escape' option is a synonym, in TeX Live.)
+
+   In place of full shell access, modern distributions by default use a
+restricted version that allows some commands to work, such as those that
+run Metafont to generate missing fonts, even if you do not use the
+'enable-write18' option.  By default this list of allowed commands is
+short and features only commands that are under the control of the
+distribution maintainers (*note Command line options::).
+
+   The SHELL_COMMAND text is always passed to '/bin/sh' on Unix-like
+operating systems, and the DOS command interpreter 'cmd.exe' on Windows.
+Any different shell set by the user, and the 'SHELL' environment
+variable, is ignored.
+
+   If what you need is system information, such as the operating system
+name, locale information, or directory contents, take a look at the
+'texosquery' package, which provides a convenient and secure interface
+for this, unlike the above examples using the raw '\write18':
+<https://ctan.org/pkg/texosquery>.
+
+   LaTeX provides a package 'shellesc' on top of the primitive
+'\write18' command.  Its primary purpose is to provide a command
+'\ShellEscape' which works identically on all TeX engines; LuaTeX
+intentionally did not retain '\write18' as a way to invoke a shell
+command, so some engine-specific code is needed.  The 'shellesc' package
+also provides a command '\DelayedShellEscape', executed at '\output'
+time, for the same reason.
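+
+   One further aside (a sketch that assumes pdfLaTeX; this primitive is
+not available under every engine): you can test at run time whether
+unrestricted shell escape is in effect by examining the read-only
+integer '\pdfshellescape', which is 0 when shell escape is disabled, 1
+when it is fully enabled, and 2 in the restricted mode described above.
+
+     \ifnum\pdfshellescape=1
+       \immediate\write18{ls *.pdf > tmp.dat}
+     \else
+       \typeout{Shell escape is not fully enabled; skipping.}
+     \fi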
+
+
+File: latex2e.info, Node: Command line interface, Next: Document templates, Prev: Input/output, Up: Top
+
+28 Command line interface
+*************************
+
+Synopsis (from a terminal command line):
+
+     pdflatex OPTIONS ARGUMENT
+
+   Run LaTeX on ARGUMENT.  In place of 'pdflatex' you can also use (for
+PDF output) 'xelatex' or 'lualatex', or (for DVI output) 'latex' or
+'dvilualatex', among others (*note TeX engines::).
+
+   For example, this will run LaTeX on the file 'thesis.tex', creating
+the output 'thesis.pdf'.
+
+     pdflatex thesis
+
+Note that '.tex' is the default file name extension.
+
+   pdfTeX is an extension of the original TeX program, as are XeTeX and
+LuaTeX (*note TeX engines::).  The first two are completely backward
+compatible and the latter is almost so.  Perhaps the most fundamental
+new feature for all three is that the original TeX output its own DVI
+format, while the newer ones can output directly to PDF.  This allows
+them to take advantage of the extra features in PDF such as hyperlinks,
+support for modern image formats such as JPG and PNG, and ubiquitous
+viewing programs.  In short, if you run 'pdflatex' or 'xelatex' or
+'lualatex' then you will by default get PDF and have access to all its
+modern features.  If you run 'latex', or 'dvilualatex', then you will
+get DVI.  The description here assumes pdfLaTeX.
+
+   *Note Command line options::, for a selection of the most useful
+command line options.  As to ARGUMENT, the usual case is that it does
+not begin with a backslash, so the system takes it to be the name of a
+file and it compiles that file.  If ARGUMENT begins with a backslash
+then the system will interpret it as a line of LaTeX input, which can be
+used for special effects (*note Command line input::).
+
+   If you give no arguments or options then 'pdflatex' prompts for input
+from the terminal.  You can escape from this by entering 'CTRL-D'.
+
+   If LaTeX finds an error in your document then by default it stops and
+asks you about it.  *Note Recovering from errors::, for an outline of
+what to do.
+
+* Menu:
+
+* Command line options:: Commonly used command line options.
+* Command line input:: Specify LaTeX code on the command line.
+* Jobname:: How TeX sets the current job name.
+* Recovering from errors:: When something goes wrong.
+
+
+File: latex2e.info, Node: Command line options, Next: Command line input, Up: Command line interface
+
+28.1 Command line options
+=========================
+
+These are the command-line options relevant to ordinary document
+authoring.  For a full list, try running 'latex --help' from the command
+line.
+
+   With many implementations you can specify command line options by
+prefixing them with '-' or '--'.  This is the case for both TeX Live
+(including MacTeX) and MiKTeX.  We will use both conventions
+interchangeably.  If an option takes a value, it can be specified either
+as a separate argument ('--foo val'), or as one argument with an '='
+sign ('--foo=val'), but there can be no spaces around the '='.  We will
+generally use the '=' syntax.
+
+'-version'
+     Show the current version, like 'pdfTeX 3.14159265-2.6-1.40.16 (TeX
+     Live 2015/Debian)' along with a small amount of additional
+     information, and exit.
+
+'-help'
+     Give a brief usage message that is useful as a prompt and exit.
+
+'-interaction=MODE'
+     TeX compiles a document in one of four interaction modes:
+     'batchmode', 'nonstopmode', 'scrollmode', 'errorstopmode'.  In
+     "errorstopmode" (the default), TeX stops at each error and asks for
+     user intervention.  In "batchmode" it prints nothing on the
+     terminal, errors are scrolled as if the user hit 'RETURN' at every
+     error, and missing files cause the job to abort.  In "nonstopmode",
+     diagnostic messages appear on the terminal but as in batch mode
+     there is no user interaction.  In "scrollmode", TeX stops for
+     missing files or keyboard input, but nothing else.
+
+     For instance, starting LaTeX with this command line
+
+          pdflatex -interaction=batchmode FILENAME
+
+     eliminates most terminal output.
+
+'-jobname=STRING'
+     Set the value of TeX's "jobname" to the string.  The log file and
+     output file will then be named 'STRING.log' and 'STRING.pdf'.
+     *Note Jobname::.
+
+'-output-directory=DIRECTORY'
+     Write files in the directory DIRECTORY.  It must already exist.
+     This applies to all external files created by TeX or LaTeX, such as
+     the '.log' file for the run, the '.aux', '.toc', etc., files
+     created by LaTeX, as well as the main '.pdf' or '.dvi' output file
+     itself.
+
+     When specified, the output directory DIRECTORY is also
+     automatically checked first for any file that is input, so that
+     the external files can be read back in, if desired.
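+
+     For example (a sketch; the directory name 'build' is arbitrary and
+     the directory must already exist), this run
+
+          pdflatex -output-directory=build thesis
+
+     leaves 'thesis.pdf', 'thesis.log', 'thesis.aux', and so on, in
+     'build'.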
The true + current directory (in which LaTeX was run) remains unchanged, and + is also checked for input files. + +'--enable-write18' +'--disable-write18' +'--shell-escape' +'--no-shell-escape' + Enable or disable '\write18{SHELL_COMMAND}' (*note \write18::). + The first two options are supported by both TeX Live and MiKTeX, + while the second two are synonyms supported by TeX Live. + + Enabling this functionality has major security implications, since + it allows a LaTeX file to run any command whatsoever. Thus, by + default, unrestricted '\write18' is not allowed. (The default for + TeX Live, MacTeX, and MiKTeX is to allow the execution of a limited + number of TeX-related programs, which they distribute.) + + For example, if you invoke LaTeX with the option 'no-shell-escape', + and in your document you call '\write18{ls -l}', then you do not + get an error but the log file says 'runsystem(ls -l)...disabled'. + +'-halt-on-error' + Stop processing at the first error. + +'-file-line-error' +'-no-file-line-error' + Enable or disable 'FILENAME:LINENO:ERROR'-style error messages. + These are only available with TeX Live or MacTeX. + + +File: latex2e.info, Node: Command line input, Next: Jobname, Prev: Command line options, Up: Command line interface + +28.2 Command line input +======================= + +As part of the command line invocation + + LATEX-ENGINE OPTIONS ARGUMENT + +you can specify arbitrary LaTeX input by starting ARGUMENT with a +backslash. (All the engines support this.) This allows you to do some +special effects. + + For example, this file (which uses the 'hyperref' package for +hyperlinks) can produce two kinds of output, one to be read on physical +paper and one to be read online. + + \ifdefined\paperversion % in preamble + \newcommand{\urlcolor}{black} + \else + \newcommand{\urlcolor}{blue} + \fi + \usepackage[colorlinks=true,urlcolor=\urlcolor]{hyperref} + ... + \href{https://www.ctan.org}{CTAN} % in body + ... + +Compiling this document 'book.tex' with the command line 'pdflatex book' +will give the 'CTAN' link in blue. But compiling it with + + pdflatex "\def\paperversion{}\input book.tex" + +has the link in black. We use double quotes to prevent interpretation +of the symbols by the command line shell. (This usually works on both +Unix and Windows systems, but there are many peculiarities to shell +quoting, so read your system documentation if need be.) + + In a similar way, from the single file 'main.tex' you can compile two +different versions. + + pdflatex -jobname=students "\def\student{}\input{main}" + pdflatex -jobname=teachers "\def\teachers{}\input{main}" + +The 'jobname' option is there because otherwise both files would be +called 'main.pdf' and the second would overwrite the first. (*note +Jobname::.) + + In this example we use the command line to select which parts of a +document to include. For a book named 'mybook.tex' and structured like +this. + + \documentclass{book} + \begin{document} + ... + \include{chap1} + \include{chap2} + ... + \end{document} + +the command line + + pdflatex "\includeonly{chap1}\input{mybook}" + +will give output that has the first chapter but no other chapter. *Note +Splitting the input::. + + +File: latex2e.info, Node: Jobname, Next: Recovering from errors, Prev: Command line input, Up: Command line interface + +28.3 Jobname +============ + +Running LaTeX creates a number of files, including the main PDF (or DVI) +output but also including others. These files are named with the +so-called "jobname". 
The most common case is also the simplest, where for
+instance the command 'pdflatex thesis' creates 'thesis.pdf' and also
+'thesis.log' and 'thesis.aux'.  Here the job name is 'thesis'.
+
+   In general, LaTeX is invoked as 'LATEX-ENGINE OPTIONS ARGUMENT',
+where LATEX-ENGINE is 'pdflatex', 'lualatex', etc. (*note TeX
+engines::).  If ARGUMENT does not start with a backslash, as is the case
+above with 'thesis', then TeX considers it to be the name of the file to
+input as the main document.  This file is referred to as the "root file"
+(*note Splitting the input::, and *note \input::).  The name of that
+root file, without the '.tex' extension if any, is the jobname.  If
+ARGUMENT does start with a backslash, or if TeX is in interactive mode,
+then it waits for the first '\input' command, and the jobname is the
+argument to '\input'.
+
+   There are two more possibilities for the jobname.  It can be directly
+specified with the '-jobname' option, as in 'pdflatex -jobname=myname'
+(*note Command line input::, for a real example).
+
+   The final possibility is 'texput', which is the fallback default if
+no other name is available to TeX.  That is, if no '-jobname' option was
+specified, and the compilation stops before any file has been input,
+then the log file will be named 'texput.log'.
+
+   A special case of this is that in LaTeX versions of (approximately)
+2020 or later, the jobname is also 'texput' if the first '\input' occurs
+as a result of being called by either '\documentclass' or
+'\RequirePackage'.  So this will produce a file named 'texput.pdf':
+
+     pdflatex "\documentclass{minimal}\begin{document}Hello!\end{document}"
+
+   However, this special case only applies to those two commands.  Thus,
+with
+
+     pdflatex "\documentclass{article}\usepackage{lipsum}\input{thesis}"
+
+the output file is 'lipsum.pdf', as '\usepackage' calls '\input'.
+
+   Within the document, the macro '\jobname' expands to the jobname.
+(When you run LaTeX on a file whose name contains spaces, the string
+returned by '\jobname' contains matching start and end quotes.)  In the
+expansion of that macro, all characters are of catcode 12 (other) except
+that spaces are category 10, including letters that are normally
+catcode 11.
+
+   Because of this catcode situation, using the jobname in a conditional
+can become complicated.  One solution is to use the macro '\IfBeginWith'
+from the 'xstring' package in its star variant, which is insensitive to
+catcode.  For example, in the following text the footnote "Including
+Respublica Bananensis Francorum." is only present if the jobname starts
+with 'my-doc'.
+
+     If a democracy is just a regime where citizens vote then
+     all banana republics \IfBeginWith*{\jobname}{my-doc}%
+     {\footnote{Including Respublica Bananensis Francorum.}}{} are
+     democracies.
+
+   Manipulating the value of '\jobname' inside a document does not
+change the name of the output file or the log file.
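+
+   Another approach (a sketch only; the helper macro names here are
+arbitrary, and it tests for an exact match rather than a prefix) is to
+compare against a string built with the e-TeX primitive '\detokenize',
+which produces the same catcodes as the expansion of '\jobname':
+
+     \edef\currentjob{\jobname}
+     \edef\targetjob{\detokenize{my-doc}}
+     \ifx\currentjob\targetjob
+       % runs only when the jobname is exactly my-doc
+     \fi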
+
+
+File: latex2e.info, Node: Recovering from errors, Prev: Jobname, Up: Command line interface
+
+28.4 Recovering from errors
+===========================
+
+If LaTeX finds an error in your document then it gives you an error
+message and prompts you with a question mark, '?'.  For instance,
+running LaTeX on this file
+
+     \newcommand{\NP}{\ensuremath{\textbf{NP}}}
+     The \PN{} problem is a million dollar one.
+
+causes it to show this, and wait for input.
+
+     ! Undefined control sequence.
+     l.5 The \PN
+                {} problem is a million dollar one.
+     ?
+
+The simplest thing is to enter 'x' and 'RETURN' and fix the typo.  You
+could instead enter '?' and 'RETURN' to see other options.
+
+   There are two other error scenarios.  The first is that you forgot to
+include the '\end{document}' or misspelled it.  In this case LaTeX gives
+you a '*' prompt.  You can get back to the command line by typing
+'\stop' and 'RETURN'; this command does its best to exit LaTeX
+immediately, whatever state it may be in.
+
+   The last scenario is that you mistyped the filename.  For instance,
+instead of 'pdflatex test' you might type 'pdflatex tste'.
+
+     ! I can't find file `tste'.
+     <*> tste
+
+     (Press Enter to retry, or Control-D to exit)
+     Please type another input file name:
+
+The simplest thing is to enter 'CTRL-D' (holding down the Control and
+'d' keys at the same time), and then retype the correct command line.
+
+
+File: latex2e.info, Node: Document templates, Next: Index, Prev: Command line interface, Up: Top
+
+Appendix A Document templates
+*****************************
+
+Although they are primarily illustrative material, these document
+templates may still be useful.  Additional template resources are listed
+at <https://tug.org/interest.html#latextemplates>.
+
+* Menu:
+
+* beamer template::
+* article template::
+* book template::
+* Larger book template::
+
+
+File: latex2e.info, Node: beamer template, Next: article template, Up: Document templates
+
+A.1 'beamer' template
+=====================
+
+The 'beamer' class creates presentation slides.  It has a vast array of
+features, but here is a basic template:
+
+\documentclass{beamer}
+
+\title{Beamer Class Template}
+\author{Alex Author}
+\date{July 31, 2020}
+
+\begin{document}
+
+\maketitle
+
+% without [fragile], any {verbatim} code gets mysterious errors.
+\begin{frame}[fragile]
+  \frametitle{First Slide}
+
+\begin{verbatim}
+  This is \verbatim!
+\end{verbatim}
+
+\end{frame}
+
+\end{document}
+
+   The Beamer package on CTAN: <https://ctan.org/pkg/beamer>.
+
+
+File: latex2e.info, Node: article template, Next: book template, Prev: beamer template, Up: Document templates
+
+A.2 'article' template
+======================
+
+A simple template for an article.
+
+\documentclass{article}
+\title{Article Class Template}
+\author{Alex Author}
+
+\begin{document}
+\maketitle
+
+\section{First section}
+Some text.
+
+\subsection{First section, first subsection}
+Additional text.
+
+\section{Second section}
+Some more text.
+
+\end{document}
+
+
+File: latex2e.info, Node: book template, Next: Larger book template, Prev: article template, Up: Document templates
+
+A.3 'book' template
+===================
+
+This is a straightforward template for a book.  *Note Larger book
+template::, for a more elaborate one.
+
+\documentclass{book}
+\title{Book Class Template}
+\author{Alex Author}
+
+\begin{document}
+\maketitle
+
+\chapter{First}
+Some text.
+
+\chapter{Second}
+Some other text.
+
+\section{A subtopic}
+The end.
+
+\end{document}
+
+
+File: latex2e.info, Node: Larger book template, Prev: book template, Up: Document templates
+
+A.4 Larger 'book' template
+==========================
+
+This is a somewhat elaborate template for a book.  *Note book
+template::, for a simpler one.
+
+   This template uses '\frontmatter', '\mainmatter', and '\backmatter'
+to control the typography of the three main areas of a book (*note
+\frontmatter & \mainmatter & \backmatter::).  The book has a
+bibliography and an index.
+
+   Also notable is that it uses '\include' and '\includeonly' (*note
+Splitting the input::).  While you are working on a chapter you can
+comment out all the other chapter entries from the argument to
+'\includeonly'.
That will speed up compilation without losing any +information such as cross-references. (Material that does not need to +come on a new page is brought in with '\input' instead of '\include'. +You don't get the cross-reference benefit with '\input'.) + +\documentclass[titlepage]{book} +\usepackage{makeidx}\makeindex + +\title{Book Class Template} +\author{Alex Author} + +\includeonly{% +% frontcover, + preface, + chap1, +% appenA, + } + +\begin{document} +\frontmatter +\include{frontcover} + % maybe comment out while drafting: +\maketitle \input{dedication} \input{copyright} +\tableofcontents +\include{preface} + +\mainmatter +\include{chap1} +... +\appendix +\include{appenA} +... + +\backmatter +\bibliographystyle{apalike} +\addcontentsline{toc}{chapter}{Bibliography} +\bibliography +\addcontentsline{toc}{chapter}{Index} +\printindex + +\include{backcover} +\end{document} + + +File: latex2e.info, Node: Index, Prev: Document templates, Up: Top + +Index +***** + +[index] +* Menu: + +* $ inline math: math. (line 12) +* $$...$$ plain TeX display math: displaymath. (line 32) +* & for table cells: tabular. (line 32) +* * prompt: Recovering from errors. + (line 23) +* *, to \nocite all keys: Using BibTeX. (line 61) +* *-form of environment commands: \newenvironment & \renewenvironment. + (line 27) +* *-form of sectioning commands: Sectioning. (line 31) +* *-form, defining new commands: \newcommand & \renewcommand. + (line 27) +* --disable-write18 command-line option: Command line options. + (line 61) +* --enable-write18 command-line option: Command line options. + (line 61) +* --file-line-error command-line option: Command line options. + (line 82) +* --halt-on-error command-line option: Command line options. + (line 79) +* --help command-line option: Command line options. + (line 23) +* --interaction command-line option: Command line options. + (line 26) +* --jobname command-line option: Command line options. + (line 43) +* --no-file-line-error command-line option: Command line options. + (line 82) +* --no-shell-escape command-line option: Command line options. + (line 61) +* --output-directory command-line option: Command line options. + (line 48) +* --shell-escape command-line option: Command line options. + (line 61) +* --version command-line option: Command line options. + (line 18) +* -1, write stream number: \write. (line 30) +* .aux file: Output files. (line 33) +* .aux file and BibTeX commands: BibTeX error messages. + (line 6) +* .dvi file: Output files. (line 10) +* .fmt file: TeX engines. (line 6) +* .glo file: Glossaries. (line 43) +* .idx file: Indexes. (line 6) +* .idx file <1>: makeindex. (line 6) +* .ind file: makeindex. (line 6) +* .isty file: makeindex. (line 18) +* .lof file: Output files. (line 43) +* .lof file <1>: Table of contents etc.. + (line 6) +* .lof file <2>: \contentsline. (line 6) +* .log file: Output files. (line 28) +* .lot file: Output files. (line 43) +* .lot file <1>: Table of contents etc.. + (line 6) +* .lot file <2>: \contentsline. (line 6) +* .pdf file: Output files. (line 20) +* .tex, default extension: Command line interface. + (line 19) +* .toc file: Output files. (line 43) +* .toc file <1>: Table of contents etc.. + (line 6) +* .toc file <2>: \contentsline. (line 6) +* .xdv file: TeX engines. (line 42) +* /bin/sh, used by \write18: \write18. (line 60) +* 10pt option: Document class options. + (line 14) +* 11pt option: Document class options. + (line 14) +* 12pt option: Document class options. + (line 14) +* : for math: Colon character & \colon. 
+ (line 6) +* [...] for optional arguments: LaTeX command syntax. + (line 6) +* \ (backslash-space): \(SPACE). (line 6) +* \ character starting commands: LaTeX command syntax. + (line 6) +* \!: Spacing in math mode. + (line 61) +* \" (umlaut accent): Accents. (line 25) +* \#: Reserved characters. (line 13) +* \$: Reserved characters. (line 13) +* \%: Reserved characters. (line 13) +* \&: Reserved characters. (line 13) +* \' (acute accent): Accents. (line 29) +* \' (tabbing): tabbing. (line 87) +* \(...\) inline math: math. (line 12) +* \*: \*. (line 6) +* \+: tabbing. (line 79) +* \,: Spacing in math mode. + (line 47) +* \-: tabbing. (line 83) +* \- (hyphenation): \- (hyphenation). (line 6) +* \. (dot-over accent): Accents. (line 32) +* \/: \/. (line 6) +* \:: Spacing in math mode. + (line 36) +* \;: Spacing in math mode. + (line 23) +* \<: tabbing. (line 75) +* \= (macron accent): Accents. (line 36) +* \= (tabbing): tabbing. (line 69) +* \>: tabbing. (line 73) +* \> <1>: Spacing in math mode. + (line 36) +* \> (tabbing): tabbing. (line 72) +* \@: \@. (line 6) +* \@auxout: \write. (line 45) +* \@beginparpenalty: list. (line 243) +* \@dottedtocline: \@dottedtocline. (line 6) +* \@endparpenalty: list. (line 251) +* \@fnsymbol: \footnote. (line 24) +* \@ifstar: \@ifstar. (line 6) +* \@itempenalty: list. (line 247) +* \@mainaux: \write. (line 45) +* \@startsection: \@startsection. (line 6) +* \a (tabbing): tabbing. (line 102) +* \a' (acute accent in tabbing): tabbing. (line 103) +* \a= (macron accent in tabbing): tabbing. (line 103) +* \aa (å): Additional Latin letters. + (line 12) +* \AA (Å): Additional Latin letters. + (line 12) +* \accent: \accent. (line 6) +* \acute: Math accents. (line 10) +* \addcontentsline: \addcontentsline. (line 6) +* \address: \address. (line 6) +* \addtocontents{EXT}{TEXT}: \addtocontents. (line 6) +* \addtocounter: \addtocounter. (line 6) +* \addtolength: \addtolength. (line 6) +* \addvspace: \addvspace. (line 6) +* \ae (æ): Additional Latin letters. + (line 16) +* \AE (Æ): Additional Latin letters. + (line 16) +* \aleph: Math symbols. (line 21) +* \Alph example: enumerate. (line 61) +* \alpha: Math symbols. (line 24) +* \alph{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 14) +* \Alph{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 19) +* \alsoname: \index. (line 58) +* \amalg: Math symbols. (line 27) +* \and for \author: \maketitle. (line 43) +* \angle: Math symbols. (line 30) +* \appendix: \appendix. (line 6) +* \approx: Math symbols. (line 34) +* \arabic{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 24) +* \arccos: Math functions. (line 9) +* \arcsin: Math functions. (line 12) +* \arctan: Math functions. (line 15) +* \arg: Math functions. (line 18) +* \arraycolsep: array. (line 51) +* \arrayrulewidth: tabular. (line 157) +* \arraystretch: tabular. (line 163) +* \ast: Math symbols. (line 37) +* \asymp: Math symbols. (line 44) +* \AtBeginDocument: \AtBeginDocument. (line 6) +* \AtBeginDvi: Class and package commands. + (line 9) +* \AtEndDocument: \AtEndDocument. (line 6) +* \AtEndOfClass: Class and package commands. + (line 14) +* \AtEndOfPackage: Class and package commands. + (line 14) +* \author{NAME1 \and NAME2 \and ...}: \maketitle. (line 42) +* \a` (grave accent in tabbing): tabbing. (line 103) +* \b (bar-under accent): Accents. (line 51) +* \backmatter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* \backslash: Math symbols. (line 47) +* \bar: Math accents. 
(line 13) +* \baselineskip: Low-level font commands. + (line 101) +* \baselineskip <1>: \baselineskip & \baselinestretch. + (line 6) +* \baselinestretch: \baselineskip & \baselinestretch. + (line 6) +* \begin: Environments. (line 6) +* \beta: Math symbols. (line 51) +* \bf: Font styles. (line 75) +* \bfseries: Font styles. (line 40) +* \bibdata: BibTeX error messages. + (line 16) +* \bibitem: \bibitem. (line 6) +* \bibliography: Using BibTeX. (line 6) +* \bibliography and internal \bibdata: BibTeX error messages. + (line 17) +* \bibliographystyle: Using BibTeX. (line 6) +* \bibliographystyle and internal \bibstyle: BibTeX error messages. + (line 20) +* \bibname: thebibliography. (line 44) +* \bibstyle: BibTeX error messages. + (line 19) +* \bigcap: Math symbols. (line 54) +* \bigcirc: Math symbols. (line 58) +* \bigcup: Math symbols. (line 61) +* \bigodot: Math symbols. (line 65) +* \bigoplus: Math symbols. (line 68) +* \bigotimes: Math symbols. (line 71) +* \bigskip: \bigskip & \medskip & \smallskip. + (line 28) +* \bigskipamount: \bigskip & \medskip & \smallskip. + (line 29) +* \bigsqcup: Math symbols. (line 80) +* \bigtriangledown: Math symbols. (line 74) +* \bigtriangleup: Math symbols. (line 77) +* \biguplus: Math symbols. (line 83) +* \bigvee: Math symbols. (line 87) +* \bigwedge: Math symbols. (line 90) +* \bmod: Math functions. (line 21) +* \boldmath: \boldmath & \unboldmath. + (line 6) +* \boldmath <1>: \boldmath & \unboldmath. + (line 14) +* \bot: Math symbols. (line 93) +* \bottomfraction: Floats. (line 94) +* \bottomfraction <1>: Floats. (line 95) +* \bowtie: Math symbols. (line 97) +* \Box: Math symbols. (line 100) +* \breve: Math accents. (line 16) +* \bullet: Math symbols. (line 105) +* \c (cedilla accent): Accents. (line 61) +* \cal: Font styles. (line 78) +* \cap: Math symbols. (line 108) +* \capitalacute: Accents. (line 29) +* \capitalbreve: Accents. (line 89) +* \capitalcaron: Accents. (line 93) +* \capitalcedilla: Accents. (line 61) +* \capitalcircumflex: Accents. (line 40) +* \capitaldieresis: Accents. (line 25) +* \capitaldotaccent: Accents. (line 65) +* \capitalgrave: Accents. (line 44) +* \capitalhungarumlaut: Accents. (line 69) +* \capitalmacron: Accents. (line 36) +* \capitalnewtie: Accents. (line 83) +* \capitalogonek: Accents. (line 73) +* \capitalring: Accents. (line 77) +* \capitaltie: Accents. (line 83) +* \capitaltilde: Accents. (line 48) +* \caption: \caption. (line 6) +* \caption <1>: figure. (line 43) +* \caption <2>: table. (line 42) +* \cc: \cc. (line 6) +* \cdot: Math symbols. (line 112) +* \cdots: Dots. (line 17) +* \centering: \centering. (line 6) +* \chapter: Sectioning. (line 6) +* \chapter <1>: \chapter. (line 6) +* \check: Math accents. (line 19) +* \CheckCommand: Class and package commands. + (line 21) +* \CheckCommand*: Class and package commands. + (line 21) +* \chi: Math symbols. (line 115) +* \circ: Math symbols. (line 118) +* \circle: \circle. (line 6) +* \citation: BibTeX error messages. + (line 22) +* \cite: \cite. (line 6) +* \cite and internal \citation: BibTeX error messages. + (line 23) +* \ClassError: Class and package commands. + (line 41) +* \ClassInfo: Class and package commands. + (line 41) +* \ClassInfoNoLine: Class and package commands. + (line 41) +* \ClassWarning: Class and package commands. + (line 41) +* \ClassWarningNoLine: Class and package commands. + (line 41) +* \cleardoublepage: \clearpage & \cleardoublepage. + (line 6) +* \clearpage: \clearpage & \cleardoublepage. + (line 6) +* \cline: \cline. 
(line 6) +* \closein: \openin & \openout. (line 6) +* \closeout: \openin & \openout. (line 6) +* \closing: \closing. (line 6) +* \clubsuit: Math symbols. (line 122) +* \colon: Colon character & \colon. + (line 6) +* \columnsep: \twocolumn. (line 20) +* \columnsep <1>: Page layout parameters. + (line 6) +* \columnsep <2>: Page layout parameters. + (line 9) +* \columnseprule: \twocolumn. (line 26) +* \columnseprule <1>: Page layout parameters. + (line 7) +* \columnseprule <2>: Page layout parameters. + (line 9) +* \columnwidth: \twocolumn. (line 33) +* \columnwidth <1>: Page layout parameters. + (line 8) +* \columnwidth <2>: Page layout parameters. + (line 9) +* \complement: Math symbols. (line 125) +* \cong: Math symbols. (line 131) +* \contentsline: Table of contents etc.. + (line 62) +* \contentsline <1>: \addcontentsline. (line 10) +* \contentsline <2>: \contentsline. (line 6) +* \coprod: Math symbols. (line 134) +* \copyright: Text symbols. (line 13) +* \cos: Math functions. (line 24) +* \cosh: Math functions. (line 27) +* \cot: Math functions. (line 30) +* \coth: Math functions. (line 33) +* \csc: Math functions. (line 36) +* \cup: Math symbols. (line 137) +* \CurrentOption: Class and package commands. + (line 60) +* \d (dot-under accent): Accents. (line 65) +* \dag: Text symbols. (line 17) +* \dagger: Math symbols. (line 141) +* \dashbox: \dashbox. (line 6) +* \dashv: Math symbols. (line 144) +* \date{TEXT}: \maketitle. (line 50) +* \day: \day & \month & \year. + (line 6) +* \dblfloatpagefraction: \twocolumn. (line 66) +* \dblfloatsep: \twocolumn. (line 72) +* \dbltextfloatsep: \twocolumn. (line 79) +* \dbltopfraction: \twocolumn. (line 45) +* \dbltopnumber: \twocolumn. (line 84) +* \ddag: Text symbols. (line 20) +* \ddagger: Math symbols. (line 148) +* \ddot: Math accents. (line 22) +* \ddots: Dots. (line 21) +* \DeclareFontEncoding: \DeclareFontEncoding. + (line 6) +* \DeclareGraphicsExtensions: \DeclareGraphicsExtensions. + (line 6) +* \DeclareGraphicsRule: \DeclareGraphicsRule. + (line 6) +* \DeclareOption: Class and package commands. + (line 66) +* \DeclareOption*: Class and package commands. + (line 66) +* \DeclareRobustCommand: Class and package commands. + (line 96) +* \DeclareRobustCommand*: Class and package commands. + (line 96) +* \DeclareTextAccent: \DeclareTextAccent. (line 6) +* \DeclareTextAccent <1>: \DeclareTextAccentDefault. + (line 6) +* \DeclareTextAccentDefault: \DeclareTextAccentDefault. + (line 6) +* \DeclareTextCommand: \DeclareTextCommand & \ProvideTextCommand. + (line 6) +* \DeclareTextCommand <1>: \DeclareTextCommandDefault & \ProvideTextCommandDefault. + (line 6) +* \DeclareTextCommandDefault: \DeclareTextCommandDefault & \ProvideTextCommandDefault. + (line 6) +* \DeclareTextComposite: \DeclareTextComposite. + (line 6) +* \DeclareTextCompositeCommand: \DeclareTextCompositeCommand. + (line 6) +* \DeclareTextSymbol: \DeclareTextCommand & \ProvideTextCommand. + (line 37) +* \DeclareTextSymbol <1>: \DeclareTextSymbol. (line 6) +* \DeclareTextSymbol <2>: \DeclareTextSymbolDefault. + (line 6) +* \DeclareTextSymbolDefault: \DeclareTextSymbolDefault. + (line 6) +* \deg: Math functions. (line 39) +* \DelayedShellEscape: \write18. (line 71) +* \Delta: Math symbols. (line 151) +* \delta: Math symbols. (line 154) +* \det: Math functions. (line 42) +* \dh (ð): Additional Latin letters. + (line 20) +* \DH (Ð): Additional Latin letters. + (line 20) +* \Diamond: Math symbols. (line 157) +* \diamond: Math symbols. (line 161) +* \diamondsuit: Math symbols. 
(line 165) +* \dim: Math functions. (line 45) +* \displaystyle: Math styles. (line 29) +* \div: Math symbols. (line 168) +* \dj: Additional Latin letters. + (line 26) +* \DJ: Additional Latin letters. + (line 26) +* \documentclass: Document classes. (line 6) +* \documentclass, and texput jobname: Jobname. (line 32) +* \dot: Math accents. (line 25) +* \doteq: Math symbols. (line 171) +* \dotfill: \hrulefill & \dotfill. + (line 6) +* \dots: Text symbols. (line 37) +* \doublerulesep: tabular. (line 168) +* \downarrow: Math symbols. (line 175) +* \Downarrow: Math symbols. (line 179) +* \ell: Math symbols. (line 183) +* \emph: Font styles. (line 60) +* \emptyset: Math symbols. (line 186) +* \encl: \encl. (line 6) +* \end: Environments. (line 6) +* \endinput: \endinput. (line 6) +* \enlargethispage: \enlargethispage. (line 6) +* \enspace: \enspace & \quad & \qquad. + (line 6) +* \enumi: enumerate. (line 47) +* \enumii: enumerate. (line 47) +* \enumiii: enumerate. (line 47) +* \enumiv: enumerate. (line 47) +* \epsilon: Math symbols. (line 189) +* \equiv: Math symbols. (line 195) +* \eta: Math symbols. (line 198) +* \evensidemargin: Document class options. + (line 84) +* \evensidemargin <1>: Page layout parameters. + (line 67) +* \evensidemargin <2>: Page layout parameters. + (line 68) +* \ExecuteOptions: Class and package commands. + (line 172) +* \exists: Math symbols. (line 201) +* \exp: Math functions. (line 48) +* \externaldocument: xr package. (line 6) +* \extracolsep: tabular. (line 108) +* \fbox: \fbox & \framebox. (line 6) +* \fboxrule: \framebox (picture). (line 31) +* \fboxrule <1>: \fbox & \framebox. (line 38) +* \fboxrule <2>: \fbox & \framebox. (line 37) +* \fboxsep: \framebox (picture). (line 31) +* \fboxsep <1>: \fbox & \framebox. (line 43) +* \fboxsep <2>: \fbox & \framebox. (line 42) +* \fill: \hfill. (line 18) +* \flat: Math symbols. (line 204) +* \floatpagefraction: Floats. (line 98) +* \floatpagefraction <1>: Floats. (line 99) +* \floatsep: Floats. (line 114) +* \floatsep <1>: Floats. (line 115) +* \flushbottom: \flushbottom. (line 6) +* \fnsymbol, and footnotes: \footnote. (line 24) +* \fnsymbol{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 37) +* \fontdimen1: \/. (line 40) +* \fontencoding: Low-level font commands. + (line 11) +* \fontfamily: Low-level font commands. + (line 21) +* \fontseries: Low-level font commands. + (line 48) +* \fontshape: Low-level font commands. + (line 88) +* \fontsize: Low-level font commands. + (line 101) +* \footnote: \footnote. (line 6) +* \footnotemark: \footnotemark. (line 6) +* \footnoterule: \footnote. (line 34) +* \footnotesep: \footnote. (line 46) +* \footnotesize: Font sizes. (line 11) +* \footnotetext: \footnotetext. (line 6) +* \footskip: Page layout parameters. + (line 26) +* \footskip <1>: Page layout parameters. + (line 27) +* \forall: Math symbols. (line 207) +* \frac: \frac. (line 6) +* \frame: \frame. (line 6) +* \framebox: \framebox (picture). (line 6) +* \framebox <1>: \fbox & \framebox. (line 6) +* \frenchspacing: \frenchspacing & \nonfrenchspacing. + (line 6) +* \frontmatter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* \frown: Math symbols. (line 210) +* \fussy: \fussy & \sloppy. (line 6) +* \Gamma: Math symbols. (line 213) +* \gamma: Math symbols. (line 216) +* \gcd: Math functions. (line 51) +* \ge: Math symbols. (line 219) +* \geq: Math symbols. (line 223) +* \gets: Math symbols. (line 227) +* \gg: Math symbols. (line 230) +* \gls: \gls. (line 6) +* \graphicspath: \graphicspath. 
(line 6) +* \graphpaper: \graphpaper. (line 6) +* \grave: Math accents. (line 28) +* \guillemotleft («): Text symbols. (line 29) +* \guillemotright (»): Text symbols. (line 30) +* \guilsinglleft (‹): Text symbols. (line 31) +* \guilsinglright (›): Text symbols. (line 32) +* \H (Hungarian umlaut accent): Accents. (line 69) +* \hat: Math accents. (line 31) +* \hbar: Math symbols. (line 233) +* \headheight: Page layout parameters. + (line 14) +* \headheight <1>: Page layout parameters. + (line 15) +* \headsep: Page layout parameters. + (line 19) +* \headsep <1>: Page layout parameters. + (line 20) +* \heartsuit: Math symbols. (line 236) +* \hfill: \hfill. (line 6) +* \hline: \hline. (line 6) +* \hom: Math functions. (line 54) +* \hookleftarrow: Math symbols. (line 239) +* \hookrightarrow: Math symbols. (line 242) +* \hrulefill: \hrulefill & \dotfill. + (line 6) +* \hsize: Page layout parameters. + (line 119) +* \hsize <1>: Page layout parameters. + (line 120) +* \hspace: \hspace. (line 6) +* \hss: \hss. (line 6) +* \huge: Font sizes. (line 11) +* \Huge: Font sizes. (line 11) +* \hyphenation: \hyphenation. (line 6) +* \i (dotless i): Accents. (line 19) +* \IfBeginWith* macro from xstring: Jobname. (line 53) +* \iff: Math symbols. (line 245) +* \IfFileExists: Class and package commands. + (line 128) +* \ignorespaces: \ignorespaces & \ignorespacesafterend. + (line 6) +* \ignorespacesafterend: \ignorespaces & \ignorespacesafterend. + (line 6) +* \ij (ij): Additional Latin letters. + (line 32) +* \IJ (IJ): Additional Latin letters. + (line 32) +* \Im: Math symbols. (line 249) +* \imath: Math symbols. (line 252) +* \immediate\write: \write. (line 60) +* \in: Math symbols. (line 256) +* \include: \include & \includeonly. + (line 6) +* \includegraphics: \includegraphics. (line 6) +* \includeonly: \include & \includeonly. + (line 6) +* \indent: \indent & \noindent. (line 6) +* \index: Indexes. (line 6) +* \index <1>: \index. (line 6) +* \indexentry: \index. (line 108) +* \indexspace: Produce the index manually. + (line 22) +* \indexspace <1>: makeindex. (line 59) +* \inf: Math functions. (line 57) +* \infty: Math symbols. (line 260) +* \input: \input. (line 6) +* \inputencoding: inputenc package. (line 50) +* \InputIfFileExists: Class and package commands. + (line 128) +* \int: Math symbols. (line 263) +* \intextsep: Floats. (line 118) +* \intextsep <1>: Floats. (line 119) +* \iota: Math symbols. (line 266) +* \it: Font styles. (line 81) +* \item: description. (line 31) +* \item <1>: enumerate. (line 29) +* \item <2>: itemize. (line 6) +* \item <3>: itemize. (line 30) +* \item <4>: Produce the index manually. + (line 22) +* \itemindent: list. (line 84) +* \itemsep: list. (line 88) +* \itshape: Font styles. (line 34) +* \j (dotless j): Accents. (line 19) +* \jmath: Math symbols. (line 272) +* \jobname: Jobname. (line 6) +* \jobname <1>: Jobname. (line 46) +* \Join: Math symbols. (line 269) +* \k (ogonek): Accents. (line 73) +* \kappa: Math symbols. (line 276) +* \ker: Math functions. (line 60) +* \kill: tabbing. (line 107) +* \l (ł): Additional Latin letters. + (line 36) +* \L (Ł): Additional Latin letters. + (line 36) +* \l@chapter: \contentsline. (line 6) +* \l@section: \contentsline. (line 6) +* \l@subsection: \contentsline. (line 6) +* \label: \label. (line 6) +* \labelenumi: enumerate. (line 56) +* \labelenumii: enumerate. (line 56) +* \labelenumiii: enumerate. (line 56) +* \labelenumiv: enumerate. (line 56) +* \labelitemi: itemize. (line 42) +* \labelitemii: itemize. 
(line 42) +* \labelitemiii: itemize. (line 42) +* \labelitemiv: itemize. (line 42) +* \labelsep: list. (line 100) +* \labelwidth: list. (line 105) +* \Lambda: Math symbols. (line 279) +* \lambda: Math symbols. (line 282) +* \land: Math symbols. (line 285) +* \langle: Math symbols. (line 289) +* \large: Font sizes. (line 11) +* \Large: Font sizes. (line 11) +* \LARGE: Font sizes. (line 11) +* \LastDeclaredEncoding: \LastDeclaredEncoding. + (line 6) +* \LaTeX: Text symbols. (line 23) +* \LaTeXe: Text symbols. (line 26) +* \lbrace: Math symbols. (line 293) +* \lbrack: Math symbols. (line 296) +* \lceil: Math symbols. (line 299) +* \ldots: Dots. (line 24) +* \ldots <1>: Text symbols. (line 36) +* \le: Math symbols. (line 303) +* \leadsto: Math symbols. (line 306) +* \left: \left & \right. (line 6) +* \Leftarrow: Math symbols. (line 312) +* \leftarrow: Math symbols. (line 316) +* \lefteqn: eqnarray. (line 44) +* \leftharpoondown: Math symbols. (line 320) +* \leftharpoonup: Math symbols. (line 323) +* \leftmargin: itemize. (line 58) +* \leftmargin <1>: list. (line 125) +* \leftmargini: itemize. (line 58) +* \leftmarginii: itemize. (line 58) +* \leftmarginiii: itemize. (line 58) +* \leftmarginiv: itemize. (line 58) +* \leftmarginv: itemize. (line 58) +* \leftmarginvi: itemize. (line 58) +* \Leftrightarrow: Math symbols. (line 326) +* \leftrightarrow: Math symbols. (line 330) +* \leq: Math symbols. (line 334) +* \lfloor: Math symbols. (line 337) +* \lg: Math functions. (line 63) +* \lhd: Math symbols. (line 340) +* \lim: Math functions. (line 66) +* \liminf: Math functions. (line 69) +* \limsup: Math functions. (line 72) +* \line: \line. (line 6) +* \linebreak: \linebreak & \nolinebreak. + (line 6) +* \lineskip: \baselineskip & \baselinestretch. + (line 45) +* \lineskiplimit: \baselineskip & \baselinestretch. + (line 45) +* \linespread: Low-level font commands. + (line 112) +* \linespread <1>: \baselineskip & \baselinestretch. + (line 6) +* \linethickness: \linethickness. (line 6) +* \linewidth: Page layout parameters. + (line 33) +* \linewidth <1>: Page layout parameters. + (line 34) +* \listoffigures: Table of contents etc.. + (line 6) +* \listoffigures <1>: \contentsline. (line 6) +* \listoftables: Table of contents etc.. + (line 6) +* \listoftables <1>: \contentsline. (line 6) +* \listparindent: list. (line 142) +* \ll: Math symbols. (line 346) +* \ln: Math functions. (line 75) +* \lnot: Math symbols. (line 349) +* \LoadClass: Class and package commands. + (line 148) +* \LoadClassWithOptions: Class and package commands. + (line 148) +* \location: \location. (line 6) +* \log: Math functions. (line 78) +* \long: \newcommand & \renewcommand. + (line 27) +* \longleftarrow: Math symbols. (line 352) +* \longleftrightarrow: Math symbols. (line 356) +* \longmapsto: Math symbols. (line 360) +* \longrightarrow: Math symbols. (line 364) +* \lor: Math symbols. (line 368) +* \lq: Text symbols. (line 42) +* \mainmatter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* \makebox: \mbox & \makebox. (line 6) +* \makebox (for picture): \makebox (picture). (line 6) +* \makeglossary: Glossaries. (line 6) +* \makeglossary <1>: Glossaries. (line 43) +* \makeindex: Indexes. (line 6) +* \makelabel: list. (line 61) +* \makelabels: \makelabels. (line 6) +* \maketitle: \maketitle. (line 6) +* \mapsto: Math symbols. (line 372) +* \marginpar: Marginal notes. (line 6) +* \marginparpush: Page layout parameters. + (line 40) +* \marginparpush <1>: Page layout parameters. 
+ (line 43) +* \marginparpush <2>: Marginal notes. (line 35) +* \marginparsep: Marginal notes. (line 40) +* \marginparwidth: Page layout parameters. + (line 42) +* \marginparwidth <1>: Page layout parameters. + (line 43) +* \marginparwidth <2>: Marginal notes. (line 44) +* \marginsep: Page layout parameters. + (line 41) +* \marginsep <1>: Page layout parameters. + (line 43) +* \markboth{LEFT-HEAD}{RIGHT-HEAD}: \pagestyle. (line 75) +* \markright{RIGHT}: \pagestyle. (line 84) +* \mathbf: Font styles. (line 111) +* \mathcal: Font styles. (line 127) +* \mathdollar: Math symbols. (line 832) +* \mathnormal: Font styles. (line 124) +* \mathparagraph: Math symbols. (line 835) +* \mathring: Math accents. (line 34) +* \mathrm: Font styles. (line 108) +* \mathsection: Math symbols. (line 838) +* \mathsf: Font styles. (line 114) +* \mathsterling: Math symbols. (line 841) +* \mathstrut: \mathstrut. (line 6) +* \mathtt: Font styles. (line 117) +* \mathunderscore: Math symbols. (line 844) +* \mathversion: Font styles. (line 129) +* \max: Math functions. (line 81) +* \mbox: \mbox & \makebox. (line 6) +* \mdseries: Font styles. (line 37) +* \medskip: \bigskip & \medskip & \smallskip. + (line 33) +* \medskipamount: \bigskip & \medskip & \smallskip. + (line 34) +* \medspace: Spacing in math mode. + (line 36) +* \message: \message. (line 6) +* \mho: Math symbols. (line 376) +* \mid: Math symbols. (line 379) +* \min: Math functions. (line 84) +* \models: Math symbols. (line 389) +* \month: \day & \month & \year. + (line 6) +* \mp: Math symbols. (line 393) +* \mu: Math symbols. (line 396) +* \multicolumn: \multicolumn. (line 6) +* \multiput: \multiput. (line 6) +* \nabla: Math symbols. (line 399) +* \name: \name. (line 6) +* \natural: Math symbols. (line 402) +* \ne: Math symbols. (line 405) +* \nearrow: Math symbols. (line 408) +* \NeedsTeXFormat: Class and package commands. + (line 182) +* \neg: Math symbols. (line 411) +* \negmedspace: Spacing in math mode. + (line 42) +* \negthickspace: Spacing in math mode. + (line 30) +* \negthinspace: Spacing in math mode. + (line 61) +* \negthinspace <1>: \thinspace & \negthinspace. + (line 6) +* \neq: Math symbols. (line 415) +* \newcommand: \newcommand & \renewcommand. + (line 6) +* \newcounter: \newcounter. (line 6) +* \newenvironment: \newenvironment & \renewenvironment. + (line 6) +* \newfont: \newfont. (line 6) +* \newglossaryentry: \newglossaryentry. (line 6) +* \newlength: \newlength. (line 6) +* \newline: \newline. (line 6) +* \: \(SPACE). (line 6) +* \newpage: \newpage. (line 6) +* \newsavebox: \newsavebox. (line 6) +* \newtheorem: \newtheorem. (line 6) +* \newtie: Accents. (line 83) +* \newwrite: \write. (line 24) +* \ng: Additional Latin letters. + (line 40) +* \NG: Additional Latin letters. + (line 40) +* \ni: Math symbols. (line 418) +* \nocite: \nocite. (line 6) +* \nocite and internal \citation: BibTeX error messages. + (line 23) +* \nocite {*}, for all keys: Using BibTeX. (line 61) +* \nocorr: Font styles. (line 22) +* \nocorrlist: Font styles. (line 22) +* \nofiles: \nofiles. (line 6) +* \noindent: \indent & \noindent. (line 6) +* \nolinebreak: \linebreak & \nolinebreak. + (line 6) +* \nonfrenchspacing: \frenchspacing & \nonfrenchspacing. + (line 6) +* \nonumber: eqnarray. (line 39) +* \nopagebreak: \pagebreak & \nopagebreak. + (line 6) +* \normalfont: Font styles. (line 58) +* \normalmarginpar: Marginal notes. (line 22) +* \normalsfcodes: \normalsfcodes. (line 6) +* \normalsize: Font sizes. (line 11) +* \not: Math symbols. 
(line 422) +* \notin: Math symbols. (line 430) +* \nu: Math symbols. (line 434) +* \numberline: \numberline. (line 6) +* \nwarrow: Math symbols. (line 437) +* \o (ø): Additional Latin letters. + (line 44) +* \O (Ø): Additional Latin letters. + (line 44) +* \obeycr: \obeycr & \restorecr. + (line 6) +* \oddsidemargin: Document class options. + (line 84) +* \oddsidemargin <1>: Page layout parameters. + (line 66) +* \oddsidemargin <2>: Page layout parameters. + (line 68) +* \odot: Math symbols. (line 440) +* \oe (œ): Additional Latin letters. + (line 48) +* \OE (Œ): Additional Latin letters. + (line 48) +* \oint: Math symbols. (line 444) +* \oldstylenums: Font styles. (line 133) +* \Omega: Math symbols. (line 447) +* \omega: Math symbols. (line 450) +* \ominus: Math symbols. (line 453) +* \onecolumn: \onecolumn. (line 6) +* \openin: \openin & \openout. (line 6) +* \opening: \opening. (line 6) +* \openout: \openin & \openout. (line 6) +* \oplus: Math symbols. (line 456) +* \OptionNotUsed: Class and package commands. + (line 200) +* \oslash: Math symbols. (line 460) +* \otimes: Math symbols. (line 463) +* \oval: \oval. (line 6) +* \overbrace{MATH}: Over- and Underlining. + (line 39) +* \overline{TEXT}: Over- and Underlining. + (line 20) +* \owns: Math symbols. (line 467) +* \P: Text symbols. (line 45) +* \PackageError: Class and package commands. + (line 41) +* \PackageInfo: Class and package commands. + (line 41) +* \PackageInfoNoLine: Class and package commands. + (line 41) +* \PackageWarning: Class and package commands. + (line 41) +* \PackageWarningNoLine: Class and package commands. + (line 41) +* \pagebreak: \pagebreak & \nopagebreak. + (line 6) +* \pagenumbering: \pagenumbering. (line 6) +* \pageref: \pageref. (line 6) +* \pagestyle: \pagestyle. (line 6) +* \paperheight: Page layout parameters. + (line 79) +* \paperheight <1>: Page layout parameters. + (line 80) +* \paperwidth: Page layout parameters. + (line 85) +* \paperwidth <1>: Page layout parameters. + (line 86) +* \par: \par. (line 6) +* \paragraph: Sectioning. (line 6) +* \paragraph <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* \parallel: Math symbols. (line 471) +* \parbox: \parbox. (line 6) +* \parindent: minipage. (line 103) +* \parindent <1>: \indent & \noindent. (line 6) +* \parindent <2>: \parindent & \parskip. + (line 6) +* \parsep: list. (line 148) +* \parskip: \parindent & \parskip. + (line 6) +* \parskip example: itemize. (line 78) +* \part: Sectioning. (line 6) +* \part <1>: \part. (line 6) +* \partial: Math symbols. (line 474) +* \partopsep: list. (line 157) +* \PassOptionsToClass: Class and package commands. + (line 206) +* \PassOptionsToPackage: Class and package commands. + (line 206) +* \pdfpageheight: Document class options. + (line 40) +* \pdfpagewidth: Document class options. + (line 40) +* \perp: Math symbols. (line 477) +* \Phi: Math symbols. (line 481) +* \phi: Math symbols. (line 484) +* \Pi: Math symbols. (line 488) +* \pi: Math symbols. (line 491) +* \pm: Math symbols. (line 495) +* \pmod: Math functions. (line 87) +* \poptabs: tabbing. (line 113) +* \poptabs <1>: tabbing. (line 114) +* \pounds: Text symbols. (line 49) +* \Pr: Math functions. (line 90) +* \prec: Math symbols. (line 498) +* \preceq: Math symbols. (line 501) +* \prevdepth: \baselineskip & \baselinestretch. + (line 45) +* \prime: Math symbols. (line 505) +* \printglossaries: Glossaries. (line 6) +* \printglossaries <1>: Glossaries. (line 43) +* \printindex: \printindex. 
(line 6) +* \ProcessOptions: Class and package commands. + (line 243) +* \ProcessOptions*: Class and package commands. + (line 243) +* \prod: Math symbols. (line 514) +* \propto: Math symbols. (line 517) +* \protect: \protect. (line 6) +* \protected@write: \write. (line 70) +* \providecommand: \providecommand. (line 6) +* \ProvidesClass: Class and package commands. + (line 281) +* \ProvidesFile: Class and package commands. + (line 314) +* \ProvidesPackage: Class and package commands. + (line 281) +* \ProvideTextCommand: \DeclareTextCommand & \ProvideTextCommand. + (line 6) +* \ProvideTextCommand <1>: \DeclareTextCommandDefault & \ProvideTextCommandDefault. + (line 6) +* \ProvideTextCommandDefault: \DeclareTextCommandDefault & \ProvideTextCommandDefault. + (line 6) +* \ps: \ps. (line 6) +* \Psi: Math symbols. (line 520) +* \psi: Math symbols. (line 523) +* \pushtabs: tabbing. (line 116) +* \put: \put. (line 6) +* \qbezier: \qbezier. (line 6) +* \qquad: Spacing in math mode. + (line 75) +* \qquad <1>: \enspace & \quad & \qquad. + (line 6) +* \quad: Spacing in math mode. + (line 69) +* \quad <1>: \enspace & \quad & \qquad. + (line 6) +* \quotedblbase („): Text symbols. (line 53) +* \quotesinglbase (‚): Text symbols. (line 54) +* \r (ring accent): Accents. (line 77) +* \raggedbottom: \raggedbottom. (line 6) +* \raggedleft: \raggedleft. (line 6) +* \raggedright: \raggedright. (line 6) +* \raisebox: \raisebox. (line 6) +* \rangle: Math symbols. (line 526) +* \rbrace: Math symbols. (line 530) +* \rbrack: Math symbols. (line 533) +* \rceil: Math symbols. (line 537) +* \Re: Math symbols. (line 540) +* \read: \read. (line 6) +* \ref: \ref. (line 6) +* \reflectbox: \scalebox. (line 6) +* \refname: thebibliography. (line 44) +* \refstepcounter: \refstepcounter. (line 6) +* \renewenvironment: \newenvironment & \renewenvironment. + (line 6) +* \RequirePackage: Class and package commands. + (line 323) +* \RequirePackage, and texput jobname: Jobname. (line 32) +* \RequirePackageWithOptions: Class and package commands. + (line 323) +* \resizebox: \resizebox. (line 6) +* \restorecr: \obeycr & \restorecr. + (line 6) +* \restriction: Math symbols. (line 545) +* \revemptyset: Math symbols. (line 550) +* \reversemarginpar: Marginal notes. (line 22) +* \rfloor: Math symbols. (line 555) +* \rhd: Math symbols. (line 559) +* \rho: Math symbols. (line 565) +* \right: \left & \right. (line 6) +* \Rightarrow: Math symbols. (line 569) +* \rightarrow: Math symbols. (line 573) +* \rightharpoondown: Math symbols. (line 577) +* \rightharpoonup: Math symbols. (line 580) +* \rightleftharpoons: Math symbols. (line 583) +* \rightmargin: list. (line 172) +* \rm: Font styles. (line 84) +* \rmfamily: Font styles. (line 31) +* \roman{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 27) +* \Roman{COUNTER}: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 32) +* \rotatebox: \rotatebox. (line 6) +* \rq: Text symbols. (line 57) +* \rule: \rule. (line 6) +* \S: Text symbols. (line 60) +* \samepage: \pagebreak & \nopagebreak. + (line 44) +* \savebox: \sbox & \savebox. (line 6) +* \sbox: \sbox & \savebox. (line 6) +* \sc: Font styles. (line 87) +* \scalebox: \scalebox. (line 6) +* \scriptscriptstyle: Math styles. (line 29) +* \scriptsize: Font sizes. (line 11) +* \scriptstyle: Math styles. (line 29) +* \scshape: Font styles. (line 52) +* \searrow: Math symbols. (line 586) +* \sec: Math functions. (line 93) +* \section: Sectioning. (line 6) +* \section <1>: \section. (line 6) +* \seename: \index. 
(line 58) +* \selectfont: Low-level font commands. + (line 118) +* \setcounter: \setcounter. (line 6) +* \setlength: \setlength. (line 6) +* \setminus: Math symbols. (line 589) +* \settodepth: \settodepth. (line 6) +* \settoheight: \settoheight. (line 6) +* \settowidth: \settowidth. (line 6) +* \sf: Font styles. (line 90) +* \sffamily: Font styles. (line 49) +* \sharp: Math symbols. (line 594) +* \ShellEscape: \write18. (line 71) +* \shipout and expansion: \write. (line 65) +* \shortstack: \shortstack. (line 6) +* \Sigma: Math symbols. (line 597) +* \sigma: Math symbols. (line 600) +* \signature: \signature. (line 11) +* \sim: Math symbols. (line 604) +* \simeq: Math symbols. (line 607) +* \sin: Math functions. (line 96) +* \sinh: Math functions. (line 99) +* \sl: Font styles. (line 93) +* \sloppy: \fussy & \sloppy. (line 6) +* \slshape: Font styles. (line 46) +* \small: Font sizes. (line 11) +* \smallint: Math symbols. (line 610) +* \smallskip: \bigskip & \medskip & \smallskip. + (line 38) +* \smallskipamount: \bigskip & \medskip & \smallskip. + (line 39) +* \smile: Math symbols. (line 614) +* \: \(SPACE). (line 6) +* \ <1>: \(SPACE). (line 6) +* \space: \include & \includeonly. + (line 88) +* \space <1>: \input. (line 32) +* \space <2>: \typeout. (line 22) +* \spacefactor: \spacefactor. (line 10) +* \spadesuit: Math symbols. (line 617) +* \sqcap: Math symbols. (line 620) +* \sqcup: Math symbols. (line 624) +* \sqrt: \sqrt. (line 6) +* \sqsubset: Math symbols. (line 628) +* \sqsubseteq: Math symbols. (line 633) +* \sqsupset: Math symbols. (line 637) +* \sqsupseteq: Math symbols. (line 642) +* \ss (ß): Additional Latin letters. + (line 52) +* \SS (SS): Additional Latin letters. + (line 52) +* \stackrel: \stackrel. (line 6) +* \star: Math symbols. (line 646) +* \stepcounter: \stepcounter. (line 6) +* \stop: Recovering from errors. + (line 23) +* \stretch: \stretch. (line 6) +* \strut: \strut. (line 6) +* \subitem: Produce the index manually. + (line 22) +* \subparagraph: Sectioning. (line 6) +* \subparagraph <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* \subsection: Sectioning. (line 6) +* \subsection <1>: \subsection. (line 6) +* \subset: Math symbols. (line 653) +* \subseteq: Math symbols. (line 656) +* \subsubitem: Produce the index manually. + (line 22) +* \subsubsection: Sectioning. (line 6) +* \subsubsection <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* \succ: Math symbols. (line 659) +* \succeq: Math symbols. (line 662) +* \sum: Math symbols. (line 666) +* \sup: Math functions. (line 102) +* \suppressfloats: Floats. (line 87) +* \supset: Math symbols. (line 669) +* \supseteq: Math symbols. (line 672) +* \surd: Math symbols. (line 675) +* \swarrow: Math symbols. (line 680) +* \symbol: Symbols by font position. + (line 6) +* \t (tie-after accent): Accents. (line 83) +* \: \(SPACE). (line 6) +* \tabbingsep: tabbing. (line 121) +* \tabcolsep: tabular. (line 172) +* \tableofcontents: Table of contents etc.. + (line 6) +* \tableofcontents <1>: \contentsline. (line 6) +* \tan: Math functions. (line 105) +* \tanh: Math functions. (line 108) +* \tau: Math symbols. (line 683) +* \telephone: \telephone. (line 6) +* \TeX: Text symbols. (line 64) +* \textascendercompwordmark: Text symbols. (line 102) +* \textasciicircum: Text symbols. (line 67) +* \textasciitilde: Text symbols. (line 70) +* \textasteriskcentered: Text symbols. (line 73) +* \textbackslash: Reserved characters. (line 18) +* \textbackslash <1>: Text symbols. (line 76) +* \textbar: Text symbols. 
(line 79) +* \textbardbl: Text symbols. (line 82) +* \textbf: Font styles. (line 40) +* \textbigcircle: Text symbols. (line 85) +* \textbraceleft: Text symbols. (line 88) +* \textbraceright: Text symbols. (line 91) +* \textbullet: Text symbols. (line 94) +* \textcapitalcompwordmark: Text symbols. (line 101) +* \textcircled{LETTER}: Text symbols. (line 97) +* \textcompwordmark: Text symbols. (line 100) +* \textcopyright: Text symbols. (line 14) +* \textdagger: Text symbols. (line 109) +* \textdaggerdbl: Text symbols. (line 112) +* \textdollar (or \$): Text symbols. (line 115) +* \textellipsis: Text symbols. (line 38) +* \textemdash (or ---): Text symbols. (line 118) +* \textendash (or --): Text symbols. (line 124) +* \texteuro: Text symbols. (line 127) +* \textexclamdown (or !`): Text symbols. (line 135) +* \textfiguredash: Text symbols. (line 138) +* \textfloatsep: Floats. (line 123) +* \textfloatsep <1>: Floats. (line 124) +* \textfraction: Floats. (line 102) +* \textfraction <1>: Floats. (line 103) +* \textgreater: Text symbols. (line 145) +* \textheight: Page layout parameters. + (line 91) +* \textheight <1>: Page layout parameters. + (line 92) +* \texthorizontalbar: Text symbols. (line 148) +* \textit: Font styles. (line 34) +* \textleftarrow: Text symbols. (line 156) +* \textless: Text symbols. (line 153) +* \textmd: Font styles. (line 37) +* \textnonbreakinghyphen: Text symbols. (line 159) +* \textnormal: Font styles. (line 58) +* \textordfeminine: Text symbols. (line 165) +* \textordmasculine: Text symbols. (line 166) +* \textparagraph: Text symbols. (line 46) +* \textperiodcentered: Text symbols. (line 169) +* \textquestiondown (or ?`): Text symbols. (line 172) +* \textquotedblleft (or ``): Text symbols. (line 175) +* \textquotedblright (or ''): Text symbols. (line 178) +* \textquoteleft (or `): Text symbols. (line 181) +* \textquoteright (or '): Text symbols. (line 184) +* \textquotesingle: Text symbols. (line 187) +* \textquotestraightbase: Text symbols. (line 190) +* \textquotestraightdblbase: Text symbols. (line 191) +* \textregistered: Text symbols. (line 194) +* \textrightarrow: Text symbols. (line 197) +* \textrm: Font styles. (line 31) +* \textsc: Font styles. (line 52) +* \textsection: Text symbols. (line 61) +* \textsf: Font styles. (line 49) +* \textsl: Font styles. (line 46) +* \textsterling: Text symbols. (line 50) +* \textstyle: Math styles. (line 29) +* \textthreequartersemdash: Text symbols. (line 200) +* \texttrademark: Text symbols. (line 203) +* \texttt: Font styles. (line 55) +* \texttwelveudash: Text symbols. (line 206) +* \textunderscore: Text symbols. (line 209) +* \textup: Font styles. (line 43) +* \textvisiblespace: Text symbols. (line 212) +* \textwidth: Page layout parameters. + (line 99) +* \textwidth <1>: Page layout parameters. + (line 100) +* \th (þ): Additional Latin letters. + (line 56) +* \TH (Þ): Additional Latin letters. + (line 56) +* \thanks{TEXT}: \maketitle. (line 56) +* \theta: Math symbols. (line 686) +* \thicklines: \thicklines. (line 6) +* \thickspace: Spacing in math mode. + (line 23) +* \thinlines: \thinlines. (line 6) +* \thinspace: Spacing in math mode. + (line 47) +* \thinspace <1>: \thinspace & \negthinspace. + (line 6) +* \thispagestyle: \thispagestyle. (line 6) +* \tilde: Math accents. (line 37) +* \times: Math symbols. (line 690) +* \tiny: Font sizes. (line 11) +* \title{TEXT}: \maketitle. (line 63) +* \to: Math symbols. (line 693) +* \today: \today. (line 6) +* \top: Math symbols. (line 697) +* \topfraction: Floats. 
(line 107) +* \topfraction <1>: Floats. (line 108) +* \topmargin: Page layout parameters. + (line 124) +* \topsep: list. (line 177) +* \topskip: Page layout parameters. + (line 131) +* \topskip <1>: Page layout parameters. + (line 132) +* \triangle: Math symbols. (line 701) +* \triangleleft: Math symbols. (line 704) +* \triangleright: Math symbols. (line 710) +* \tt: Font styles. (line 96) +* \ttfamily: Font styles. (line 55) +* \twocolumn: \twocolumn. (line 6) +* \typein: \typein. (line 6) +* \typeout: \typeout. (line 6) +* \u (breve accent): Accents. (line 89) +* \unboldmath: \boldmath & \unboldmath. + (line 6) +* \unboldmath <1>: \boldmath & \unboldmath. + (line 14) +* \underbar: Accents. (line 53) +* \underbrace{MATH}: Over- and Underlining. + (line 25) +* \underline{TEXT}: Over- and Underlining. + (line 9) +* \unitlength: picture. (line 28) +* \unlhd: Math symbols. (line 716) +* \unrhd: Math symbols. (line 722) +* \Uparrow: Math symbols. (line 728) +* \uparrow: Math symbols. (line 732) +* \Updownarrow: Math symbols. (line 736) +* \updownarrow: Math symbols. (line 741) +* \upharpoonright: Math symbols. (line 746) +* \uplus: Math symbols. (line 751) +* \upshape: Font styles. (line 43) +* \Upsilon: Math symbols. (line 756) +* \upsilon: Math symbols. (line 759) +* \usebox: \usebox. (line 6) +* \usecounter: \usecounter. (line 6) +* \usefont: Low-level font commands. + (line 126) +* \usepackage: Additional packages. (line 6) +* \UseTextAccent: \UseTextSymbol & \UseTextAccent. + (line 6) +* \UseTextSymbol: \UseTextSymbol & \UseTextAccent. + (line 6) +* \v (breve accent): Accents. (line 93) +* \value: \value. (line 6) +* \vanothing: Math symbols. (line 767) +* \varepsilon: Math symbols. (line 762) +* \varphi: Math symbols. (line 772) +* \varpi: Math symbols. (line 776) +* \varrho: Math symbols. (line 780) +* \varsigma: Math symbols. (line 784) +* \vartheta: Math symbols. (line 788) +* \vbox (plain TeX): minipage. (line 57) +* \vdash: Math symbols. (line 792) +* \vdots: Dots. (line 33) +* \vec: Math accents. (line 40) +* \vector: \vector. (line 6) +* \vee: Math symbols. (line 796) +* \verb: \verb. (line 6) +* \Vert: Math symbols. (line 800) +* \vert: Math symbols. (line 804) +* \vfill: \vfill. (line 6) +* \vline: \vline. (line 6) +* \vspace: \vspace. (line 6) +* \vtop plain TeX: minipage. (line 53) +* \wedge: Math symbols. (line 810) +* \widehat: Math accents. (line 43) +* \widetilde: Math accents. (line 46) +* \wlog: \wlog. (line 6) +* \wp: Math symbols. (line 814) +* \wr: Math symbols. (line 817) +* \write: \write. (line 6) +* \write and security: \write and security. (line 6) +* \write streams 16, 17, 18: \write. (line 86) +* \write18: \write18. (line 6) +* \write18, enabling: Command line options. + (line 61) +* \Xi: Math symbols. (line 820) +* \xi: Math symbols. (line 823) +* \xspace: xspace package. (line 6) +* \year: \day & \month & \year. + (line 6) +* \zeta: Math symbols. (line 826) +* \[...\] display math: displaymath. (line 26) +* \\ (for center): center. (line 16) +* \\ (for eqnarray): eqnarray. (line 29) +* \\ (for flushright): flushright. (line 12) +* \\ (for \shortstack objects): \shortstack. (line 35) +* \\ (tabbing): tabbing. (line 66) +* \\ for flushleft: flushleft. (line 14) +* \\ for letters: Letters. (line 48) +* \\ for tabular: tabular. (line 36) +* \\ for verse: verse. (line 21) +* \\ for \author: \maketitle. (line 43) +* \\ for \title: \maketitle. (line 64) +* \\ force line break: \\. (line 6) +* \\* (for eqnarray): eqnarray. (line 35) +* \^: Reserved characters. 
(line 18) +* \^ (circumflex accent): Accents. (line 40) +* \_: Reserved characters. (line 13) +* \` (grave accent): Accents. (line 44) +* \` (tabbing): tabbing. (line 93) +* \{: Reserved characters. (line 13) +* \|: Math symbols. (line 18) +* \}: Reserved characters. (line 13) +* \~: Reserved characters. (line 18) +* \~ (tilde accent): Accents. (line 48) +* ^ superscript: Subscripts & superscripts. + (line 6) +* ^^J, in \write: \write. (line 94) +* _ subscript: Subscripts & superscripts. + (line 6) +* {...} for required arguments: LaTeX command syntax. + (line 6) +* ~: ~. (line 6) +* a4paper option: Document class options. + (line 19) +* a5paper option: Document class options. + (line 19) +* abstract environment: abstract. (line 6) +* abstract package: abstract. (line 27) +* abstracts: abstract. (line 6) +* accents: Accents. (line 6) +* accents, defining: \DeclareFontEncoding. + (line 6) +* accents, defining <1>: \DeclareTextAccent. (line 6) +* accents, defining <2>: \DeclareTextAccentDefault. + (line 6) +* accents, defining <3>: \DeclareTextComposite. + (line 6) +* accents, defining <4>: \DeclareTextCompositeCommand. + (line 6) +* accents, defining <5>: \DeclareTextSymbolDefault. + (line 6) +* accents, mathematical: Math accents. (line 6) +* accessing any character of a font: Symbols by font position. + (line 6) +* acronyms, list of: Glossaries. (line 6) +* acute accent: Accents. (line 29) +* acute accent, math: Math accents. (line 11) +* additional packages, loading: Additional packages. (line 6) +* adjustbox package: Boxes. (line 9) +* ae ligature: Additional Latin letters. + (line 16) +* algorithm2e package: tabbing. (line 146) +* align environment, from amsmath: eqnarray. (line 6) +* aligning equations: eqnarray. (line 6) +* alignment via tabbing: tabbing. (line 6) +* amscd package: Arrows. (line 41) +* amsfonts package: Math formulas. (line 61) +* amsfonts package <1>: Arrows. (line 6) +* amsmath package: array. (line 55) +* amsmath package <1>: array. (line 63) +* amsmath package <2>: displaymath. (line 22) +* amsmath package <3>: equation. (line 20) +* amsmath package <4>: theorem. (line 28) +* amsmath package <5>: Math formulas. (line 61) +* amsmath package <6>: \left & \right. (line 30) +* amsmath package <7>: \bigl & \bigr etc.. (line 67) +* amsmath package <8>: Dots. (line 36) +* amsmath package <9>: Math functions. (line 111) +* amsmath package <10>: Spacing in math mode. + (line 23) +* amsmath package <11>: \phantom & \vphantom & \hphantom. + (line 56) +* amsmath package <12>: Colon character & \colon. + (line 15) +* amsmath package <13>: Colon character & \colon. + (line 19) +* amsmath package <14>: \thinspace & \negthinspace. + (line 26) +* amsmath package, replacing eqnarray: eqnarray. (line 6) +* amsthm package: theorem. (line 28) +* amsthm package <1>: \rule. (line 18) +* answers package: \write. (line 99) +* appendices: \appendix. (line 6) +* appendix: \appendix. (line 6) +* appendix package: \appendix. (line 29) +* aring: Additional Latin letters. + (line 12) +* array environment: array. (line 6) +* array package: array. (line 74) +* arrays, math: array. (line 6) +* arrow, left, in text: Text symbols. (line 157) +* arrow, right, in text: Text symbols. (line 198) +* arrows: Arrows. (line 6) +* article class: Document classes. (line 11) +* ascender height: Text symbols. (line 103) +* ASCII circumflex, in text: Text symbols. (line 68) +* ASCII tilde, in text: Text symbols. (line 71) +* asterisk, centered, in text: Text symbols. (line 74) +* Asymptote package: \line. 
(line 35) +* Asymptote package <1>: \strut. (line 62) +* Asymptote package <2>: \mbox & \makebox. (line 74) +* Asymptote package <3>: \write18. (line 13) +* at clause, in font definitions: \newfont. (line 19) +* at-sign: \@. (line 6) +* author, for titlepage: \maketitle. (line 43) +* auxiliary file: Output files. (line 33) +* b5paper option: Document class options. + (line 19) +* babel package: \chapter. (line 71) +* babel package <1>: thebibliography. (line 53) +* babel package <2>: Accents. (line 6) +* babel package <3>: \today. (line 13) +* babel package <4>: Table of contents etc.. + (line 123) +* babel package <5>: \index. (line 58) +* back matter of a book: \frontmatter & \mainmatter & \backmatter. + (line 6) +* background, colored: Colored pages. (line 6) +* backslash, in text: Text symbols. (line 77) +* bar, double vertical, in text: Text symbols. (line 83) +* bar, vertical, in text: Text symbols. (line 80) +* bar-over accent: Accents. (line 36) +* bar-over accent, math: Math accents. (line 14) +* bar-under accent: Accents. (line 51) +* basics of LaTeX: Overview. (line 6) +* batchmode: Command line options. + (line 27) +* beamer package: beamer template. (line 6) +* beamer template and class: beamer template. (line 6) +* beginning of document hook: \AtBeginDocument. (line 6) +* bibliography format, open: Document class options. + (line 67) +* bibliography, creating (automatically): Using BibTeX. (line 6) +* bibliography, creating (manually): thebibliography. (line 6) +* BibTeX error messages: BibTeX error messages. + (line 6) +* bibTeX, using: Using BibTeX. (line 6) +* big circle symbols, in text: Text symbols. (line 86) +* big point: Units of length. (line 20) +* bigfoot package: Footnotes of footnotes. + (line 6) +* black boxes, omitting: Document class options. + (line 53) +* blackboard bold: Blackboard bold. (line 6) +* bm package: \boldmath & \unboldmath. + (line 32) +* bold font: Font styles. (line 76) +* bold math: Font styles. (line 129) +* bold typewriter, avoiding: description. (line 36) +* boldface mathematics: \boldmath & \unboldmath. + (line 6) +* book class: Document classes. (line 11) +* book, back matter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* book, end matter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* book, front matter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* book, main matter: \frontmatter & \mainmatter & \backmatter. + (line 6) +* bottomnumber: Floats. (line 130) +* bottomnumber <1>: Floats. (line 131) +* bounding box: \includegraphics. (line 121) +* box: \mbox & \makebox. (line 6) +* box, allocating new: \newsavebox. (line 6) +* box, bounding: \includegraphics. (line 121) +* box, colored: Colored boxes. (line 6) +* box, save: \sbox & \savebox. (line 6) +* box, use saved box: \usebox. (line 6) +* boxes: Boxes. (line 6) +* bp: Units of length. (line 19) +* brace, left, in text: Text symbols. (line 89) +* brace, right, in text: Text symbols. (line 92) +* braces: Delimiters. (line 6) +* brackets: Delimiters. (line 6) +* breaking lines: Line breaking. (line 6) +* breaking pages: Page breaking. (line 6) +* breaks, multiplication discretionary: \*. (line 6) +* breve accent: Accents. (line 89) +* breve accent, math: Math accents. (line 17) +* bug reporting: About this document. (line 24) +* bullet lists: itemize. (line 6) +* bullet symbol: Math symbols. (line 106) +* bullet, in text: Text symbols. (line 95) +* bulleted lists: itemize. (line 6) +* calligraphic fonts: Calligraphic. 
(line 6) +* calligraphic letters for math: Font styles. (line 79) +* cap height: Text symbols. (line 103) +* caption package: \caption. (line 72) +* captions: \caption. (line 6) +* caron accent: Accents. (line 93) +* catcode: \makeatletter & \makeatother. + (line 18) +* category code, character: \makeatletter & \makeatother. + (line 18) +* cc: Units of length. (line 32) +* cc list, in letters: \cc. (line 6) +* cedilla accent: Accents. (line 61) +* center environment: center. (line 6) +* centered asterisk, in text: Text symbols. (line 74) +* centered equations: Document class options. + (line 57) +* centered period, in text: Text symbols. (line 170) +* centering text, declaration for: \centering. (line 6) +* centering text, environment for: center. (line 6) +* centimeter: Units of length. (line 27) +* changing case of characters: Upper and lower case. + (line 6) +* chapter: Sectioning. (line 6) +* chapter <1>: \chapter. (line 6) +* chapter counter: Counters. (line 15) +* character category code: \makeatletter & \makeatother. + (line 18) +* character encoding: inputenc package. (line 15) +* character, invisible: \phantom & \vphantom & \hphantom. + (line 6) +* character, invisible <1>: \mathstrut. (line 6) +* characters, accented: Accents. (line 6) +* characters, case of: Upper and lower case. + (line 6) +* characters, non-English: Additional Latin letters. + (line 6) +* characters, reserved: Reserved characters. (line 6) +* characters, special: Reserved characters. (line 6) +* check accent: Accents. (line 93) +* check accent, math: Math accents. (line 20) +* cicero: Units of length. (line 33) +* circle symbol, big, in text: Text symbols. (line 86) +* circled letter, in text: Text symbols. (line 98) +* circumflex accent: Accents. (line 40) +* circumflex accent, math: Math accents. (line 32) +* circumflex, ASCII, in text: Text symbols. (line 68) +* citation key: \bibitem. (line 15) +* class and package commands: Class and package commands. + (line 6) +* class and package difference: Class and package construction. + (line 15) +* class and package structure: Class and package structure. + (line 6) +* class file example: Class and package structure. + (line 39) +* class file layout: Class and package structure. + (line 6) +* class options: Document class options. + (line 6) +* class options <1>: Class and package structure. + (line 6) +* class options <2>: Class and package commands. + (line 66) +* classes of documents: Document classes. (line 6) +* cleveref package: Cross references. (line 38) +* cleveref package <1>: \ref. (line 29) +* cleveref package <2>: \footnotemark. (line 48) +* CLI: Command line interface. + (line 6) +* clock option to slides class: Document class options. + (line 99) +* closing letters: \closing. (line 6) +* closing quote: Text symbols. (line 58) +* cm: Units of length. (line 26) +* cm-super package: fontenc package. (line 36) +* cmd.exe, used by \write18: \write18. (line 60) +* code, typesetting: verbatim. (line 6) +* colon character: Colon character & \colon. + (line 6) +* color: Color. (line 6) +* color <1>: Define colors. (line 6) +* color <2>: Colored text. (line 6) +* color <3>: Colored boxes. (line 6) +* color <4>: Colored pages. (line 6) +* color models: Color models. (line 6) +* color package commands: Commands for color. (line 6) +* color package options: Color package options. + (line 6) +* color, define: Define colors. (line 6) +* colored boxes: Colored boxes. (line 6) +* colored page: Colored pages. (line 6) +* colored text: Colored text. 
(line 6) +* command line interface: Command line interface. + (line 6) +* command syntax: LaTeX command syntax. + (line 6) +* commands, class and package: Class and package commands. + (line 6) +* commands, defining new ones: \newcommand & \renewcommand. + (line 6) +* commands, defining new ones <1>: \providecommand. (line 6) +* commands, document class: Class and package construction. + (line 6) +* commands, graphics package: Commands for graphics. + (line 6) +* commands, ignore spaces: \ignorespaces & \ignorespacesafterend. + (line 6) +* commands, ignore spaces <1>: xspace package. (line 6) +* commands, redefining: \newcommand & \renewcommand. + (line 6) +* commands, run from LaTeX: \write18. (line 6) +* commands, star-variants: \@ifstar. (line 6) +* composite word mark, in text: Text symbols. (line 103) +* comprehensive package: Math symbols. (line 6) +* computer programs, typesetting: verbatim. (line 6) +* configuration, graphics package: Graphics package configuration. + (line 6) +* contents file: Output files. (line 43) +* copyright symbol: Text symbols. (line 15) +* counters, a list of: Counters. (line 6) +* counters, defining new: \newcounter. (line 6) +* counters, getting value of: \value. (line 6) +* counters, printing: \alph \Alph \arabic \roman \Roman \fnsymbol. + (line 6) +* counters, setting: \setcounter. (line 6) +* cprotect package: verbatim. (line 25) +* cprotect package <1>: \verb. (line 45) +* creating pictures: picture. (line 6) +* creating tables: table. (line 6) +* credit footnote: \maketitle. (line 57) +* cross references: Cross references. (line 6) +* cross references, resolving: Output files. (line 33) +* cross referencing with page number: \pageref. (line 6) +* cross referencing, across documents: xr package. (line 6) +* cross referencing, symbolic: \ref. (line 6) +* CTAN: CTAN. (line 6) +* curly braces: Delimiters. (line 6) +* currency, dollar: Text symbols. (line 116) +* currency, euro: Text symbols. (line 128) +* dagger, double, in text: Text symbols. (line 113) +* dagger, in text: Text symbols. (line 18) +* dagger, in text <1>: Text symbols. (line 110) +* DANTE e.V.: CTAN. (line 20) +* datatool package: \read. (line 31) +* date, for titlepage: \maketitle. (line 51) +* date, today's: \today. (line 6) +* datetime package: \today. (line 27) +* dbltopnumber: Floats. (line 134) +* dbltopnumber <1>: Floats. (line 135) +* dcolumn package: array. (line 74) +* dd: Units of length. (line 29) +* declaration form of font size commands: Font sizes. (line 24) +* declaration form of font style commands: Font styles. (line 8) +* define color: Define colors. (line 6) +* defining a new command: \newcommand & \renewcommand. + (line 6) +* defining a new command <1>: \providecommand. (line 6) +* defining new environments: \newenvironment & \renewenvironment. + (line 6) +* defining new fonts: \newfont. (line 6) +* defining new theorems: \newtheorem. (line 6) +* definitions: Definitions. (line 6) +* delayed \write: \write. (line 65) +* delimiters: Delimiters. (line 6) +* delimiters, paired: \left & \right. (line 6) +* delim_0: makeindex. (line 109) +* delim_1: makeindex. (line 113) +* delim_2: makeindex. (line 117) +* delim_n: makeindex. (line 121) +* delim_r: makeindex. (line 125) +* description: \newglossaryentry. (line 46) +* description environment: description. (line 6) +* description lists, creating: description. (line 6) +* design size, in font definitions: \newfont. (line 19) +* didot point: Units of length. (line 30) +* dieresis accent: Accents. 
(line 25) +* difference between class and package: Class and package construction. + (line 15) +* dimen plain TeX: Lengths. (line 9) +* directory listings, from system: \write18. (line 65) +* discretionary breaks, multiplication: \*. (line 6) +* discretionary hyphenation: \discretionary. (line 6) +* display math mode: Modes. (line 31) +* display style: Math styles. (line 18) +* displaying quoted text with paragraph indentation: quotation & quote. + (line 6) +* displaying quoted text without paragraph indentation: quotation & quote. + (line 6) +* displaymath environment: displaymath. (line 6) +* displaymath environment <1>: Math formulas. (line 6) +* document class commands: Class and package construction. + (line 6) +* document class options: Document class options. + (line 6) +* document class, defined: Starting and ending. (line 17) +* document classes: Document classes. (line 6) +* document environment: document. (line 6) +* document root name: Jobname. (line 6) +* document templates: Document templates. (line 6) +* dollar sign: Text symbols. (line 116) +* dot accent: Accents. (line 32) +* dot over accent, math: Math accents. (line 26) +* dot-over accent: Accents. (line 32) +* dot-under accent: Accents. (line 65) +* dotless i: Accents. (line 19) +* dotless i, math: Math symbols. (line 253) +* dotless j: Accents. (line 19) +* dotless j, math: Math symbols. (line 273) +* dots: Dots. (line 6) +* double angle quotation marks: Text symbols. (line 33) +* double dagger, in text: Text symbols. (line 21) +* double dagger, in text <1>: Text symbols. (line 113) +* double dot accent, math: Math accents. (line 23) +* double guillemets: Text symbols. (line 33) +* double left quote: Text symbols. (line 176) +* double low-9 quotation mark: Text symbols. (line 55) +* double quote, straight base: Text symbols. (line 192) +* double right quote: Text symbols. (line 179) +* double spacing: \baselineskip & \baselinestretch. + (line 6) +* double vertical bar, in text: Text symbols. (line 83) +* doublestruck: Blackboard bold. (line 6) +* draft option: Document class options. + (line 49) +* dvilualatex-dev: TeX engines. (line 63) +* dvipdfmx command: Output files. (line 10) +* dvips command: Output files. (line 10) +* dvitype command: Output files. (line 10) +* e-dash: Text symbols. (line 125) +* e-TeX: TeX engines. (line 16) +* ellipses: Dots. (line 6) +* ellipsis: Text symbols. (line 39) +* em: Units of length. (line 46) +* em <1>: Units of length. (line 45) +* em-dash: Text symbols. (line 119) +* em-dash, three-quarters: Text symbols. (line 201) +* em-dash, two-thirds: Text symbols. (line 207) +* emphasis: Font styles. (line 60) +* enclosure list: \encl. (line 6) +* encoding, font: \DeclareFontEncoding. + (line 6) +* encoding, of input files: inputenc package. (line 15) +* end matter of a book: \frontmatter & \mainmatter & \backmatter. + (line 6) +* end of document hook: \AtEndDocument. (line 6) +* ending and starting: Starting and ending. (line 6) +* engines, TeX: TeX engines. (line 6) +* enlarge current page: \enlargethispage. (line 6) +* enumerate environment: enumerate. (line 6) +* enumi counter: Counters. (line 15) +* enumii counter: Counters. (line 15) +* enumiii counter: Counters. (line 15) +* enumitem package: itemize. (line 66) +* enumitem package <1>: list. (line 255) +* enumiv counter: Counters. (line 15) +* environment: Starting and ending. (line 25) +* environment form of font size commands: Font sizes. (line 47) +* environment, abstract: abstract. (line 6) +* environment, array: array. 
(line 6) +* environment, center: center. (line 6) +* environment, description: description. (line 6) +* environment, displaymath: displaymath. (line 6) +* environment, displaymath <1>: Math formulas. (line 6) +* environment, document: document. (line 6) +* environment, enumerate: enumerate. (line 6) +* environment, eqnarray: eqnarray. (line 6) +* environment, equation: equation. (line 6) +* environment, equation <1>: Math formulas. (line 6) +* environment, figure: figure. (line 6) +* environment, filecontents: filecontents. (line 6) +* environment, filecontents*: filecontents. (line 6) +* environment, flushleft: flushleft. (line 6) +* environment, flushright: flushright. (line 6) +* environment, itemize: itemize. (line 6) +* environment, letter: letter. (line 6) +* environment, list: list. (line 6) +* environment, math: math. (line 6) +* environment, math <1>: Math formulas. (line 6) +* environment, minipage: minipage. (line 6) +* environment, picture: picture. (line 6) +* environment, quotation: quotation & quote. (line 6) +* environment, quote: quotation & quote. (line 6) +* environment, sloppypar: sloppypar. (line 6) +* environment, tabbing: tabbing. (line 6) +* environment, table: table. (line 6) +* environment, tabular: tabular. (line 6) +* environment, thebibliography: thebibliography. (line 6) +* environment, theindex: Produce the index manually. + (line 9) +* environment, theorem: theorem. (line 6) +* environment, theorem-like: \newtheorem. (line 6) +* environment, titlepage: titlepage. (line 6) +* environment, verbatim: verbatim. (line 6) +* environment, verse: verse. (line 6) +* environments: Environments. (line 6) +* environments, defining: \newenvironment & \renewenvironment. + (line 6) +* envlab package: \makelabels. (line 55) +* EPS files: \includegraphics. (line 6) +* eqnarray environment: eqnarray. (line 6) +* equation counter: Counters. (line 15) +* equation environment: equation. (line 6) +* equation environment <1>: Math formulas. (line 6) +* equation number, cross referencing: \ref. (line 6) +* equation numbers, left vs. right: Document class options. + (line 63) +* equation numbers, omitting: eqnarray. (line 39) +* equations, aligning: eqnarray. (line 6) +* equations, environment for: equation. (line 6) +* equations, flush left vs. centered: Document class options. + (line 57) +* error messages, from BibTeX: BibTeX error messages. + (line 6) +* errorstopmode: Command line options. + (line 27) +* es-zet German letter: Additional Latin letters. + (line 52) +* etex command: TeX engines. (line 16) +* eth, Icelandic letter: Additional Latin letters. + (line 20) +* etoolbox package: Class and package commands. + (line 110) +* euro symbol: Text symbols. (line 128) +* eurosym package: Text symbols. (line 128) +* eurosym package <1>: Text symbols. (line 129) +* ex: Units of length. (line 42) +* ex <1>: Units of length. (line 41) +* exclamation point, upside-down: Text symbols. (line 136) +* executivepaper option: Document class options. + (line 19) +* expl3 package: LaTeX command syntax. + (line 35) +* expl3 package <1>: Upper and lower case. + (line 48) +* exponent: Subscripts & superscripts. + (line 6) +* expressions: Expressions. (line 6) +* extended Latin: Additional Latin letters. + (line 6) +* external commands: \write18. (line 6) +* external files, writing: filecontents. (line 6) +* fallback jobname: Jobname. (line 27) +* families, of fonts: Low-level font commands. + (line 21) +* fancyhdr package: Page styles. (line 12) +* fancyhdr package <1>: \pagestyle. 
(line 13) +* fancyvrb package: tabbing. (line 146) +* fancyvrb package <1>: verbatim. (line 41) +* feminine ordinal symbol: Text symbols. (line 167) +* figure counter: Counters. (line 15) +* figure dash character: Text symbols. (line 139) +* figure environment: figure. (line 6) +* figure number, cross referencing: \ref. (line 6) +* figures, footnotes in: minipage. (line 107) +* figures, inserting: figure. (line 6) +* file, closing: \openin & \openout. (line 6) +* file, opening: \openin & \openout. (line 6) +* file, reading: \read. (line 6) +* file, root: Splitting the input. (line 10) +* file, root <1>: Jobname. (line 6) +* filecontents environment: filecontents. (line 6) +* filecontents* environment: filecontents. (line 6) +* filename for current job: Command line options. + (line 43) +* final option: Document class options. + (line 49) +* first-latex-doc document: About this document. (line 36) +* fixed-width font: Font styles. (line 97) +* flafter package: Floats. (line 82) +* fleqn option: Document class options. + (line 49) +* float package: Floats. (line 52) +* float page: Floats. (line 58) +* flush left equations: Document class options. + (line 57) +* flushing floats and starting a page: \clearpage & \cleardoublepage. + (line 6) +* flushleft environment: flushleft. (line 6) +* flushright environment: flushright. (line 6) +* font catalogue: Low-level font commands. + (line 21) +* font commands, low-level: Low-level font commands. + (line 6) +* font dimension, slant: \/. (line 40) +* font encoding: fontenc package. (line 6) +* font encoding <1>: \DeclareTextAccent. (line 6) +* font encoding, declaring: \DeclareFontEncoding. + (line 6) +* font size: Low-level font commands. + (line 101) +* font sizes: Font sizes. (line 6) +* font styles: Font styles. (line 6) +* font symbols, by number: Symbols by font position. + (line 6) +* fontenc: fontenc package. (line 6) +* fonts: Fonts. (line 6) +* fonts, new commands for: \newfont. (line 6) +* fonts, script: Calligraphic. (line 6) +* fontspec package: fontenc package. (line 17) +* footer style: \pagestyle. (line 6) +* footer, parameters for: Page layout parameters. + (line 6) +* footnote counter: Counters. (line 15) +* footnote number, cross referencing: \ref. (line 6) +* footnote parameters: \footnote. (line 34) +* footnote, in a table: Footnotes in a table. + (line 6) +* footnote, in section headings: Footnotes in section headings. + (line 6) +* footnote, of a footnote: Footnotes of footnotes. + (line 6) +* footnotes in figures: minipage. (line 107) +* footnotes, creating: Footnotes. (line 6) +* footnotes, in a minipage: \footnote. (line 62) +* footnotes, symbols instead of numbers: \footnote. (line 24) +* force option for filecontents: filecontents. (line 31) +* format files, TeX: TeX engines. (line 6) +* formulas, environment for: equation. (line 6) +* formulas, math: Math formulas. (line 6) +* forward reference: Cross references. (line 27) +* forward references, resolving: Output files. (line 33) +* fraction: \frac. (line 6) +* fragile commands: \protect. (line 6) +* frame rule width: \fbox & \framebox. (line 38) +* frame, line width: \fbox & \framebox. (line 38) +* frame, separation from contents: \fbox & \framebox. (line 43) +* French quotation marks: Text symbols. (line 33) +* front matter of a book: \frontmatter & \mainmatter & \backmatter. + (line 6) +* functions, math: Math functions. (line 6) +* geometry package: Document class options. + (line 40) +* geometry package <1>: Document class options. 
+ (line 44) +* global options: Document class options. + (line 6) +* global options <1>: Additional packages. (line 15) +* glossaries: Glossaries. (line 6) +* glossary: Glossaries. (line 6) +* glossary, entries: \newglossaryentry. (line 6) +* glossary, entry reference: \gls. (line 6) +* glue plain TeX: Lengths. (line 9) +* glue register, plain TeX: \newlength. (line 6) +* graphics: Graphics. (line 6) +* graphics <1>: Graphics package configuration. + (line 6) +* graphics <2>: \includegraphics. (line 6) +* graphics package: Graphics. (line 6) +* graphics package <1>: Graphics package configuration. + (line 6) +* graphics package <2>: \includegraphics. (line 6) +* graphics package commands: Commands for graphics. + (line 6) +* graphics package options: Graphics package options. + (line 6) +* graphics packages: \line. (line 35) +* graphics, resizing: \scalebox. (line 6) +* graphics, resizing <1>: \resizebox. (line 6) +* graphics, scaling: \scalebox. (line 6) +* graphics, scaling <1>: \resizebox. (line 6) +* graphpap package: \graphpaper. (line 11) +* grave accent: Accents. (line 44) +* grave accent, math: Math accents. (line 29) +* greater than symbol, in text: Text symbols. (line 146) +* greek letters: Math symbols. (line 6) +* Greek letters: Greek letters. (line 6) +* grfext package: \DeclareGraphicsExtensions. + (line 39) +* group, and environments: Environments. (line 18) +* group_skip: makeindex. (line 58) +* hacek accent: Accents. (line 93) +* Halmos symbol: \rule. (line 13) +* hard space: ~. (line 6) +* hat accent: Accents. (line 40) +* hat accent, math: Math accents. (line 32) +* header style: \pagestyle. (line 6) +* header, parameters for: Page layout parameters. + (line 6) +* hello, world: Starting and ending. (line 6) +* here, putting floats: Floats. (line 52) +* home page for manual: About this document. (line 6) +* horizontal bar character: Text symbols. (line 149) +* horizontal paragraph indentation: \parindent & \parskip. + (line 6) +* horizontal space: \hss. (line 6) +* horizontal space, stretchable: \hss. (line 6) +* horizontal spacing: \phantom & \vphantom & \hphantom. + (line 6) +* hungarian umlaut accent: Accents. (line 69) +* hyperref package: \footnotemark. (line 48) +* hyperref package <1>: \footnotemark. (line 57) +* hyperref package <2>: \pagenumbering. (line 45) +* hyperref package <3>: \contentsline. (line 37) +* hyperref package <4>: Command line input. (line 14) +* hyphen character, non-breaking: Text symbols. (line 160) +* hyphenation, defining: \hyphenation. (line 6) +* hyphenation, discretionary: \discretionary. (line 6) +* hyphenation, forcing: \- (hyphenation). (line 6) +* hyphenation, preventing: \mbox & \makebox. (line 6) +* háček accent, math: Math accents. (line 20) +* Icelandic eth: Additional Latin letters. + (line 20) +* Icelandic thorn: Additional Latin letters. + (line 56) +* idx file: \index. (line 108) +* ij letter, Dutch: Additional Latin letters. + (line 32) +* immediate \write: \write. (line 60) +* implementations of TeX: TeX engines. (line 6) +* importing graphics: \includegraphics. (line 6) +* in: Units of length. (line 16) +* inch: Units of length. (line 17) +* including graphics: \includegraphics. (line 6) +* indent, forcing: \indent & \noindent. (line 6) +* indentation of paragraphs, in minipage: minipage. (line 103) +* indentfirst package: \part. (line 50) +* indentfirst package <1>: \chapter. (line 58) +* indentfirst package <2>: \section. (line 60) +* indentfirst package <3>: \subsection. 
(line 48) +* indentfirst package <4>: \subsubsection & \paragraph & \subparagraph. + (line 59) +* indentfirst package <5>: \indent & \noindent. (line 48) +* indent_length: makeindex. (line 137) +* indent_space: makeindex. (line 133) +* index entries, subentries: \index. (line 26) +* index entries, 'see' and 'see also': \index. (line 58) +* index entry: \index. (line 6) +* index package: \index. (line 103) +* index, multiple: Indexes. (line 51) +* index, page range: \index. (line 42) +* index, printing: \printindex. (line 6) +* index, processing: makeindex. (line 6) +* index, producing manually: Produce the index manually. + (line 6) +* index, style file: makeindex. (line 18) +* indexes: Indexes. (line 6) +* infinite horizontal stretch: \hfill. (line 6) +* infinite vertical stretch: \vfill. (line 6) +* inline formulas: math. (line 6) +* inner paragraph mode: Modes. (line 51) +* input file: Splitting the input. (line 6) +* input, on command line: Command line input. (line 6) +* input/output: Input/output. (line 6) +* input/output, to terminal: Input/output. (line 6) +* inputenc: inputenc package. (line 6) +* inserting figures: figure. (line 6) +* insertions of special characters: Special insertions. (line 6) +* interface, command line: Command line interface. + (line 6) +* interline space: \baselineskip & \baselinestretch. + (line 6) +* internal vertical mode: Modes. (line 41) +* invisible character: \phantom & \vphantom & \hphantom. + (line 6) +* invisible character <1>: \mathstrut. (line 6) +* italic correction: \/. (line 6) +* italic font: Font styles. (line 82) +* itemize environment: itemize. (line 6) +* item_0: makeindex. (line 82) +* item_01: makeindex. (line 91) +* item_1: makeindex. (line 85) +* item_12: makeindex. (line 100) +* item_2: makeindex. (line 88) +* item_x1: makeindex. (line 95) +* item_x2: makeindex. (line 104) +* jobname: Command line options. + (line 43) +* jobname <1>: Jobname. (line 6) +* JPEG files: \includegraphics. (line 6) +* JPG files: \includegraphics. (line 6) +* justification, ragged left: \raggedleft. (line 6) +* justification, ragged right: \raggedright. (line 6) +* Knuth, Donald E.: Overview. (line 6) +* label: Cross references. (line 6) +* labelled lists, creating: description. (line 6) +* Lamport TeX: Overview. (line 27) +* Lamport, Leslie: Overview. (line 6) +* landscape option: Document class options. + (line 49) +* landscape orientation: Document class options. + (line 60) +* latex: TeX engines. (line 14) +* latex command: Output files. (line 10) +* LaTeX format (.fmt) files: TeX engines. (line 6) +* LaTeX logo: Text symbols. (line 24) +* LaTeX overview: Overview. (line 6) +* LaTeX Project team: About this document. (line 20) +* LaTeX vs. LaTeX2e: About this document. (line 16) +* latex-dev: TeX engines. (line 64) +* latex-doc-ptr document: About this document. (line 33) +* LaTeX2e logo: Text symbols. (line 27) +* LaTeX3 syntax: LaTeX command syntax. + (line 35) +* email address: About this document. (line 20) +* latexsym package: Arrows. (line 6) +* Latin letters, additional: Additional Latin letters. + (line 6) +* layout commands: Layout. (line 6) +* layout, page parameters for: Page layout parameters. + (line 6) +* leaders, dots in table of contents: \@dottedtocline. (line 27) +* leading: \baselineskip & \baselinestretch. + (line 6) +* left angle quotation marks: Text symbols. (line 33) +* left arrow, in text: Text symbols. (line 157) +* left brace, in text: Text symbols. (line 89) +* left quote: Text symbols. 
(line 43) +* left quote, double: Text symbols. (line 176) +* left quote, single: Text symbols. (line 182) +* left-hand equation numbers: Document class options. + (line 63) +* left-justifying text: \raggedright. (line 6) +* left-justifying text, environment for: flushleft. (line 6) +* left-to-right mode: Modes. (line 18) +* legalpaper option: Document class options. + (line 19) +* lengths, adding to: \addtolength. (line 6) +* lengths, allocating new: \newlength. (line 6) +* lengths, defining and using: Lengths. (line 6) +* lengths, setting: \setlength. (line 6) +* leqno option: Document class options. + (line 49) +* less than symbol, in text: Text symbols. (line 154) +* lethead_flag: makeindex. (line 65) +* lethead_prefix: makeindex. (line 74) +* lethead_suffix: makeindex. (line 78) +* letter class: Document classes. (line 11) +* letter environment: letter. (line 6) +* letterpaper option: Document class options. + (line 19) +* letters, accented: Accents. (line 6) +* letters, additional Latin: Additional Latin letters. + (line 6) +* letters, ending: \closing. (line 6) +* letters, starting: \opening. (line 6) +* letters, writing: Letters. (line 6) +* line break, forcing: \\. (line 6) +* line breaking: Line breaking. (line 6) +* line breaks, changing: \fussy & \sloppy. (line 6) +* line breaks, forcing: \linebreak & \nolinebreak. + (line 6) +* line breaks, multiplication discretionary: \*. (line 6) +* line breaks, preventing: \linebreak & \nolinebreak. + (line 6) +* lines in tables: tabular. (line 6) +* line_max: makeindex. (line 129) +* lining numerals: Font styles. (line 133) +* lining text up in tables: tabular. (line 6) +* lining text up using tab stops: tabbing. (line 6) +* list environment: list. (line 6) +* list items, specifying counter: \usecounter. (line 6) +* list of figures file: Output files. (line 43) +* list of tables file: Output files. (line 43) +* listings package: tabbing. (line 146) +* listings package <1>: verbatim. (line 35) +* listings package <2>: \verb. (line 42) +* lists of items: itemize. (line 6) +* lists of items, generic: list. (line 6) +* lists of items, numbered: enumerate. (line 6) +* lmodern package: fontenc package. (line 36) +* loading additional packages: Additional packages. (line 6) +* locale information, from system: \write18. (line 65) +* log file: Output files. (line 28) +* log file, writing to: \write. (line 30) +* logo, LaTeX: Text symbols. (line 24) +* logo, LaTeX2e: Text symbols. (line 27) +* logo, TeX: Text symbols. (line 65) +* long command: Class and package commands. + (line 23) +* low-9 quotation marks, single and double: Text symbols. (line 55) +* low-level font commands: Low-level font commands. + (line 6) +* lowercase: Upper and lower case. + (line 6) +* LR box: picture. (line 93) +* LR mode: Modes. (line 18) +* lrbox: lrbox. (line 6) +* lshort document: About this document. (line 39) +* lualatex: TeX engines. (line 32) +* lualatex-dev: TeX engines. (line 65) +* LuaTeX: TeX engines. (line 33) +* LuaTeX, 256 output streams in: \write. (line 42) +* m-width: Units of length. (line 46) +* macron accent: Accents. (line 36) +* macron accent, math: Math accents. (line 14) +* macros2e package: \makeatletter & \makeatother. + (line 38) +* Madsen, Lars: eqnarray. (line 6) +* mail merges: \read. (line 31) +* main matter of a book: \frontmatter & \mainmatter & \backmatter. + (line 6) +* make a box: \mbox & \makebox. (line 6) +* makeidx package: \printindex. (line 12) +* makeindex program: makeindex. (line 6) +* makeindex, program: makeindex. 
(line 6) +* makeindex, style file: makeindex. (line 18) +* making a title page: titlepage. (line 6) +* making paragraphs: Making paragraphs. (line 6) +* marginal notes: Marginal notes. (line 6) +* masculine ordinal symbol: Text symbols. (line 167) +* matching brackets: \left & \right. (line 6) +* matching parentheses: \left & \right. (line 6) +* math accents: Math accents. (line 6) +* math environment: math. (line 6) +* math environment <1>: Math formulas. (line 6) +* math formulas: Math formulas. (line 6) +* math functions: Math functions. (line 6) +* math miscellany: Math miscellany. (line 6) +* math mode: Modes. (line 28) +* math mode, entering: Math formulas. (line 6) +* math mode, spacing: Spacing in math mode. + (line 6) +* math mode, spacing <1>: \phantom & \vphantom & \hphantom. + (line 6) +* math mode, spacing <2>: \mathstrut. (line 6) +* math mode, vertical space: \smash. (line 6) +* math styles: Math styles. (line 6) +* math symbols: Math symbols. (line 6) +* math, arrows: Arrows. (line 6) +* math, bold: Font styles. (line 129) +* mathtools package: Math formulas. (line 61) +* mathtools package <1>: Delimiters. (line 41) +* mathtools package <2>: \left & \right. (line 30) +* mathtools package <3>: Over- and Underlining. + (line 44) +* mathtools package <4>: \smash. (line 60) +* mathtools package <5>: \phantom & \vphantom & \hphantom. + (line 48) +* MetaPost package: \line. (line 35) +* mfirstuc package: Upper and lower case. + (line 45) +* mhchem package: Subscripts & superscripts. + (line 62) +* millimeter: Units of length. (line 24) +* minipage environment: minipage. (line 6) +* minipage, creating a: minipage. (line 6) +* minted package: tabbing. (line 146) +* minted package <1>: verbatim. (line 35) +* minted package <2>: \verb. (line 42) +* mirrors of CTAN: CTAN. (line 25) +* mm: Units of length. (line 23) +* modes: Modes. (line 6) +* monospace font: Font styles. (line 97) +* moving arguments: \protect. (line 19) +* mpfootnote counter: \footnote. (line 62) +* mpfootnote counter <1>: Counters. (line 15) +* mu: Units of length. (line 52) +* mu, math unit: Units of length. (line 53) +* multicolumn text: \twocolumn. (line 6) +* multilingual support: Accents. (line 6) +* multind package: Indexes. (line 51) +* multiple indexes: Indexes. (line 51) +* multiplication, discretionary: \*. (line 6) +* name: \newglossaryentry. (line 43) +* name of document root: Jobname. (line 6) +* NBSP: ~. (line 6) +* nested \include, not allowed: \include & \includeonly. + (line 106) +* new class commands: Class and package construction. + (line 6) +* new command, check: Class and package commands. + (line 21) +* new command, definition: Class and package commands. + (line 96) +* new commands, defining: \newcommand & \renewcommand. + (line 6) +* new commands, defining <1>: \providecommand. (line 6) +* new line, output as input: \obeycr & \restorecr. + (line 6) +* new line, starting: \\. (line 6) +* new line, starting (paragraph mode): \newline. (line 6) +* new page, starting: \newpage. (line 6) +* newline, in \write: \write. (line 94) +* noheader option for filecontents: filecontents. (line 34) +* non-breaking hyphen character: Text symbols. (line 160) +* non-English characters: Additional Latin letters. + (line 6) +* nonstopmode: Command line options. + (line 27) +* nosearch option for filecontents: filecontents. (line 37) +* notes in the margin: Marginal notes. (line 6) +* notitlepage option: Document class options. + (line 49) +* null delimiter: \left & \right. 
(line 6) +* numbered items, specifying counter: \usecounter. (line 6) +* numerals, old-style: Font styles. (line 133) +* oblique font: Font styles. (line 94) +* oe ligature: Additional Latin letters. + (line 48) +* ogonek: Accents. (line 73) +* old-style numerals: Font styles. (line 133) +* one-column output: \onecolumn. (line 6) +* onecolumn option: Document class options. + (line 78) +* oneside option: Document class options. + (line 78) +* open a file: \openin & \openout. (line 6) +* openany option: Document class options. + (line 78) +* openbib option: Document class options. + (line 49) +* opening quote: Text symbols. (line 43) +* openright option: Document class options. + (line 78) +* OpenType fonts: TeX engines. (line 6) +* operating system information: \write18. (line 65) +* options, class: Class and package commands. + (line 66) +* options, color package: Color package options. + (line 6) +* options, command line: Command line options. + (line 6) +* options, document class: Document class options. + (line 6) +* options, document class <1>: Class and package structure. + (line 6) +* options, global: Additional packages. (line 15) +* options, graphics package: Graphics package options. + (line 6) +* options, package: Class and package structure. + (line 6) +* options, package <1>: Class and package commands. + (line 66) +* ordinals, feminine and masculine: Text symbols. (line 167) +* oslash: Additional Latin letters. + (line 44) +* OT1: fontenc package. (line 6) +* outer paragraph mode: Modes. (line 51) +* output directory for all external files: Command line options. + (line 48) +* overbar accent: Accents. (line 36) +* overdot accent, math: Math accents. (line 26) +* overlining: Over- and Underlining. + (line 6) +* overview of LaTeX: Overview. (line 6) +* overwrite option for filecontents: filecontents. (line 31) +* package file layout: Class and package structure. + (line 6) +* package options: Class and package structure. + (line 6) +* package options <1>: Class and package commands. + (line 66) +* package, abstract: abstract. (line 27) +* package, adjustbox: Boxes. (line 9) +* package, algorithm2e: tabbing. (line 146) +* package, amscd: Arrows. (line 41) +* package, amsfonts: Math formulas. (line 61) +* package, amsfonts <1>: Arrows. (line 6) +* package, amsmath: array. (line 55) +* package, amsmath <1>: array. (line 63) +* package, amsmath <2>: displaymath. (line 22) +* package, amsmath <3>: equation. (line 20) +* package, amsmath <4>: theorem. (line 28) +* package, amsmath <5>: Math formulas. (line 61) +* package, amsmath <6>: \left & \right. (line 30) +* package, amsmath <7>: \bigl & \bigr etc.. (line 67) +* package, amsmath <8>: Dots. (line 36) +* package, amsmath <9>: Math functions. (line 111) +* package, amsmath <10>: Spacing in math mode. + (line 23) +* package, amsmath <11>: \phantom & \vphantom & \hphantom. + (line 56) +* package, amsmath <12>: Colon character & \colon. + (line 15) +* package, amsmath <13>: Colon character & \colon. + (line 19) +* package, amsmath <14>: \thinspace & \negthinspace. + (line 26) +* package, amsthm: theorem. (line 28) +* package, amsthm <1>: \rule. (line 18) +* package, answers: \write. (line 99) +* package, appendix: \appendix. (line 29) +* package, array: array. (line 74) +* package, Asymptote: \line. (line 35) +* package, Asymptote <1>: \strut. (line 62) +* package, Asymptote <2>: \mbox & \makebox. (line 74) +* package, Asymptote <3>: \write18. (line 13) +* package, babel: \chapter. (line 71) +* package, babel <1>: thebibliography. 
(line 53) +* package, babel <2>: Accents. (line 6) +* package, babel <3>: \today. (line 13) +* package, babel <4>: Table of contents etc.. + (line 123) +* package, babel <5>: \index. (line 58) +* package, beamer: beamer template. (line 6) +* package, bigfoot: Footnotes of footnotes. + (line 6) +* package, bm: \boldmath & \unboldmath. + (line 32) +* package, caption: \caption. (line 72) +* package, cleveref: Cross references. (line 38) +* package, cleveref <1>: \ref. (line 29) +* package, cleveref <2>: \footnotemark. (line 48) +* package, cm-super: fontenc package. (line 36) +* package, comprehensive: Math symbols. (line 6) +* package, cprotect: verbatim. (line 25) +* package, cprotect <1>: \verb. (line 45) +* package, datatool: \read. (line 31) +* package, datetime: \today. (line 27) +* package, dcolumn: array. (line 74) +* package, enumitem: itemize. (line 66) +* package, enumitem <1>: list. (line 255) +* package, envlab: \makelabels. (line 55) +* package, etoolbox: Class and package commands. + (line 110) +* package, eurosym: Text symbols. (line 128) +* package, eurosym <1>: Text symbols. (line 129) +* package, expl3: LaTeX command syntax. + (line 35) +* package, expl3 <1>: Upper and lower case. + (line 48) +* package, fancyhdr: Page styles. (line 12) +* package, fancyhdr <1>: \pagestyle. (line 13) +* package, fancyvrb: tabbing. (line 146) +* package, fancyvrb <1>: verbatim. (line 41) +* package, flafter: Floats. (line 82) +* package, float: Floats. (line 52) +* package, fontspec: fontenc package. (line 17) +* package, geometry: Document class options. + (line 40) +* package, geometry <1>: Document class options. + (line 44) +* package, graphpap: \graphpaper. (line 11) +* package, grfext: \DeclareGraphicsExtensions. + (line 39) +* package, hyperref: \footnotemark. (line 48) +* package, hyperref <1>: \footnotemark. (line 57) +* package, hyperref <2>: \pagenumbering. (line 45) +* package, hyperref <3>: \contentsline. (line 37) +* package, hyperref <4>: Command line input. (line 14) +* package, indentfirst: \part. (line 50) +* package, indentfirst <1>: \chapter. (line 58) +* package, indentfirst <2>: \section. (line 60) +* package, indentfirst <3>: \subsection. (line 48) +* package, indentfirst <4>: \subsubsection & \paragraph & \subparagraph. + (line 59) +* package, indentfirst <5>: \indent & \noindent. (line 48) +* package, index: \index. (line 103) +* package, latexsym: Arrows. (line 6) +* package, listings: tabbing. (line 146) +* package, listings <1>: verbatim. (line 35) +* package, listings <2>: \verb. (line 42) +* package, lmodern: fontenc package. (line 36) +* package, macros2e: \makeatletter & \makeatother. + (line 38) +* package, makeidx: \printindex. (line 12) +* package, mathtools: Math formulas. (line 61) +* package, mathtools <1>: Delimiters. (line 41) +* package, mathtools <2>: \left & \right. (line 30) +* package, mathtools <3>: Over- and Underlining. + (line 44) +* package, mathtools <4>: \smash. (line 60) +* package, mathtools <5>: \phantom & \vphantom & \hphantom. + (line 48) +* package, MetaPost: \line. (line 35) +* package, mfirstuc: Upper and lower case. + (line 45) +* package, mhchem: Subscripts & superscripts. + (line 62) +* package, minted: tabbing. (line 146) +* package, minted <1>: verbatim. (line 35) +* package, minted <2>: \verb. (line 42) +* package, multind: Indexes. (line 51) +* package, pict2e: \line. (line 35) +* package, polyglossia: Accents. (line 6) +* package, polyglossia <1>: \today. (line 13) +* package, polyglossia <2>: Table of contents etc.. 
+ (line 124) +* package, polyglossia <3>: \index. (line 58) +* package, PSTricks: \line. (line 35) +* package, setspace: \baselineskip & \baselinestretch. + (line 79) +* package, shellesc: \write18. (line 71) +* package, showidx: Indexes. (line 51) +* package, siunitx: minipage. (line 137) +* package, siunitx <1>: ~. (line 42) +* package, suffix: \@ifstar. (line 64) +* package, tablefootnote: Footnotes in a table. + (line 53) +* package, texosquery: \write18. (line 65) +* package, textcase: Upper and lower case. + (line 42) +* package, TikZ: \line. (line 35) +* package, TikZ <1>: \strut. (line 62) +* package, TikZ <2>: \mbox & \makebox. (line 73) +* package, tikz-cd: Arrows. (line 41) +* package, titlesec: Sectioning. (line 60) +* package, titlesec <1>: \part. (line 54) +* package, titlesec <2>: \chapter. (line 79) +* package, titlesec <3>: \section. (line 64) +* package, titlesec <4>: \subsection. (line 52) +* package, titlesec <5>: \subsubsection & \paragraph & \subparagraph. + (line 63) +* package, tocbibbind: Table of contents etc.. + (line 110) +* package, tocloft: Table of contents etc.. + (line 110) +* package, tocloft <1>: \contentsline. (line 34) +* package, ulem: Over- and Underlining. + (line 15) +* package, unicode-math: Greek letters. (line 37) +* package, url: \verb. (line 39) +* package, verbatimbox: verbatim. (line 41) +* package, xparse: LaTeX command syntax. + (line 35) +* package, xparse <1>: \@ifstar. (line 64) +* package, xr: xr package. (line 6) +* package, xr-hyper: xr package. (line 6) +* package, xspace: xspace package. (line 6) +* package, xstring: Jobname. (line 53) +* packages, loading additional: Additional packages. (line 6) +* page break, forcing: \pagebreak & \nopagebreak. + (line 6) +* page break, preventing: \pagebreak & \nopagebreak. + (line 6) +* page breaking: Page breaking. (line 6) +* page counter: Counters. (line 15) +* page layout parameters: Page layout parameters. + (line 6) +* page number, cross referencing: \pageref. (line 6) +* page numbering style: \pagenumbering. (line 6) +* page style, this page: \thispagestyle. (line 6) +* page styles: Page styles. (line 6) +* page, colored: Colored pages. (line 6) +* page_precedence: makeindex. (line 142) +* paired delimiters: \left & \right. (line 6) +* paragraph: Sectioning. (line 6) +* paragraph <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* paragraph counter: Counters. (line 15) +* paragraph indentation: \parindent & \parskip. + (line 6) +* paragraph indentation, in minipage: minipage. (line 103) +* paragraph indentations in quoted text: quotation & quote. (line 6) +* paragraph indentations in quoted text, omitting: quotation & quote. + (line 6) +* paragraph mode: Modes. (line 12) +* paragraph mode <1>: \parbox. (line 6) +* paragraph symbol: Text symbols. (line 47) +* paragraph, ending: \par. (line 6) +* paragraph, in a box: \parbox. (line 6) +* paragraphs: Making paragraphs. (line 6) +* parameters, for footnotes: \footnote. (line 34) +* parameters, page layout: Page layout parameters. + (line 6) +* parent directories, cannot write to: \write and security. (line 22) +* parentheses: Delimiters. (line 6) +* part: Sectioning. (line 6) +* part <1>: \part. (line 6) +* part counter: Counters. (line 15) +* pattern, current tab stops, tabbing: tabbing. (line 51) +* pc: Units of length. (line 13) +* PDF graphic files: \includegraphics. (line 6) +* pdflatex: TeX engines. (line 15) +* pdflatex command: Output files. (line 20) +* pdflatex-dev: TeX engines. (line 66) +* pdfTeX: Output files. 
(line 20) +* pdfTeX engine: TeX engines. (line 16) +* period, abbreviation-ending: \@. (line 6) +* period, centered, in text: Text symbols. (line 170) +* period, sentence-ending: \@. (line 6) +* period, spacing after: \@. (line 6) +* pica: Units of length. (line 14) +* pict2e package: \line. (line 35) +* picture environment: picture. (line 6) +* pictures, creating: picture. (line 6) +* pilcrow: Text symbols. (line 47) +* placement of floats: Floats. (line 31) +* platex: TeX engines. (line 54) +* platex-dev: TeX engines. (line 67) +* plural: \newglossaryentry. (line 51) +* PNG files: \includegraphics. (line 6) +* poetry, an environment for: verse. (line 6) +* point: Units of length. (line 10) +* polish l: Additional Latin letters. + (line 36) +* polyglossia package: Accents. (line 6) +* polyglossia package <1>: \today. (line 13) +* polyglossia package <2>: Table of contents etc.. + (line 124) +* polyglossia package <3>: \index. (line 58) +* portrait orientation: Document class options. + (line 60) +* position, in picture: picture. (line 64) +* positional parameter: \newcommand & \renewcommand. + (line 54) +* postamble: makeindex. (line 54) +* PostScript point: Units of length. (line 20) +* postscript, in letters: \ps. (line 6) +* pounds symbol: Text symbols. (line 51) +* preamble: makeindex. (line 50) +* preamble, defined: Starting and ending. (line 22) +* prerelease testing: TeX engines. (line 70) +* prompt, *: Recovering from errors. + (line 23) +* pronunciation: Overview. (line 27) +* PSTricks package: \line. (line 35) +* pt: Units of length. (line 9) +* quad: Spacing in math mode. + (line 69) +* question mark, upside-down: Text symbols. (line 173) +* quotation environment: quotation & quote. (line 6) +* quotation marks, French: Text symbols. (line 33) +* quote environment: quotation & quote. (line 6) +* quote, single straight: Text symbols. (line 188) +* quote, straight base: Text symbols. (line 192) +* quoted text with paragraph indentation, displaying: quotation & quote. + (line 6) +* quoted text without paragraph indentation, displaying: quotation & quote. + (line 6) +* radical: \sqrt. (line 6) +* ragged left text: \raggedleft. (line 6) +* ragged left text, environment for: flushright. (line 6) +* ragged right text: \raggedright. (line 6) +* ragged right text, environment for: flushleft. (line 6) +* read a file: \read. (line 6) +* redefining environments: \newenvironment & \renewenvironment. + (line 6) +* reference, forward: Cross references. (line 27) +* references, resolving forward: Output files. (line 33) +* registered symbol: Text symbols. (line 195) +* relation, text above: \stackrel. (line 6) +* release candidates: TeX engines. (line 70) +* remarks in the margin: Marginal notes. (line 6) +* report class: Document classes. (line 11) +* reporting bugs: About this document. (line 24) +* reserved characters: Reserved characters. (line 6) +* resizing: \scalebox. (line 6) +* resizing <1>: \resizebox. (line 6) +* restricted shell access: \write18. (line 53) +* right angle quotation marks: Text symbols. (line 33) +* right arrow, in text: Text symbols. (line 198) +* right brace, in text: Text symbols. (line 92) +* right quote: Text symbols. (line 58) +* right quote, double: Text symbols. (line 179) +* right quote, single: Text symbols. (line 185) +* right-hand equation numbers: Document class options. + (line 63) +* right-justifying text: \raggedleft. (line 6) +* right-justifying text, environment for: flushright. (line 6) +* rigid lengths: Lengths. (line 9) +* ring accent: Accents. 
(line 77) +* ring accent, math: Math accents. (line 35) +* robust commands: \protect. (line 6) +* roman font: Font styles. (line 85) +* root file: Splitting the input. (line 10) +* root file <1>: Jobname. (line 6) +* roots: \sqrt. (line 6) +* rotating graphics: \rotatebox. (line 6) +* rotating text: \rotatebox. (line 6) +* rotation: \rotatebox. (line 6) +* row, tabbing: tabbing. (line 46) +* rubber lengths: Lengths. (line 9) +* rubber lengths, defining new: \newlength. (line 6) +* running header and footer: Page layout parameters. + (line 6) +* running header and footer style: \pagestyle. (line 6) +* samepage environment: \pagebreak & \nopagebreak. + (line 44) +* sans serif font: Font styles. (line 91) +* scaled point: Units of length. (line 36) +* scaling: \scalebox. (line 6) +* scaling <1>: \resizebox. (line 6) +* script fonts: Calligraphic. (line 6) +* script letters for math: Font styles. (line 79) +* script style: Math styles. (line 23) +* scriptscript style: Math styles. (line 26) +* scrollmode: Command line options. + (line 27) +* secnumdepth: Sectioning. (line 69) +* secnumdepth counter: Sectioning. (line 70) +* section: Sectioning. (line 6) +* section <1>: \section. (line 6) +* section counter: Counters. (line 15) +* section number, cross referencing: \ref. (line 6) +* section numbers, printing: Sectioning. (line 70) +* section symbol: Text symbols. (line 62) +* section, redefining: \@startsection. (line 6) +* sectioning commands: Sectioning. (line 6) +* sectioning, part: \part. (line 6) +* security and \write: \write and security. (line 6) +* see and see also index entries: \index. (line 58) +* self-contained sources: filecontents. (line 42) +* series, of fonts: Low-level font commands. + (line 48) +* setspace package: \baselineskip & \baselinestretch. + (line 79) +* setting counters: \setcounter. (line 6) +* sh, used by \write18: \write18. (line 60) +* shapes, of fonts: Low-level font commands. + (line 88) +* sharp S letters: Additional Latin letters. + (line 52) +* shell access: \write18. (line 6) +* shell escape: Command line options. + (line 61) +* SHELL, environment variables: \write18. (line 60) +* shellesc package: \write18. (line 71) +* showidx package: Indexes. (line 51) +* simulating typed text: verbatim. (line 6) +* single angle quotation marks: Text symbols. (line 33) +* single guillemets: Text symbols. (line 33) +* single left quote: Text symbols. (line 182) +* single low-9 quotation mark: Text symbols. (line 55) +* single quote, straight: Text symbols. (line 188) +* single right quote: Text symbols. (line 185) +* siunitx package: minipage. (line 137) +* siunitx package <1>: ~. (line 42) +* sizes of text: Font sizes. (line 6) +* skip plain TeX: Lengths. (line 9) +* skip register, plain TeX: \newlength. (line 6) +* slanted font: Font styles. (line 94) +* slides class: Document classes. (line 11) +* sloppypar environment: sloppypar. (line 6) +* sloppypar environment <1>: sloppypar. (line 6) +* slot, font: \DeclareTextAccent. (line 13) +* small caps font: Font styles. (line 88) +* sort: \newglossaryentry. (line 55) +* source files, making self-contained: filecontents. (line 42) +* sp: Units of length. (line 35) +* space between lines: \baselineskip & \baselinestretch. + (line 6) +* space, inserting horizontal: \hss. (line 6) +* space, inserting vertical: \addvspace. (line 6) +* space, negative thin: \thinspace & \negthinspace. + (line 6) +* space, thin: \thinspace & \negthinspace. + (line 6) +* space, unbreakable: ~. (line 6) +* space, vertical: \vspace. 
(line 6) +* spaces: Spaces. (line 6) +* spaces, ignore around commands: \ignorespaces & \ignorespacesafterend. + (line 6) +* spaces, ignore around commands <1>: xspace package. (line 6) +* spacing within math mode: Spacing in math mode. + (line 6) +* spacing, inter-sentence: \frenchspacing & \nonfrenchspacing. + (line 6) +* spacing, inter-sentence <1>: \normalsfcodes. (line 6) +* spacing, math mode: \phantom & \vphantom & \hphantom. + (line 6) +* spacing, math mode <1>: \mathstrut. (line 6) +* Spanish ordinals, feminine and masculine: Text symbols. (line 167) +* special characters: Reserved characters. (line 6) +* special characters <1>: Additional Latin letters. + (line 6) +* special insertions: Special insertions. (line 6) +* specifier, float placement: Floats. (line 31) +* splitting the input file: Splitting the input. (line 6) +* square root: \sqrt. (line 6) +* stack math: \stackrel. (line 6) +* star-variants, commands: \@ifstar. (line 6) +* starred form, defining new commands: \newcommand & \renewcommand. + (line 27) +* starting a new page: \newpage. (line 6) +* starting a new page and clearing floats: \clearpage & \cleardoublepage. + (line 6) +* starting and ending: Starting and ending. (line 6) +* starting on a right-hand page: \clearpage & \cleardoublepage. + (line 6) +* sterling symbol: Text symbols. (line 51) +* straight double quote, base: Text symbols. (line 192) +* straight quote, base: Text symbols. (line 192) +* straight single quote: Text symbols. (line 188) +* stretch, infinite horizontal: \hfill. (line 6) +* stretch, infinite vertical: \vfill. (line 6) +* stretch, omitting vertical: \raggedbottom. (line 6) +* strut: \footnote. (line 47) +* strut <1>: \strut. (line 6) +* strut, math: \mathstrut. (line 6) +* styles of text: Font styles. (line 6) +* styles, page: Page styles. (line 6) +* subparagraph: Sectioning. (line 6) +* subparagraph <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* subparagraph counter: Counters. (line 15) +* subscript: Subscripts & superscripts. + (line 6) +* subsection: Sectioning. (line 6) +* subsection <1>: \subsection. (line 6) +* subsection counter: Counters. (line 15) +* subsubsection: Sectioning. (line 6) +* subsubsection <1>: \subsubsection & \paragraph & \subparagraph. + (line 6) +* subsubsection counter: Counters. (line 15) +* suffix package: \@ifstar. (line 64) +* superscript: Subscripts & superscripts. + (line 6) +* symbol: \newglossaryentry. (line 59) +* symbol, defining: \DeclareTextSymbol. (line 6) +* symbols, arrows: Arrows. (line 6) +* symbols, boldface: \boldmath & \unboldmath. + (line 6) +* symbols, math: Math symbols. (line 6) +* symbols, text: Text symbols. (line 6) +* system commands, run from LaTeX: \write18. (line 6) +* system information: \write18. (line 65) +* T1: fontenc package. (line 6) +* tab stops, using: tabbing. (line 6) +* tabbing environment: tabbing. (line 6) +* table counter: Counters. (line 15) +* table environment: table. (line 6) +* table of contents: \contentsline. (line 6) +* table of contents entry, create dotted line: \@dottedtocline. + (line 6) +* table of contents entry, manually adding: \addcontentsline. (line 6) +* table of contents file: Output files. (line 43) +* table of contents, avoiding footnotes: Footnotes in section headings. + (line 6) +* table of contents, creating: Table of contents etc.. + (line 6) +* table of contents, sectioning numbers printed: Sectioning. (line 81) +* tablefootnote package: Footnotes in a table. + (line 53) +* tables, creating: table. 
(line 6) +* tabular environment: tabular. (line 6) +* template (simple), article: article template. (line 6) +* template, beamer: beamer template. (line 6) +* template, book: book template. (line 6) +* template, book <1>: Larger book template. + (line 6) +* templates, document: Document templates. (line 6) +* terminal input/output: Input/output. (line 6) +* terminal, writing to: \write. (line 30) +* TeX format (.fmt) files: TeX engines. (line 6) +* TeX logo: Text symbols. (line 65) +* texosquery package: \write18. (line 65) +* texput, jobname default: Jobname. (line 27) +* text style: Math styles. (line 21) +* text symbols: Text symbols. (line 6) +* text, resizing: \scalebox. (line 6) +* text, resizing <1>: \resizebox. (line 6) +* text, scaling: \scalebox. (line 6) +* text, scaling <1>: \resizebox. (line 6) +* textcase package: Upper and lower case. + (line 42) +* textcomp package: Text symbols. (line 6) +* thanks, for titlepage: \maketitle. (line 57) +* thebibliography environment: thebibliography. (line 6) +* theindex: Produce the index manually. + (line 6) +* theindex environment: Produce the index manually. + (line 9) +* theorem environment: theorem. (line 6) +* theorem-like environment: \newtheorem. (line 6) +* theorems, defining: \newtheorem. (line 6) +* theorems, typesetting: theorem. (line 6) +* thin space: Spacing in math mode. + (line 47) +* thin space <1>: \thinspace & \negthinspace. + (line 6) +* thin space, negative: Spacing in math mode. + (line 61) +* thin space, negative <1>: \thinspace & \negthinspace. + (line 6) +* thorn, Icelandic letter: Additional Latin letters. + (line 56) +* three-quarters em-dash: Text symbols. (line 201) +* tie: ~. (line 6) +* tie-after accent: Accents. (line 83) +* TikZ package: \line. (line 35) +* TikZ package <1>: \strut. (line 62) +* TikZ package <2>: \mbox & \makebox. (line 73) +* tikz-cd package: Arrows. (line 41) +* tilde accent: Accents. (line 48) +* tilde accent, math: Math accents. (line 38) +* tilde, ASCII, in text: Text symbols. (line 71) +* title page, separate or run-in: Document class options. + (line 71) +* title pages, creating: titlepage. (line 6) +* title, for titlepage: \maketitle. (line 64) +* titlepage environment: titlepage. (line 6) +* titlepage option: Document class options. + (line 49) +* titles, making: \maketitle. (line 6) +* titlesec package: Sectioning. (line 60) +* titlesec package <1>: \part. (line 54) +* titlesec package <2>: \chapter. (line 79) +* titlesec package <3>: \section. (line 64) +* titlesec package <4>: \subsection. (line 52) +* titlesec package <5>: \subsubsection & \paragraph & \subparagraph. + (line 63) +* tocbibbind package: Table of contents etc.. + (line 110) +* tocdepth: Sectioning. (line 80) +* tocdepth counter: Sectioning. (line 81) +* tocloft package: Table of contents etc.. + (line 110) +* tocloft package <1>: \contentsline. (line 34) +* today's date: \today. (line 6) +* tombstone: \rule. (line 13) +* topmargin: Page layout parameters. + (line 125) +* topnumber: Floats. (line 138) +* topnumber <1>: Floats. (line 139) +* totalnumber: Floats. (line 142) +* totalnumber <1>: Floats. (line 143) +* trademark symbol: Text symbols. (line 204) +* transcript file: Output files. (line 28) +* TrueType fonts: TeX engines. (line 6) +* TS1 encoding: Text symbols. (line 6) +* two-column output: \twocolumn. (line 6) +* two-thirds em-dash: Text symbols. (line 207) +* twocolumn option: Document class options. + (line 78) +* twoside option: Document class options. + (line 78) +* type styles: Font styles. 
(line 6) +* typed text, simulating: verbatim. (line 6) +* typeface sizes: Font sizes. (line 6) +* typefaces: Fonts. (line 6) +* typewriter font: Font styles. (line 97) +* typewriter labels in lists: description. (line 36) +* ulem package: Over- and Underlining. + (line 15) +* umlaut accent: Accents. (line 25) +* unbreakable space: ~. (line 6) +* underbar: Accents. (line 53) +* underlining: Over- and Underlining. + (line 6) +* underscore, in text: Text symbols. (line 210) +* Unicode: inputenc package. (line 15) +* Unicode input, native: TeX engines. (line 6) +* unicode-math package: Greek letters. (line 37) +* units, of length: Units of length. (line 6) +* unofficial nature of this manual: About this document. (line 20) +* unordered lists: itemize. (line 6) +* uplatex: TeX engines. (line 55) +* uplatex-dev: TeX engines. (line 68) +* uppercase: Upper and lower case. + (line 6) +* url package: \verb. (line 39) +* using BibTeX: Using BibTeX. (line 6) +* UTF-8, default LaTeX input encoding: Overview. (line 12) +* UTF-8, engine support for: TeX engines. (line 6) +* UTF-8, font support for: fontenc package. (line 6) +* variables, a list of: Counters. (line 6) +* vector symbol, math: Math accents. (line 41) +* verbatim environment: verbatim. (line 6) +* verbatim text: verbatim. (line 6) +* verbatim text, inline: \verb. (line 6) +* verbatimbox package: verbatim. (line 41) +* verse environment: verse. (line 6) +* vertical bar, double, in text: Text symbols. (line 83) +* vertical bar, in text: Text symbols. (line 80) +* vertical mode: Modes. (line 36) +* vertical space: \vspace. (line 6) +* vertical space <1>: \addvspace. (line 6) +* vertical space before paragraphs: \parindent & \parskip. + (line 6) +* vertical spacing: \phantom & \vphantom & \hphantom. + (line 6) +* vertical spacing <1>: \mathstrut. (line 6) +* vertical spacing, math mode: \smash. (line 6) +* visible space: \verb. (line 31) +* visible space symbol, in text: Text symbols. (line 213) +* weights, of fonts: Low-level font commands. + (line 58) +* whatsit item: \write. (line 65) +* white space: Spaces. (line 6) +* wide hat accent, math: Math accents. (line 44) +* wide tilde accent, math: Math accents. (line 47) +* widths, of fonts: Low-level font commands. + (line 70) +* Wright, Joseph: Upper and lower case. + (line 48) +* writing external files: filecontents. (line 6) +* writing letters: Letters. (line 6) +* x-height: Units of length. (line 42) +* xdvi command: Output files. (line 10) +* xdvipdfmx: TeX engines. (line 42) +* xelatex: TeX engines. (line 41) +* xelatex-dev: TeX engines. (line 69) +* XeTeX: TeX engines. (line 42) +* xindex program: makeindex. (line 159) +* xindy program: makeindex. (line 152) +* xparse package: LaTeX command syntax. + (line 35) +* xparse package <1>: \@ifstar. (line 64) +* xr package: xr package. (line 6) +* xr-hyper package: xr package. (line 6) +* xspace package: xspace package. (line 6) +* xstring package: Jobname. 
(line 53) + + + +Tag Table: +Node: Top1861 +Node: About this document3668 +Node: Overview5406 +Node: Starting and ending7186 +Ref: Starting & ending7321 +Node: Output files8498 +Ref: output files dvi8777 +Ref: output files pdf9289 +Ref: output files log9614 +Ref: output files aux9811 +Node: TeX engines10780 +Ref: tex engines latex11316 +Ref: tex engines lualatex12151 +Ref: tex engines xelatex12612 +Node: LaTeX command syntax14572 +Node: Environment16499 +Node: CTAN17726 +Node: Document classes19182 +Ref: document classes article19622 +Ref: document classes book19710 +Ref: document classes letter19895 +Ref: document classes report19971 +Ref: document classes slides20123 +Node: Document class options20576 +Node: Additional packages23658 +Node: Class and package construction24289 +Node: Class and package structure25739 +Node: Class and package commands28034 +Node: Fonts45639 +Ref: Typefaces45742 +Node: fontenc package47704 +Node: \DeclareFontEncoding51956 +Node: \DeclareTextAccent53376 +Node: \DeclareTextAccentDefault54295 +Node: \DeclareTextCommand & \ProvideTextCommand55370 +Node: \DeclareTextCommandDefault & \ProvideTextCommandDefault57622 +Node: \DeclareTextComposite58934 +Node: \DeclareTextCompositeCommand60019 +Node: \DeclareTextSymbol60701 +Node: \DeclareTextSymbolDefault61810 +Node: \LastDeclaredEncoding62770 +Node: \UseTextSymbol & \UseTextAccent63394 +Node: Font styles64604 +Node: Font sizes68834 +Node: Low-level font commands71713 +Ref: low level font commands fontencoding71997 +Ref: low level font commands fontfamily72553 +Ref: low level font commands fontseries73316 +Ref: low level font commands fontshape74448 +Ref: low level font commands fontsize74765 +Ref: low level font commands linespread75342 +Ref: low level font commands selectfont75623 +Ref: low level font commands usefont75949 +Node: Layout76192 +Node: \onecolumn76758 +Node: \twocolumn77117 +Ref: twocolumn columnsep77740 +Ref: twocolumn columnseprule77984 +Ref: twocolumn columnwidth78284 +Ref: twocolumn dbltopfraction78867 +Ref: twocolumn dblfloatpagefraction79833 +Ref: twocolumn dblfloatsep80071 +Ref: twocolumn dbltextfloatsep80380 +Ref: twocolumn dbltopnumber80560 +Node: \flushbottom81509 +Node: \raggedbottom82609 +Node: Page layout parameters83145 +Ref: page layout parameters columnsep83374 +Ref: page layout parameters columnseprule83374 +Ref: page layout parameters columnwidth83374 +Ref: page layout parameters headheight83628 +Ref: page layout parameters headsep83795 +Ref: page layout parameters footskip84107 +Ref: page layout parameters linewidth84441 +Ref: page layout parameters marginparpush84794 +Ref: page layout parameters marginsep84794 +Ref: page layout parameters marginparwidth84794 +Ref: page layout parameters oddsidemargin85834 +Ref: page layout parameters evensidemargin85834 +Ref: page layout parameters paperheight86379 +Ref: page layout parameters paperwidth86605 +Ref: page layout parameters textheight86829 +Ref: page layout parameters textwidth87218 +Ref: page layout parameters hsize88156 +Ref: page layout parameters topmargin88353 +Ref: page layout parameters topskip88657 +Node: \baselineskip & \baselinestretch88873 +Node: Floats94043 +Ref: floats bottomfraction98381 +Ref: floats floatpagefraction98508 +Ref: floats textfraction98619 +Ref: floats topfraction98821 +Ref: floats floatsep99073 +Ref: floats intextsep99183 +Ref: floats textfloatsep99398 +Ref: floats bottomnumber99662 +Ref: floats dbltopnumber99770 +Ref: floats topnumber99889 +Ref: floats totalnumber99993 +Node: \caption100610 +Node: 
Sectioning103336 +Ref: sectioning secnumdepth106445 +Ref: Sectioning/secnumdepth106445 +Ref: sectioning tocdepth107071 +Ref: Sectioning/tocdepth107071 +Node: \part108111 +Node: \chapter110283 +Node: \section114056 +Node: \subsection117323 +Node: \subsubsection & \paragraph & \subparagraph120037 +Node: \appendix122694 +Node: \frontmatter & \mainmatter & \backmatter123987 +Node: \@startsection125396 +Ref: startsection name126885 +Ref: \@startsection/name126885 +Ref: startsection level127339 +Ref: \@startsection/level127339 +Ref: startsection indent128220 +Ref: \@startsection/indent128220 +Ref: startsection beforeskip128483 +Ref: \@startsection/beforeskip128483 +Ref: startsection afterskip130004 +Ref: \@startsection/afterskip130004 +Ref: startsection style131315 +Ref: \@startsection/style131315 +Node: Cross references135016 +Node: \label137165 +Node: \pageref138985 +Node: \ref139775 +Node: xr package140770 +Node: Environments142591 +Node: abstract144593 +Node: array146182 +Node: center149095 +Node: \centering150848 +Node: description152345 +Node: displaymath154562 +Node: document156258 +Node: \AtBeginDocument156688 +Node: \AtEndDocument157312 +Node: enumerate157956 +Ref: enumerate enumi159819 +Ref: enumerate enumii159819 +Ref: enumerate enumiii159819 +Ref: enumerate enumiv159819 +Ref: enumerate labelenumi160217 +Ref: enumerate labelenumii160217 +Ref: enumerate labelenumiii160217 +Ref: enumerate labelenumiv160217 +Node: eqnarray160756 +Node: equation162752 +Node: figure163420 +Node: filecontents165593 +Node: flushleft167502 +Node: \raggedright168529 +Node: flushright169727 +Node: \raggedleft170551 +Node: itemize171422 +Ref: itemize labelitemi172970 +Ref: itemize labelitemii172970 +Ref: itemize labelitemiii172970 +Ref: itemize labelitemiv172970 +Ref: itemize leftmargin173629 +Ref: itemize leftmargini173629 +Ref: itemize leftmarginii173629 +Ref: itemize leftmarginiii173629 +Ref: itemize leftmarginiv173629 +Ref: itemize leftmarginv173629 +Ref: itemize leftmarginvi173629 +Node: letter175111 +Node: list175349 +Ref: list makelabel177796 +Ref: list itemindent179181 +Ref: list itemsep179318 +Ref: list labelsep179971 +Ref: list labelwidth180134 +Ref: list leftmargin181145 +Ref: list listparindent181998 +Ref: list parsep182229 +Ref: list partopsep182697 +Ref: list rightmargin183495 +Ref: list topsep183680 +Ref: list beginparpenalty187256 +Ref: list itempenalty187355 +Ref: list endparpenalty187459 +Node: \item188330 +Node: trivlist189576 +Node: math191104 +Node: minipage191410 +Node: picture197007 +Node: \put203237 +Node: \multiput203802 +Node: \qbezier204541 +Node: \graphpaper206014 +Node: \line206808 +Node: \linethickness208782 +Node: \thinlines209247 +Node: \thicklines209658 +Node: \circle210043 +Node: \oval210585 +Node: \shortstack211600 +Node: \vector213298 +Node: \makebox (picture)214212 +Node: \framebox (picture)215394 +Node: \frame216867 +Node: \dashbox217307 +Node: quotation & quote218442 +Node: tabbing219338 +Node: table225241 +Node: tabular227324 +Ref: tabular arrayrulewidth233661 +Ref: tabular arraystrech233901 +Ref: tabular doublerulesep234122 +Ref: tabular tabcolsep234258 +Node: \multicolumn234779 +Node: \vline238656 +Node: \cline240047 +Node: \hline240757 +Node: thebibliography241439 +Node: \bibitem243922 +Node: \cite246224 +Node: \nocite248092 +Node: Using BibTeX248600 +Node: BibTeX error messages250980 +Node: theorem251885 +Node: titlepage252807 +Node: verbatim254090 +Node: \verb255903 +Node: verse257996 +Node: Line breaking259223 +Node: \\260589 +Node: \obeycr & \restorecr263019 +Node: 
\newline263830 +Node: \- (hyphenation)264871 +Node: \discretionary266511 +Node: \fussy & \sloppy267402 +Node: sloppypar268217 +Node: \hyphenation269363 +Node: \linebreak & \nolinebreak269983 +Node: Page breaking271163 +Node: \clearpage & \cleardoublepage273193 +Node: \newpage274713 +Node: \enlargethispage276011 +Node: \pagebreak & \nopagebreak276967 +Node: Footnotes278938 +Node: \footnote280084 +Ref: footnote footnoterule281352 +Ref: footnote footnotesep281963 +Node: \footnotemark283029 +Node: \footnotetext285386 +Node: Footnotes in section headings285988 +Node: Footnotes in a table286821 +Node: Footnotes of footnotes289752 +Node: Definitions290450 +Node: \newcommand & \renewcommand291497 +Node: Control sequences296932 +Node: \providecommand298286 +Node: \makeatletter & \makeatother299449 +Node: \@ifstar301551 +Node: \newcounter305036 +Node: \newlength306756 +Node: \newsavebox307679 +Node: \newenvironment & \renewenvironment308658 +Node: \newtheorem313982 +Node: \newfont317554 +Node: \protect318787 +Node: \ignorespaces & \ignorespacesafterend321175 +Node: xspace package323925 +Node: Counters326124 +Node: \alph \Alph \arabic \roman \Roman \fnsymbol327825 +Node: \usecounter330460 +Node: \value331319 +Node: \setcounter332372 +Node: \addtocounter332968 +Node: \refstepcounter333422 +Node: \stepcounter334091 +Node: \day & \month & \year334644 +Node: Lengths335553 +Node: Units of length340131 +Ref: units of length pt340334 +Ref: units of length pc340451 +Ref: units of length in340474 +Ref: units of length bp340500 +Ref: units of length mm340631 +Ref: units of length cm340663 +Ref: units of length dd340692 +Ref: units of length cc340724 +Ref: units of length sp340749 +Ref: Lengths/ex340891 +Ref: units of length ex340891 +Ref: Lengths/em341034 +Ref: units of length em341034 +Ref: units of length mu341375 +Node: \setlength341904 +Node: \addtolength343016 +Node: \settodepth344439 +Node: \settoheight345427 +Node: \settowidth346425 +Node: \stretch347430 +Node: Expressions348632 +Node: Making paragraphs353446 +Node: \par355761 +Node: \indent & \noindent357807 +Node: \parindent & \parskip359342 +Node: Marginal notes360671 +Ref: marginal notes marginparpush362067 +Ref: marginal notes marginparsep362241 +Ref: marginal notes marginparwidth362373 +Node: Math formulas362722 +Node: Subscripts & superscripts365770 +Node: Math symbols367931 +Node: Arrows391702 +Node: \boldmath & \unboldmath392890 +Node: Blackboard bold394409 +Node: Calligraphic395211 +Node: Delimiters395742 +Node: \left & \right397929 +Node: \bigl & \bigr etc.401966 +Node: Dots404736 +Ref: ellipses cdots405167 +Ref: ellipses ddots405312 +Ref: ellipses ldots405393 +Ref: ellipses vdots405810 +Node: Greek letters406994 +Node: Math functions408673 +Node: Math accents410317 +Node: Over- and Underlining411216 +Node: Spacing in math mode413043 +Ref: spacing in math mode thickspace413974 +Ref: spacing in math mode medspace414425 +Ref: Spacing in math mode/\thinspace414881 +Ref: spacing in math mode thinspace414881 +Ref: spacing in math mode negthinspace415362 +Ref: spacing in math mode quad415735 +Ref: spacing in math mode qquad415991 +Node: \smash416329 +Node: \phantom & \vphantom & \hphantom418546 +Node: \mathstrut421120 +Node: Math styles422082 +Node: Math miscellany424816 +Node: Colon character & \colon425313 +Node: \*426006 +Node: \frac426586 +Node: \sqrt426957 +Node: \stackrel427543 +Node: Modes427816 +Ref: modes paragraph mode428266 +Ref: modes lr mode428578 +Ref: modes math mode429184 +Ref: modes vertical mode429519 +Ref: modes internal vertical 
mode429737 +Ref: modes inner paragraph mode430260 +Ref: modes outer paragraph mode430260 +Node: \ensuremath430674 +Node: Page styles431379 +Node: \maketitle432142 +Node: \pagenumbering435160 +Node: \pagestyle437147 +Node: \thispagestyle440655 +Node: Spaces441609 +Node: \enspace & \quad & \qquad442929 +Node: \hspace443844 +Node: \hfill445701 +Node: \hss446765 +Node: \spacefactor447459 +Node: \@450866 +Ref: \AT450986 +Node: \frenchspacing & \nonfrenchspacing452926 +Ref: \frenchspacing453044 +Node: \normalsfcodes453846 +Node: \(SPACE)454114 +Ref: Leading blanks456054 +Node: ~456367 +Node: \thinspace & \negthinspace459177 +Node: \/460375 +Node: \hrulefill & \dotfill462333 +Node: \bigskip & \medskip & \smallskip463693 +Ref: bigskip464511 +Ref: medskip464715 +Ref: smallskip464924 +Node: \bigbreak & \medbreak & \smallbreak465585 +Node: \strut466571 +Node: \vspace469808 +Node: \vfill471369 +Node: \addvspace472297 +Node: Boxes474343 +Node: \mbox & \makebox475049 +Ref: mbox makebox depth476317 +Ref: mbox makebox height476317 +Ref: mbox makebox width476317 +Ref: mbox makebox totalheight476317 +Node: \fbox & \framebox478412 +Ref: fbox framebox fboxrule479719 +Ref: fbox framebox fboxsep479909 +Node: \parbox481011 +Node: \raisebox483317 +Ref: raisebox depth484290 +Ref: raisebox height484290 +Ref: raisebox width484290 +Ref: raisebox totalheight484290 +Node: \sbox & \savebox484991 +Node: lrbox488077 +Node: \usebox488963 +Node: Color489402 +Node: Color package options490201 +Node: Color models491847 +Ref: color models cmyk492644 +Ref: color models gray493007 +Ref: color models rgb493156 +Ref: color models RGB493493 +Ref: color models named493868 +Node: Commands for color494156 +Node: Define colors494571 +Node: Colored text495296 +Node: Colored boxes497693 +Node: Colored pages499082 +Node: Graphics499775 +Node: Graphics package options501901 +Node: Graphics package configuration504654 +Node: \graphicspath505456 +Node: \DeclareGraphicsExtensions508414 +Node: \DeclareGraphicsRule510232 +Node: Commands for graphics513411 +Node: \includegraphics513916 +Ref: includegraphics width518979 +Ref: includegraphics height519512 +Ref: includegraphics totalheight519918 +Ref: includegraphics keepaspectratio520182 +Ref: includegraphics viewport521868 +Ref: includegraphics trim522263 +Ref: includegraphics clip522719 +Ref: includegraphics page522979 +Ref: includegraphics pagebox523070 +Ref: includegraphics interpolate523935 +Ref: includegraphics quiet524140 +Ref: includegraphics draft524301 +Ref: includegraphics bb525106 +Ref: includegraphics bbllx525504 +Ref: includegraphics bblly525504 +Ref: includegraphics bburx525504 +Ref: includegraphics bbury525504 +Ref: includegraphics natwidth525646 +Ref: includegraphics natheight525646 +Ref: includegraphics hiresbb525832 +Ref: includegraphics type526594 +Ref: includegraphics ext526634 +Ref: includegraphics read526737 +Ref: includegraphics command526854 +Node: \rotatebox527100 +Node: \scalebox529954 +Node: \resizebox531034 +Node: Special insertions532228 +Node: Reserved characters533096 +Node: Upper and lower case534356 +Node: Symbols by font position536742 +Node: Text symbols537421 +Node: Accents542405 +Node: \accent544690 +Node: Additional Latin letters546516 +Ref: Non-English characters546698 +Node: inputenc package547717 +Ref: \inputencoding549933 +Node: \rule550269 +Node: \today551457 +Node: Splitting the input552393 +Node: \endinput554135 +Node: \include & \includeonly555438 +Node: \input559735 +Node: Front/back matter560950 +Node: Table of contents etc.561283 +Node: 
\@dottedtocline567044 +Node: \addcontentsline568631 +Node: \addtocontents571694 +Node: \contentsline573911 +Node: \nofiles575689 +Node: \numberline576440 +Node: Indexes577604 +Node: Produce the index manually580398 +Node: \index581562 +Node: makeindex586773 +Ref: makeindex preamble588441 +Ref: makeindex postamble588587 +Ref: makeindex group skip588683 +Ref: makeindex letheadflag589023 +Ref: makeindex lethead prefix589484 +Ref: makeindex lethead suffix589636 +Ref: makeindex item 0589784 +Ref: makeindex item 1589864 +Ref: makeindex item 2589939 +Ref: makeindex item 01590017 +Ref: makeindex item x1590122 +Ref: makeindex item 12590327 +Ref: makeindex item x2590435 +Ref: makeindex delim 0590595 +Ref: makeindex delim 1590725 +Ref: makeindex delim 2590855 +Ref: makeindex delim n590981 +Ref: makeindex delim r591116 +Ref: makeindex line max591224 +Ref: makeindex indent space591359 +Ref: makeindex indent length591454 +Ref: makeindex page precedence591639 +Node: \printindex592676 +Node: Glossaries593149 +Node: \newglossaryentry595116 +Node: \gls596583 +Node: Letters597377 +Node: \address600995 +Node: \cc601806 +Node: \closing602248 +Node: \encl602562 +Node: \location602976 +Node: \makelabels603240 +Node: \name605557 +Node: \opening605798 +Node: \ps606079 +Node: \signature606368 +Node: \telephone607596 +Node: Input/output607961 +Node: \openin & \openout608691 +Node: \read611451 +Node: \typein612638 +Node: \typeout613898 +Node: \write614938 +Node: \write and security619665 +Node: \message620609 +Node: \wlog622438 +Node: \write18622939 +Node: Command line interface626430 +Ref: Command line626594 +Node: Command line options628747 +Ref: interaction modes629750 +Ref: output directory630704 +Node: Command line input632403 +Node: Jobname634520 +Node: Recovering from errors637763 +Node: Document templates639191 +Node: beamer template639625 +Node: article template640267 +Node: book template640730 +Node: Larger book template641209 +Node: Index642751 +Ref: Command Index642837 + +End Tag Table + + +Local Variables: +coding: utf-8 +End: diff --git a/assets/info/mairix-el.info.gz b/assets/info/mairix-el.info.gz new file mode 100644 index 00000000..0a9ab8a1 Binary files /dev/null and b/assets/info/mairix-el.info.gz differ diff --git a/assets/info/make.info-1.gz b/assets/info/make.info-1.gz new file mode 100644 index 00000000..3563c6ae Binary files /dev/null and b/assets/info/make.info-1.gz differ diff --git a/assets/info/make.info-2.gz b/assets/info/make.info-2.gz new file mode 100644 index 00000000..51350c17 Binary files /dev/null and b/assets/info/make.info-2.gz differ diff --git a/assets/info/make.info-3.gz b/assets/info/make.info-3.gz new file mode 100644 index 00000000..a34e5750 Binary files /dev/null and b/assets/info/make.info-3.gz differ diff --git a/assets/info/make.info.gz b/assets/info/make.info.gz new file mode 100644 index 00000000..cc5c5f67 Binary files /dev/null and b/assets/info/make.info.gz differ diff --git a/assets/info/message.info.gz b/assets/info/message.info.gz new file mode 100644 index 00000000..34d5e60f Binary files /dev/null and b/assets/info/message.info.gz differ diff --git a/assets/info/mh-e.info.gz b/assets/info/mh-e.info.gz new file mode 100644 index 00000000..8f921fae Binary files /dev/null and b/assets/info/mh-e.info.gz differ diff --git a/assets/info/modus-themes.info.gz b/assets/info/modus-themes.info.gz new file mode 100644 index 00000000..11c02fe8 Binary files /dev/null and b/assets/info/modus-themes.info.gz differ diff --git a/assets/info/mu4e.info.gz b/assets/info/mu4e.info.gz 
new file mode 100644 index 00000000..8d6065b1 Binary files /dev/null and b/assets/info/mu4e.info.gz differ diff --git a/assets/info/newsticker.info.gz b/assets/info/newsticker.info.gz new file mode 100644 index 00000000..58df47a0 Binary files /dev/null and b/assets/info/newsticker.info.gz differ diff --git a/assets/info/nxml-mode.info.gz b/assets/info/nxml-mode.info.gz new file mode 100644 index 00000000..16323bd3 Binary files /dev/null and b/assets/info/nxml-mode.info.gz differ diff --git a/assets/info/octave-mode.info.gz b/assets/info/octave-mode.info.gz new file mode 100644 index 00000000..466f77ef Binary files /dev/null and b/assets/info/octave-mode.info.gz differ diff --git a/assets/info/org.info.gz b/assets/info/org.info.gz new file mode 100644 index 00000000..6e727144 Binary files /dev/null and b/assets/info/org.info.gz differ diff --git a/assets/info/parted.info.gz b/assets/info/parted.info.gz new file mode 100644 index 00000000..c40ff0bc Binary files /dev/null and b/assets/info/parted.info.gz differ diff --git a/assets/info/pcl-cvs.info.gz b/assets/info/pcl-cvs.info.gz new file mode 100644 index 00000000..58c9a5c2 Binary files /dev/null and b/assets/info/pcl-cvs.info.gz differ diff --git a/assets/info/pcl.info b/assets/info/pcl.info new file mode 100644 index 00000000..9a77790c --- /dev/null +++ b/assets/info/pcl.info @@ -0,0 +1,29024 @@ +This is pcl.info, produced by makeinfo version 5.2 from pcl.texi. + + +File: pcl.info, Node: Top, Next: Letter, Prev: (dir), Up: (dir) + +Practical Common Lisp +===================== + +Unofficial Texinfo Format (C) Peter Seibel + + “that book is dead sexy” — ‘Xach on #lisp’ + (*Note Blurbs::.) + +This page, and the pages it links to, contain text of the Common Lisp +book Practical Common Lisp published by Apress +(https://www.apress.com/us/book/9781590592397). These pages now contain +the final text as it appears in the book. If you find errors in these +pages, please send email to . These pages will +remain online in perpetuity -— I hope they will serve as a useful +introduction to Common Lisp for folks who are curious about Lisp but +maybe not yet curious enough to shell out big bucks for a dead-tree book +and a good Common Lisp tutorial for folks who want to get down to real +coding right away. However, don't let that stop you from buying the +printed version available from Apress at your favorite local or online +bookseller. For the complete bookstore browsing experience, you can +read the letter to the reader (*Note Letter::.) that appears on the +back cover of the treeware edition of the book. + +* Menu: + +* Letter:: Letter to the Reader +* Blurbs:: Blurbs +* Chapter 1:: Introduction: Why Lisp? 
+* Chapter 2:: Lather, Rinse, Repeat: A Tour of the REPL +* Chapter 3:: Practical: A Simple Database +* Chapter 4:: Syntax and Semantics +* Chapter 5:: Functions +* Chapter 6:: Variables +* Chapter 7:: Macros: Standard Control Constructs +* Chapter 8:: Macros: Defining Your Own +* Chapter 9:: Practical: Building a Unit Test Framework +* Chapter 10:: Numbers, Characters, and Strings +* Chapter 11:: Collections +* Chapter 12:: They Called It LISP for a Reason: List Processing +* Chapter 13:: Beyond Lists: Other Uses for Cons Cells +* Chapter 14:: Files and File I/O +* Chapter 15:: Practical: A Portable Pathname Library +* Chapter 16:: Object Reorientation: Generic Functions +* Chapter 17:: Object Reorientation: Classes +* Chapter 18:: A Few FORMAT Recipes +* Chapter 19:: Beyond Exception Handling: Conditions and Restarts +* Chapter 20:: The Special Operators +* Chapter 21:: Programming in the Large: Packages and Symbols +* Chapter 22:: LOOP for Black Belts +* Chapter 23:: Practical: A Spam Filter +* Chapter 24:: Practical: Parsing Binary Files +* Chapter 25:: Practical: An ID3 Parser +* Chapter 26:: Practical: Web Programming with AllegroServe +* Chapter 27:: Practical: An MP3 Database +* Chapter 28:: Practical: A Shoutcast Server +* Chapter 29:: Practical: An MP3 Browser +* Chapter 30:: Practical: An HTML Generation Library, the Interpreter +* Chapter 31:: Practical: An HTML Generation Library, the Compiler +* Chapter 32:: Conclusion: What's Next? + + -- The Detailed Node Listing -- + +1. Introduction: Why Lisp? + +* 1-1:: Why Lisp? +* 1-2:: Where It Began +* 1-2-1:: But I learned Lisp Once, And IT Wasn't Like what you're describing +* 1-3:: Who This Book Is For + +2. Lather, Rinse, Repeat: A Tour of the REPL + +* 2-1:: Choosing a Lisp Implementation +* 2-2:: Getting Up and Running with Lisp in a Box +* 2-3:: Free Your Mind: Interactive Programming +* 2-4:: Experimenting in the REPL +* 2-5:: "Hello, World," Lisp Style +* 2-6:: Saving Your Work + +3. Practical: A Simple Database + +* 3-1:: CDs and Records +* 3-2:: Filing CDs +* 3-3:: Looking at the Database Contents +* 3-4:: Improving the User Interaction +* 3-5:: Saving and Loading the Database +* 3-6:: Querying the Database +* 3-7:: Updating Existing Records-Another Use for WHERE +* 3-8:: Removing Duplication and Winning Big +* 3-9:: Wrapping Up + +4. Syntax and Semantics + +* 4-1:: What's with All the Parentheses? +* 4-2:: Breaking Open the Black Box +* 4-3:: S-expressions +* 4-4:: S-expressions As Lisp Forms +* 4-5:: Function Calls +* 4-6:: Special Operators +* 4-7:: Macros +* 4-8:: Truth, Falsehood, and Equality +* 4-9:: Formatting Lisp Code + +5. Functions + +* 5-1:: Defining New Functions +* 5-2:: Function Parameter Lists +* 5-3:: Optional Parameters +* 5-4:: Rest Parameters +* 5-5:: Keyword Parameters +* 5-6:: Mixing Different Parameter Types +* 5-7:: Function Return Values +* 5-8:: Functions As Data, a.k.a. Higher-Order Functions +* 5-9:: Anonymous Functions + +6. Variables + +* 6-1:: Variable Basics +* 6-2:: Lexical Variables and Closures +* 6-3:: Dynamic, a.k.a. Special, Variables +* 6-4:: Constants +* 6-5:: Assignment +* 6-6:: Generalized Assignment +* 6-7:: Other Ways to Modify Places + +7. Macros: Standard Control Constructs + +* 7-1:: WHEN and UNLESS +* 7-2:: COND +* 7-3:: AND, OR, and NOT +* 7-4:: Looping +* 7-5:: DOLIST and DOTIMES +* 7-6:: DO +* 7-7:: The Mighty LOOP + +8. Macros: Defining Your Own + +* 8-1:: The Story of Mac: A Just-So Story +* 8-2:: Macro Expansion Time vs. 
Runtime +* 8-3:: DEF MACRO +* 8-4:: A Sample Macro: do-primes +* 8-5:: Macro Parameters +* 8-6:: Generating the Expansion +* 8-7:: Plugging the Leaks +* 8-8:: Macro-Writing Macros +* 8-8-1:: Another classic macro-writing MACRO: ONCE-ONLY +* 8-9:: Beyond Simple Macros + +9. Practical: Building a Unit Test Framework + +* 9-1:: Two First Tries +* 9-2:: Refactoring +* 9-3:: Fixing the Return Value +* 9-4:: Better Result Reporting +* 9-5:: An Abstraction Emerges +* 9-6:: A Hierarchy of Tests +* 9-7:: Wrapping Up + +10. Numbers, Characters, and Strings + +* 10-1:: Numbers +* 10-2:: Numeric Literals +* 10-3:: Basic Math +* 10-4:: Numeric Comparisons +* 10-5:: Higher Math +* 10-6:: Characters +* 10-7:: Character Comparisons +* 10-8:: Strings +* 10-9:: String Comparisons + +11. Collections + +* 11-1:: Vectors +* 11-2:: Subtypes of Vector +* 11-3:: Vectors As Sequences +* 11-4:: Sequence Iterating Functions +* 11-5:: Higher-Order Function Variants +* 11-6:: Whole Sequence Manipulations +* 11-7:: Sorting and Merging +* 11-8:: Subsequence Manipulations +* 11-9:: Sequence Predicates +* 11-10:: Sequence Mapping Functions +* 11-11:: Hash Tables +* 11-12:: Hash Table Iteration + +12. They Called It LISP for a Reason: List Processing + +* 12-1:: "There Is No List" +* 12-2:: Functional Programming and Lists +* 12-3:: "Destructive" Operations +* 12-4:: Combining Recycling with Shared Structure +* 12-5:: List-Manipulation Functions +* 12-6:: Mapping +* 12-7:: Other Structures + +13. Beyond Lists: Other Uses for Cons Cells + +* 13-1:: Trees +* 13-2:: Sets +* 13-3:: Lookup Tables: Alists and Plists +* 13-4:: DESTRUCTURING-BIND + +14. Files and File I/O + +* 14-1:: Reading File Data +* 14-2:: Reading Binary Data +* 14-3:: Bulk Reads +* 14-4:: File Output +* 14-5:: Closing Files +* 14-6:: Filenames +* 14-6-1:: How We Got Here +* 14-7:: How Pathnames Represent Filenames +* 14-8:: Constructing New Pathnames +* 14-9:: Two Representations of Directory Names +* 14-10:: Interacting with the File System +* 14-11:: Other Kinds of I/O + +15. Practical: A Portable Pathname Library + +* 15-1:: The API +* 15-2:: *FEATURES* and Read-Time Conditionalization +* 15-2-1:: Packaging the Library +* 15-3:: Listing a Directory +* 15-4:: Testing a File's Existence +* 15-5:: Walking a Directory Tree + +16. Object Reorientation: Generic Functions + +* 16-1:: Generic Functions and Classes +* 16-2:: Generic Functions and Methods +* 16-3:: DEFGENERIC +* 16-4:: DEFMETHOD +* 16-5:: Method Combination +* 16-6:: The Standard Method Combination +* 16-7:: Other Method Combinations +* 16-8:: Multimethods +* 16-8-1:: Multimethods vs. Method Overloading +* 16-9:: To Be Continued . . . + +17. Object Reorientation: Classes + +* 17-1:: DEFCLASS +* 17-1-1:: What Are "User-Defined Classes"? +* 17-2:: Slot Specifiers +* 17-3:: Object Initialization +* 17-4:: Accessor Functions +* 17-5:: WITH-SLOTS and WITH-ACCESSORS +* 17-6:: Class-Allocated Slots +* 17-7:: Slots and Inheritance +* 17-8:: Multiple Inheritance +* 17-9:: Good Object-Oriented Design + +18. A Few FORMAT Recipes + +* 18-1:: The FORMAT Function +* 18-2:: FORMAT Directives +* 18-3:: Basic Formatting +* 18-4:: Character and Integer Directives +* 18-5:: Floating-Point Directives +* 18-6:: English-Language Directives +* 18-7:: Conditional Formatting +* 18-8:: Iteration +* 18-9:: Hop, Skip, Jump +* 18-10:: And More . . . + +19. 
Beyond Exception Handling: Conditions and Restarts + +* 19-1:: The Lisp Way +* 19-2:: Conditions +* 19-3:: Condition Handlers +* 19-3-1:: JAVA-STYLE EXCEPTION HANDLING +* 19-4:: Restarts +* 19-5:: Providing Multiple Restarts +* 19-6:: Other Uses for Conditions +* 19-6-1:: Writing Robust Software + +20. The Special Operators + +* 20-1:: Controlling Evaluation +* 20-2:: Manipulating the Lexical Environment +* 20-3:: Local Flow of Control +* 20-4:: Unwinding the Stack +* 20-5:: Multiple Values +* 20-6:: EVAL-WHEN +* 20-7:: Other Special Operators + +21. Programming in the Large: Packages and Symbols + +* 21-1:: How the Reader Uses Packages +* 21-2:: A Bit of Package and Symbol Vocabulary +* 21-3:: Three Standard Packages +* 21-4:: Defining Your Own Packages +* 21-5:: Packaging Reusable Libraries +* 21-6:: Importing Individual Names +* 21-7:: Packaging Mechanics +* 21-8:: Package Gotchas + +22. LOOP for Black Belts + +* 22-1:: The Parts of a LOOP +* 22-2:: Iteration Control +* 22-3:: Counting Loops +* 22-4:: Looping Over Collections and Packages +* 22-5:: Equals-Then Iteration +* 22-6:: Local Variables +* 22-7:: Destructuring Variables +* 22-8:: Value Accumulation +* 22-9:: Unconditional Execution +* 22-10:: Conditional Execution +* 22-11:: Setting Up and Tearing Down +* 22-12:: Termination Tests +* 22-13:: Putting It All Together + +23. Practical: A Spam Filter + +* 23-1:: The Heart of a Spam Filter +* 23-2:: Training the Filter +* 23-3:: Per-Word Statistics +* 23-4:: Combining Probabilities +* 23-5:: Inverse Chi Square +* 23-6:: Training the Filter +* 23-7:: Testing the Filter +* 23-8:: A Couple of Utility Functions +* 23-9:: Analyzing the Results +* 23-10:: What's Next + +24. Practical: Parsing Binary Files + +* 24-1:: Binary Files +* 24-2:: Binary Format Basics +* 24-3:: Strings in Binary Files +* 24-4:: Composite Structures +* 24-5:: Designing the Macros +* 24-6:: Making the Dream a Reality +* 24-7:: Reading Binary Objects +* 24-8:: Writing Binary Objects +* 24-9:: Adding Inheritance and Tagged Structures +* 24-10:: Keeping Track of Inherited Slots +* 24-11:: Tagged Structures +* 24-12:: Primitive Binary Types +* 24-13:: The Current Object Stack + +25. Practical: An ID3 Parser + +* 25-1:: Structure of an ID3v2 Tag +* 25-2:: Defining a Package +* 25-3:: Integer Types +* 25-4:: String Types +* 25-5:: ID3 Tag Header +* 25-6:: ID3 Frames +* 25-7:: Detecting Tag Padding +* 25-8:: Supporting Multiple Versions of ID3 +* 25-9:: Versioned Frame Base Classes +* 25-10:: Versioned Concrete Frame Classes +* 25-11:: What Frames Do You Actually Need? +* 25-12:: Text Information Frames +* 25-13:: Comment Frames +* 25-14:: Extracting Information from an ID3 Tag + +26. Practical: Web Programming with AllegroServe + +* 26-1:: A 30-Second Intro to Server-Side Web Programming +* 26-2:: AllegroServe +* 26-3:: Generating Dynamic Content with AllegroServe +* 26-4:: Generating HTML +* 26-5:: HTML Macros +* 26-6:: Query Parameters +* 26-7:: Cookies +* 26-8:: A Small Application Framework +* 26-9:: The Implementation + +27. Practical: An MP3 Database + +* 27-1:: The Database +* 27-1-1:: The Package +* 27-2:: Defining a Schema +* 27-3:: Inserting Values +* 27-4:: Querying the Database +* 27-5:: Matching Functions +* 27-6:: Getting at the Results +* 27-7:: Other Database Operations + +28. Practical: A Shoutcast Server + +* 28-1:: The Shoutcast Protocol +* 28-2:: Song Sources +* 28-2-1:: The Package +* 28-3:: Implementing Shoutcast + +29.
Practical: An MP3 Browser + +* 29-1:: Playlists +* 29-1-1:: The Package +* 29-2:: Playlists As Song Sources +* 29-3:: Manipulating the Playlist +* 29-4:: Query Parameter Types +* 29-5:: Boilerplate HTML +* 29-6:: The Browse Page +* 29-7:: The Playlist +* 29-8:: Finding a Playlist +* 29-9:: Running the App + +30. Practical: An HTML Generation Library, the Interpreter + +* 30-1:: Designing a Domain-Specific Language +* 30-2:: The FOO Language +* 30-3:: Character Escaping +* 30-3-1:: The Package +* 30-4:: Indenting Printer +* 30-5:: HTML Processor Interface +* 30-6:: The Pretty Printer Backend +* 30-6-1:: Using Conditions to Have Your Cake and Eat It Too +* 30-7:: The Basic Evaluation Rule +* 30-8:: What's Next? + +31. Practical: An HTML Generation Library, the Compiler + +* 31-1:: The Compiler +* 31-2:: FOO Special Operators +* 31-3:: FOO Macros +* 31-4:: The Public API +* 31-5:: The End of the Line + +32. Conclusion: What's Next? + +* 32-1:: Finding Lisp Libraries +* 32-2:: Interfacing with Other Languages +* 32-3:: Make It Work, Make It Right, Make It Fast +* 32-4:: Delivering Applications +* 32-5:: Where to Go Next + + + + +File: pcl.info, Node: Letter, Next: Blurbs, Prev: Top, Up: Top + +Letter to the Reader +******************** + +Dear Reader, + +‘Practical Common Lisp’ ... isn't that an oxymoron? If you're like +most programmers, you probably know something about Lisp -— from a +comp sci course in college or from learning enough Elisp to customize +Emacs a bit. Or maybe you just know someone who won't shut up about +Lisp, the greatest language ever. But you probably never figured you'd +see ‘practical’ and ‘Lisp’ in the same book title. + +Yet, you're reading this; you must want to know more. Maybe you believe +learning Lisp will make you a better programmer in any language. Or +maybe you just want to know what those Lisp fanatics are yammering about +all the time. Or maybe you have learned some Lisp but haven't quite +made the leap to using it to write interesting software. + +If any of those is true, this book is for you. Using Common Lisp, an +ANSI standardized, industrial-strength dialect of Lisp, I show you how +to write software that goes well beyond silly academic exercises or +trivial editor customizations. And I show you how Lisp -— even with +many of its features adopted by other languages—still has a few tricks +up its sleeve. + +But unlike many Lisp books, this one doesn't just touch on a few of +Lisp's greatest features and then leave you on your own to actually use +them. I cover all the language features you'll need to write real +programs and devote well over a third of the book to developing +nontrivial software —- a statistical spam filter, a library for +parsing binary files, and a server for streaming MP3s over a network +complete with an online MP3 database and Web interface. + +So turn the book over, open it up, and see for yourself how eminently +practical using the greatest language ever invented can be. + +Sincerely, + +Peter Seibel + + +File: pcl.info, Node: Blurbs, Next: Chapter 1, Prev: Letter, Up: Top + + “I have been complimented many times and they always embarrass + me; I always feel that they have not said enough.” —Mark Twain + +Blurbs +****** + + “that book is dead sexy” — ‘Xach on #lisp’ + + ------------------------------------------------------------- + + “Peter Seibel offers a fresh view of Lisp and its possibilities + for elegantly solving problems. 
In Practical Common Lisp, he gives + enough basic information to let you quickly see the power of the + functional language paradigm. He then dazzles you with examples + that seem almost magical in their simplicity and power. This read + is pure fun from start to finish.” - ‘Gary Pollice’, from Dr. + Dobb's Portal May-17-2006 article on the 2006 Jolt Awards + (http://www.drdobbs.com/joltawards/2006-jolt-awards/187900423?pgno=4) + + ------------------------------------------------------------- + + “Peter Seibel's ‘Practical Common Lisp’ is just what the + title implies: an excellent introduction to Common Lisp for someone + who wants to dive in and start using the language for real work. + The book is very well written and is fun to read—at least for + those of us whose idea of fun extends to learning new programming + languages. + + Rather than spending a lot of time on abstract discussion of Lisp's + place in the universe of programming lnaguages, Seibel dives right + in, guiding the reader through a series of programming examples of + increasing complexity. This approach places the most emphasis on + those parts of Common Lisp that skilled programmers use the most, + without getting bogged down in the odd corners of Common Lisp that + even the experts must look up in the manual. The result of + Seibel's example-driven approach is to give the reader an excellent + appreciation of the power of Common Lisp in building complex, + evolving software systems with a minimum of effort. + + There are already many good books on Common Lisp that offer a more + abstract and comparative approach, but a good 'Here's how you do + it—and why' book, aimed at the working programmer, is a valuable + contribution, both to current Common Lisp users and those who + should be.” — ‘Scott E. Fahlman, Research Professor of + Computer Science, Carnegie Mellon Universit’ + + ------------------------------------------------------------- + + “This book shows the power of Lisp not only in the areas that it + has traditionally been noted for—such as developing a complete + unit test framework in only 26 lines of code—but also in new + areas such as parsing binary MP3 files, building a web application + for browsing a collection of songs, and streaming audio over the + web. Many readers will be surprised that Lisp allows you to do all + this with conciseness similar to scripting languages such as + Python, efficiency similar to C++, and unparalleled flexibility in + designing your own language extensions.” - ‘Peter Norvig, + Director of Search Quality, Google Inc; author of Paradigms of + Artificial Intelligence Programming: Case Studies in Common Lisp’ + + ------------------------------------------------------------- + + “I wish this book had already existed when I started learning + Lisp. It's not that there aren't other good books about (Common) + Lisp out there, but none of them has such a pragmatic, up-to-date + approach. And let's not forget that Peter covers topics like + pathnames or conditions and restarts which are completely ignored + in the rest of the Lisp literature. + + If you're new to Lisp and want to dive right in don't hesitate to + buy this book. 
Once you've read it and worked with it you can + continue with the ‘classics’ like Graham, Norvig, Keene, or + Steele.” — ‘Edi Weitz, maintainer of the Common Lisp Cookbook + and author of CL-PPCRE regular expression library.’ + + ------------------------------------------------------------- + + “Two prehensile toes up!” - ‘Kenny Tilton, comp.lang.lisp + demon, reporting on behalf of his development team.’ + + ------------------------------------------------------------- + + “Experienced programmers learn best from examples and it is + delightful to see that Lisp is finally being served with Seibel's + example-rich tutorial text. Especially delightful is the fact that + this book includes so many examples that fall within the realm of + problems today's programmers might be called upon to tackle, such + as Web development and streaming media. - ‘Philip Greenspun, + author of Software Engineering for Internet Applications, MIT + Department of Electrical Engineering and Computer Science’ + + ------------------------------------------------------------- + + “‘Practical Common Lisp’ is an excellent book that covers the + breadth of the Common Lisp language and also demonstrates the + unique features of Common Lisp with real-world applications that + the reader can run and extend. This book not only shows what + Common Lisp is but also why every programmer should be familiar + with Lisp.” - ‘John Foderaro, Senior Scientist, Franz Inc.’ + + ------------------------------------------------------------- + + “The Maxima Project frequently gets queries from potential new + contributors who would like to learn Common Lisp. I am pleased to + finally have a book that I can recommend to them without + reservation. Peter Seibel's clear, direct style allows the reader + to quickly appreciate the power of Common Lisp. His many included + examples, which focus on contemporary programming problems, + demonstrate that Lisp is much more than an academic programming + language. ‘Practical Common Lisp’ is a welcome addition to the + literature.” — ‘James Amundson, Maxima Project Leader’ + + ------------------------------------------------------------- + + “I like the interspersed Practical chapters on 'real' and useful + programs. We need books of this kind telling the world that + crunching strings and numbers into trees or graphs is easily done + in Lisp. — ‘Professor Christian Queinnec, Universite Paris 6 + (Pierre et Marie Curie)’ + + ------------------------------------------------------------- + + “One of the most important parts of learning a programming + language is learning its proper programming style. This is hard to + teach, but it can be painlessly absorbed from ‘Practical Common + Lisp’. Just reading the practical examples made me a better + programmer in any language.” - ‘Peter Scott, Lisp programmer’ + + ------------------------------------------------------------- + + “Finally, a Lisp book for the rest of us. If you want to learn + how to write a factorial function, this is not your book. Seibel + writes for the practical programmer, emphasizing the + engineer/artist over the scientist, subtly and gracefully implying + the power of the language while solving understandable real-world + problems. + + In most chapters, the reading of the chapter feels just like the + experience of writing a program, starting with a little + understanding, then having that understanding grow, like building + the shoulders upon which you can then stand. 
When Seibel + introduced macros as an aside while building a test framework, I + was shocked at how such a simple example made me really 'get' them. + Narrative context is extremely powerful and the technical books + that use it are a cut above. Congrats!” — ‘Keith Irwin, Lisp + Programmer’ + + ------------------------------------------------------------- + + “While learning Lisp, one is often refered to the CL HyperSpec if + they do not know what a particular function does, however, I found + that I often did not 'get it' just reading the HyperSpec. When I + had a problem of this manner, I turned to ‘Practical Common + Lisp’ every single time—it is by far the most readable source + on the subject that shows you how to program, not just tell you.” + — ‘Philip Haddad, Lisp Programmer’ + + ------------------------------------------------------------- + + “With the IT world evolving at an ever increasing pace, + professionals need the most powerful tools available. This is why + Common Lisp—the most powerful, flexible, and stable programming + language ever—is seeing such a rise in popularity. ‘Practical + Common Lisp’ is the long-awaited book that will help you harness + the power of Common Lisp to tackle today's complex real world + problems.” — ‘Marc Battyani, author of CL-PDF, + CL-TYPESETTING, and mod_lisp.’ + + ------------------------------------------------------------- + + “Please don't assume Common Lisp is only useful for Databases, + Unit Test Frameworks, Spam Filters, ID3 Parsers, Web Programming, + Shoutcast Servers, HTML Generation Interpreters, and HTML + Generation Compilers just because these are the only things + happened to be implemented in the book ‘Practical Common Lisp’. + — ‘Tobias C. Rittweiler, Lisp Programmer’ + + ------------------------------------------------------------- + + “When I met Peter, who just started writing this book, I asked to + myself (not to him, of course) ‘why yet another book on Common + Lisp, when there are many nice introductory books?’ One year + later, I found a draft of the new book and recognized I was wrong. + This book is not ‘yet another’ one. The author focuses on + practical aspects rather than on technical details of the language. + When I first studied Lisp by reading an introductory book, I felt I + understood the language, but I also had an impression ‘so + what?’, meaning I had no idea about how to use it. In contrast, + this book leaps into a ‘PRACTICAL’ chapter after the first few + chapters that explain the very basic notions of the language. Then + the readers are expected to learn more about the language while + they are following the PRACTICAL projects, which are combined + together to form a product of a significant size. After reading + this book, the readers will feel themselves expert programmers on + Common Lisp since they have ‘finished’ a big project already. + I think Lisp is the only language that allows this type of + practical introduction. Peter makes use of this feature of the + language in building up a fancy introduction on Common Lisp.” — + ‘Taiichi Yuasa, Professor, Department of Communications and + Computer Engineering, Kyoto University’ + + ------------------------------------------------------------- + + Have something to say about this book? Something nice? Want to + see it here? Send it along to . + + +File: pcl.info, Node: Chapter 1, Next: Chapter 2, Prev: Blurbs, Up: Top + +1. Introduction: Why Lisp? 
+========================== + +If you think the greatest pleasure in programming comes from getting a +lot done with code that simply and clearly expresses your intention, +then programming in Common Lisp is likely to be about the most fun you +can have with a computer. You'll get more done, faster, using it than +you would using pretty much any other language. + +That's a bold claim. Can I justify it? Not in a just a few pages in +this chapter-you're going to have to learn some Lisp and see for +yourself-thus the rest of this book. For now, let me start with some +anecdotal evidence, the story of my own road to Lisp. Then, in the next +section, I'll explain the payoff I think you'll get from learning Common +Lisp. + +I'm one of what must be a fairly small number of second-generation Lisp +hackers. My father got his start in computers writing an operating +system in assembly for the machine he used to gather data for his +doctoral dissertation in physics. After running computer systems at +various physics labs, by the 1980s he had left physics altogether and +was working at a large pharmaceutical company. That company had a +project under way to develop software to model production processes in +its chemical plants-if you increase the size of this vessel, how does it +affect annual production? The original team, writing in FORTRAN, had +burned through half the money and almost all the time allotted to the +project with nothing to show for their efforts. This being the 1980s +and the middle of the artificial intelligence (AI) boom, Lisp was in the +air. So my dad-at that point not a Lisper-went to Carnegie Mellon +University (CMU) to talk to some of the folks working on what was to +become Common Lisp about whether Lisp might be a good language for this +project. + +The CMU folks showed him some demos of stuff they were working on, and +he was convinced. He in turn convinced his bosses to let his team take +over the failing project and do it in Lisp. A year later, and using +only what was left of the original budget, his team delivered a working +application with features that the original team had given up any hope +of delivering. My dad credits his team's success to their decision to +use Lisp. + +Now, that's just one anecdote. And maybe my dad is wrong about why they +succeeded. Or maybe Lisp was better only in comparison to other +languages of the day. These days we have lots of fancy new languages, +many of which have incorporated features from Lisp. Am I really saying +Lisp can offer you the same benefits today as it offered my dad in the +1980s? Read on. + +Despite my father's best efforts, I didn't learn any Lisp in high +school. After a college career that didn't involve much programming in +any language, I was seduced by the Web and back into computers. I +worked first in Perl, learning enough to be dangerous while building an +online discussion forum for ‘Mother Jones’ magazine's Web site and +then moving to a Web shop, Organic Online, where I worked on big-for the +time-Web sites such as the one Nike put up during the 1996 Olympics. +Later I moved onto Java as an early developer at WebLogic, now part of +BEA. After WebLogic, I joined another startup where I was the lead +programmer on a team building a transactional messaging system in Java. +Along the way, my general interest in programming languages led me to +explore such mainstream languages as C, C++, and Python, as well as less +well-known ones such as Smalltalk, Eiffel, and Beta. 
+
+So I knew two languages inside and out and was familiar with another
+half dozen. Eventually, however, I realized my interest in programming
+languages was really rooted in the idea planted by my father's tales of
+Lisp-that different languages really are different, and that, despite
+the formal Turing equivalence of all programming languages, you really
+can get more done more quickly in some languages than others and have
+more fun doing it. Yet, ironically, I had never spent that much time
+with Lisp itself. So, I started doing some Lisp hacking in my free
+time. And whenever I did, it was exhilarating how quickly I was able to
+go from idea to working code.
+
+For example, one vacation, having a week or so to hack Lisp, I decided
+to try writing a version of a program-a system for breeding genetic
+algorithms to play the game of Go-that I had written early in my career
+as a Java programmer. Even handicapped by my then rudimentary knowledge
+of Common Lisp and having to look up even basic functions, it still felt
+more productive than it would have been to rewrite the same program in
+Java, even with several extra years of Java experience acquired since
+writing the first version.
+
+A similar experiment led to the library I'll discuss in Chapter 24.
+Early in my time at WebLogic I had written a library, in Java, for
+taking apart Java class files. It worked, but the code was a bit of a
+mess and hard to modify or extend. I had tried several times, over the
+years, to rewrite that library, thinking that with my ever-improving
+Java chops I'd find some way to do it that didn't bog down in piles of
+duplicated code. I never found a way. But when I tried to do it in
+Common Lisp, it took me only two days, and I ended up not only with a
+Java class file parser but with a general-purpose library for taking
+apart any kind of binary file. You'll see how that library works in
+Chapter 24 and use it in Chapter 25 to write a parser for the ID3 tags
+embedded in MP3 files.
+
+* Menu:
+
+* 1-1:: Why Lisp?
+* 1-2:: Where It Began
+* 1-3:: Who This Book Is For
+
+
+File: pcl.info, Node: 1-1, Next: 1-2, Prev: Chapter 1, Up: Chapter 1
+
+Why Lisp?
+=========
+
+It's hard, in only a few pages of an introductory chapter, to explain
+why users of a language like it, and it's even harder to make the case
+for why you should invest your time in learning a certain language.
+Personal history only gets us so far. Perhaps I like Lisp because of
+some quirk in the way my brain is wired. It could even be genetic,
+since my dad has it too. So before you dive into learning Lisp, it's
+reasonable to want to know what the payoff is going to be.
+
+For some languages, the payoff is relatively obvious. For instance, if
+you want to write low-level code on Unix, you should learn C. Or if you
+want to write certain kinds of cross-platform applications, you should
+learn Java. And any of a number of companies still use a lot of C++, so
+if you want to get a job at one of them, you should learn C++.
+
+For most languages, however, the payoff isn't so easily categorized; it
+has to do with subjective criteria such as how it feels to use the
+language. Perl advocates like to say that Perl "makes easy things easy
+and hard things possible" and revel in the fact that, as the Perl motto
+has it, "There's more than one way to do it." (1) Python's fans, on the
+other hand, think Python is clean and simple and think Python code is
+easier to understand because, as ‘their’ motto says, "There's only
+one way to do it."
+ +So, why Common Lisp? There's no immediately obvious payoff for adopting +Common Lisp the way there is for C, Java, and C++ (unless, of course, +you happen to own a Lisp Machine). The benefits of using Lisp have much +more to do with the experience of using it. I'll spend the rest of this +book showing you the specific features of Common Lisp and how to use +them so you can see for yourself what it's like. For now I'll try to +give you a sense of Lisp's philosophy. + +The nearest thing Common Lisp has to a motto is the koan-like +description, "the programmable programming language." While cryptic, +that description gets at the root of the biggest advantage Common Lisp +still has over other languages. More than any other language, Common +Lisp follows the philosophy that what's good for the language's designer +is good for the language's users. Thus, when you're programming in +Common Lisp, you almost never find yourself wishing the language +supported some feature that would make your program easier to write, +because, as you'll see throughout this book, you can just add the +feature yourself. + +Consequently, a Common Lisp program tends to provide a much clearer +mapping between your ideas about how the program works and the code you +actually write. Your ideas aren't obscured by boilerplate code and +endlessly repeated idioms. This makes your code easier to maintain +because you don't have to wade through reams of code every time you need +to make a change. Even systemic changes to a program's behavior can +often be achieved with relatively small changes to the actual code. +This also means you'll develop code more quickly; there's less code to +write, and you don't waste time thrashing around trying to find a clean +way to express yourself within the limitations of the language. (2) + +Common Lisp is also an excellent language for exploratory programming-if +you don't know exactly how your program is going to work when you first +sit down to write it, Common Lisp provides several features to help you +develop your code incrementally and interactively. + +For starters, the interactive read-eval-print loop, which I'll introduce +in the next chapter, lets you continually interact with your program as +you develop it. Write a new function. Test it. Change it. Try a +different approach. You never have to stop for a lengthy compilation +cycle. (3) + +Other features that support a flowing, interactive programming style are +Lisp's dynamic typing and the Common Lisp condition system. Because of +the former, you spend less time convincing the compiler you should be +allowed to run your code and more time actually running it and working +on it, (4) and the latter lets you develop even your error handling code +interactively. + +Another consequence of being "a programmable programming language" is +that Common Lisp, in addition to incorporating small changes that make +particular programs easier to write, can easily adopt big new ideas +about how programming languages should work. For instance, the original +implementation of the Common Lisp Object System (CLOS), Common Lisp's +powerful object system, was as a library written in portable Common +Lisp. This allowed Lisp programmers to gain actual experience with the +facilities it provided before it was officially incorporated into the +language. + +Whatever new paradigm comes down the pike next, it's extremely likely +that Common Lisp will be able to absorb it without requiring any changes +to the core language. 
For example, a Lisper has recently written a +library, AspectL, that adds support for aspect-oriented programming +(AOP) to Common Lisp. (5) If AOP turns out to be the next big thing, +Common Lisp will be able to support it without any changes to the base +language and without extra preprocessors and extra compilers. (6) + + ---------- Footnotes ---------- + + (1) Perl is also worth learning as "the duct tape of the Internet." + + (2) Unfortunately, there's little actual research on the productivity +of different languages. One report that shows Lisp coming out well +compared to C++ and Java in the combination of programmer and program +efficiency is discussed at http://www.norvig.com/java-lisp.html. + + (3) Psychologists have identified a state of mind called flow in +which we're capable of incredible concentration and productivity. The +importance of flow to programming has been recognized for nearly two +decades since it was discussed in the classic book about human factors +in programming ‘Peopleware: Productive Projects and Teams’ by Tom +DeMarco and Timothy Lister (Dorset House, 1987). The two key facts +about flow are that it takes around 15 minutes to get into a state of +flow and that even brief interruptions can break you right out of it, +requiring another 15-minute immersion to reenter. DeMarco and Lister, +like most subsequent authors, concerned themselves mostly with +flow-destroying interruptions such as ringing telephones and inopportune +visits from the boss. Less frequently considered but probably just as +important to programmers are the interruptions caused by our tools. +Languages that require, for instance, a lengthy compilation before you +can try your latest code can be just as inimical to flow as a noisy +phone or a nosy boss. So, one way to look at Lisp is as a language +designed to keep you in a state of flow. + + (4) This point is bound to be somewhat controversial, at least with +some folks. Static versus dynamic typing is one of the classic +religious wars in programming. If you're coming from C++ and Java (or +from statically typed functional languages such as Haskel and ML) and +refuse to consider living without static type checks, you might as well +put this book down now. However, before you do, you might first want to +check out what self-described "statically typed bigot" Robert Martin +(author of ‘Designing Object Oriented C++ Applications Using the Booch +Method’ [Prentice Hall, 1995]) and C++ and Java author Bruce Eckel +(author of ‘Thinking in C++’ [Prentice Hall, 1995] and ‘Thinking +in Java’ [Prentice Hall, 1998]) have had to say about dynamic typing +on their weblogs (http://www.artima.com/weblogs/viewpost.jsp?thread=4639 +and http://www.mindview.net/WebLog/log-0025). On the other hand, folks +coming from Smalltalk, Python, Perl, or Ruby should feel right at home +with this aspect of Common Lisp. + + (5) AspectL is an interesting project insofar as AspectJ, its +Java-based predecessor, was written by Gregor Kiczales, one of the +designers of Common Lisp's object and metaobject systems. To many +Lispers, AspectJ seems like Kiczales's attempt to backport his ideas +from Common Lisp into Java. However, Pascal Costanza, the author of +AspectL, thinks there are interesting ideas in AOP that could be useful +in Common Lisp. Of course, the reason he's able to implement AspectL as +a library is because of the incredible flexibility of the Common Lisp +Meta Object Protocol Kiczales designed. 
To implement AspectJ, Kiczales +had to write what was essentially a separate compiler that compiles a +new language into Java source code. The AspectL project page is at +http://common-lisp.net/ project/aspectl/. + + (6) Or to look at it another, more technically accurate, way, Common +Lisp comes with a built-in facility for integrating compilers for +embedded languages. + + +File: pcl.info, Node: 1-2, Next: 1-3, Prev: 1-1, Up: Chapter 1 + +Where It Began +============== + +Common Lisp is the modern descendant of the Lisp language first +conceived by John McCarthy in 1956. Lisp circa 1956 was designed for +"symbolic data processing" (1) and derived its name from one of the +things it was quite good at: LISt Processing. We've come a long way +since then: Common Lisp sports as fine an array of modern data types as +you can ask for: a condition system that, as you'll see in Chapter 19, +provides a whole level of flexibility missing from the exception systems +of languages such as Java, Python, and C++; powerful facilities for +doing object-oriented programming; and several language facilities that +just don't exist in other programming languages. How is this possible? +What on Earth would provoke the evolution of such a well-equipped +language? + +Well, McCarthy was (and still is) an artificial intelligence (AI) +researcher, and many of the features he built into his initial version +of the language made it an excellent language for AI programming. +During the AI boom of the 1980s, Lisp remained a favorite tool for +programmers writing software to solve hard problems such as automated +theorem proving, planning and scheduling, and computer vision. These +were problems that required a lot of hard-to-write software; to make a +dent in them, AI programmers needed a powerful language, and they grew +Lisp into the language they needed. And the Cold War helped-as the +Pentagon poured money into the Defense Advanced Research Projects Agency +(DARPA), a lot of it went to folks working on problems such as +large-scale battlefield simulations, automated planning, and natural +language interfaces. These folks also used Lisp and continued pushing +it to do what they needed. + +The same forces that drove Lisp's feature evolution also pushed the +envelope along other dimensions-big AI problems eat up a lot of +computing resources however you code them, and if you run Moore's law in +reverse for 20 years, you can imagine how scarce computing resources +were on circa-80s hardware. The Lisp guys had to find all kinds of ways +to squeeze performance out of their implementations. Modern Common Lisp +implementations are the heirs to those early efforts and often include +quite sophisticated, native machine code-generating compilers. While +today, thanks to Moore's law, it's possible to get usable performance +from a purely interpreted language, that's no longer an issue for Common +Lisp. As I'll show in Chapter 32, with proper (optional) declarations, +a good Lisp compiler can generate machine code quite similar to what +might be generated by a C compiler. + +The 1980s were also the era of the Lisp Machines, with several +companies, most famously Symbolics, producing computers that ran Lisp +natively from the chips up. Thus, Lisp became a systems programming +language, used for writing the operating system, editors, compilers, and +pretty much everything else that ran on the Lisp Machines. 
+ +In fact, by the early 1980s, with various AI labs and the Lisp machine +vendors all providing their own Lisp implementations, there was such a +proliferation of Lisp systems and dialects that the folks at DARPA began +to express concern about the Lisp community splintering. To address +this concern, a grassroots group of Lisp hackers got together in 1981 +and began the process of standardizing a new language called Common Lisp +that combined the best features from the existing Lisp dialects. Their +work was documented in the book ‘Common Lisp the Language’ by Guy +Steele (Digital Press, 1984)-CLtL to the Lisp-cognoscenti. + +By 1986 the first Common Lisp implementations were available, and the +writing was on the wall for the dialects it was intended to replace. In +1996, the American National Standards Institute (ANSI) released a +standard for Common Lisp that built on and extended the language +specified in CLtL, adding some major new features such as the CLOS and +the condition system. And even that wasn't the last word: like CLtL +before it, the ANSI standard intentionally leaves room for implementers +to experiment with the best way to do things: a full Lisp implementation +provides a rich runtime environment with access to GUI widgets, multiple +threads of control, TCP/IP sockets, and more. These days Common Lisp is +evolving much like other open-source languages-the folks who use it +write the libraries they need and often make them available to others. +In the last few years, in particular, there has been a spurt of activity +in open-source Lisp libraries. + +So, on one hand, Lisp is one of computer science's "classical" +languages, based on ideas that have stood the test of time. (2) On the +other, it's a thoroughly modern, general-purpose language whose design +reflects a deeply pragmatic approach to solving real problems as +efficiently and robustly as possible. The only downside of Lisp's +"classical" heritage is that lots of folks are still walking around with +ideas about Lisp based on some particular flavor of Lisp they were +exposed to at some particular time in the nearly half century since +McCarthy invented Lisp. If someone tells you Lisp is only interpreted, +that it's slow, or that you have to use recursion for everything, ask +them what dialect of Lisp they're talking about and whether people were +wearing bell-bottoms when they learned it. (3) + +* Menu: + +* 1-2-1:: But I learned Lisp Once, And IT Wasn't Like what you're describing + + ---------- Footnotes ---------- + + (1) ‘Lisp 1.5 Programmer's Manual’ (M.I.T. Press, 1962) + + (2) Ideas first introduced in Lisp include the if/then/else +construct, recursive function calls, dynamic memory allocation, garbage +collection, first-class functions, lexical closures, interactive +programming, incremental compilation, and dynamic typing. + + (3) One of the most commonly repeated myths about Lisp is that it's +"dead." While it's true that Common Lisp isn't as widely used as, say, +Visual Basic or Java, it seems strange to describe a language that +continues to be used for new development and that continues to attract +new users as "dead." 
Some recent Lisp success stories include Paul +Graham's Viaweb, which became Yahoo Store when Yahoo bought his company; +ITA Software's airfare pricing and shopping system, QPX, used by the +online ticket seller Orbitz and others; Naughty Dog's game for the +PlayStation 2, Jak and Daxter, which is largely written in a +domain-specific Lisp dialect Naughty Dog invented called GOAL, whose +compiler is itself written in Common Lisp; and the Roomba, the +autonomous robotic vacuum cleaner, whose software is written in L, a +downwardly compatible subset of Common Lisp. Perhaps even more telling +is the growth of the Common-Lisp.net Web site, which hosts open-source +Common Lisp projects, and the number of local Lisp user groups that have +sprung up in the past couple of years. + + +File: pcl.info, Node: 1-2-1, Next: 1-3, Prev: 1-2, Up: 1-2 + +But I learned Lisp Once, And IT Wasn't Like what you're describing +================================================================== + +If you've used Lisp in the past, you may have ideas about what "Lisp" is +that have little to do with Common Lisp. While Common Lisp supplanted +most of the dialects it's descended from, it isn't the only remaining +Lisp dialect, and depending on where and when you were exposed to Lisp, +you may very well have learned one of these other dialects. + +Other than Common Lisp, the one general-purpose Lisp dialect that still +has an active user community is Scheme. Common Lisp borrowed a few +important features from Scheme but never intended to replace it. + +Originally designed at M.I.T., where it was quickly put to use as a +teaching language for undergraduate computer science courses, Scheme has +always been aimed at a different language niche than Common Lisp. In +particular, Scheme's designers have focused on keeping the core language +as small and as simple as possible. This has obvious benefits for a +teaching language and also for programming language researchers who like +to be able to formally prove things about languages. + +It also has the benefit of making it relatively easy to understand the +whole language as specified in the standard. But, it does so at the +cost of omitting many useful features that are standardized in Common +Lisp. Individual Scheme implementations may provide these features in +implementation-specific ways, but their omission from the standard makes +it harder to write portable Scheme code than to write portable Common +Lisp code. + +Scheme also emphasizes a functional programming style and the use of +recursion much more than Common Lisp does. If you studied Lisp in +college and came away with the impression that it was only an academic +language with no real-world application, chances are you learned Scheme. +This isn't to say that's a particularly fair characterization of Scheme, +but it's even less applicable to Common Lisp, which was expressly +designed to be a real-world engineering language rather than a +theoretically "pure" language. + +If you've learned Scheme, you should also be aware that a number of +subtle differences between Scheme and Common Lisp may trip you up. +These differences are also the basis for several perennial religious +wars between the hotheads in the Common Lisp and Scheme communities. +I'll try to point out some of the more important differences as we go +along. + +Two other Lisp dialects still in widespread use are Elisp, the extension +language for the Emacs editor, and Autolisp, the extension language for +Autodesk's AutoCAD computer-aided design tool. 
Although it's possible +more lines of Elisp and Autolisp have been written than of any other +dialect of Lisp, neither can be used outside their host application, and +both are quite old-fashioned Lisps compared to either Scheme or Common +Lisp. If you've used one of these dialects, prepare to hop in the Lisp +time machine and jump forward several decades. + + +File: pcl.info, Node: 1-3, Next: Chapter 2, Prev: 1-2, Up: Chapter 1 + +Who This Book Is For +==================== + +This book is for you if you're curious about Common Lisp, regardless of +whether you're already convinced you want to use it or if you just want +to know what all the fuss is about. + +If you've learned some Lisp already but have had trouble making the leap +from academic exercises to real programs, this book should get you on +your way. On the other hand, you don't have to be already convinced +that you want to use Lisp to get something out of this book. + +If you're a hard-nosed pragmatist who wants to know what advantages +Common Lisp has over languages such as Perl, Python, Java, C, or C#, +this book should give you some ideas. Or maybe you don't even care +about using Lisp-maybe you're already sure Lisp isn't really any better +than other languages you know but are annoyed by some Lisper telling you +that's because you just don't "get it." If so, this book will give you +a straight-to-the-point introduction to Common Lisp. If, after reading +this book, you still think Common Lisp is no better than your current +favorite languages, you'll be in an excellent position to explain +exactly why. + +I cover not only the syntax and semantics of the language but also how +you can use it to write software that does useful stuff. In the first +part of the book, I'll cover the language itself, mixing in a few +"practical" chapters, where I'll show you how to write real code. Then, +after I've covered most of the language, including several parts that +other books leave for you to figure out on your own, the remainder of +the book consists of nine more practical chapters where I'll help you +write several medium-sized programs that actually do things you might +find useful: filter spam, parse binary files, catalog MP3s, stream MP3s +over a network, and provide a Web interface for the MP3 catalog and +server. + +After you finish this book, you'll be familiar with all the most +important features of the language and how they fit together, you'll +have used Common Lisp to write several nontrivial programs, and you'll +be well prepared to continue exploring the language on your own. While +everyone's road to Lisp is different, I hope this book will help smooth +the way for you. So, let's begin. + + +File: pcl.info, Node: Chapter 2, Prev: Chapter 1, Up: Top + +2. Lather, Rinse, Repeat: A Tour of the REPL +============================================ + +In this chapter you'll set up your programming environment and write +your first Common Lisp programs. We'll use the easy-to-install Lisp in +a Box developed by Matthew Danish and Mikel Evins, which packages a +Common Lisp implementation with Emacs, a powerful Lisp-aware text +editor, and SLIME (1) , a Common Lisp development environment built on +top of Emacs. + +This combo provides a state-of-the-art Common Lisp development +environment that supports the incremental, interactive development style +that characterizes Lisp programming. 
The SLIME environment has the +added advantage of providing a fairly uniform user interface regardless +of the operating system and Common Lisp implementation you choose. I'll +use the Lisp in a Box environment in order to have a specific +development environment to talk about; folks who want to explore other +development environments such as the graphical integrated development +environments (IDEs) provided by some of the commercial Lisp vendors or +environments based on other editors shouldn't have too much trouble +translating the basics (2) . + +* Menu: + +* 2-1:: Choosing a Lisp Implementation +* 2-2:: Getting Up and Running with Lisp in a Box +* 2-3:: Free Your Mind: Interactive Programming +* 2-4:: Experimenting in the REPL +* 2-5:: "Hello, World," Lisp Style +* 2-6:: Saving Your Work + + ---------- Footnotes ---------- + + (1) Superior Lisp Interaction Mode for Emacs + + (2) If you've had a bad experience with Emacs previously, you should +treat Lisp in a Box as an IDE that happens to use an Emacs-like editor +as its text editor; there will be no need to become an Emacs guru to +program Lisp. It is, however, orders of magnitude more enjoyable to +program Lisp with an editor that has some basic Lisp awareness. At a +minimum, you'll want an editor that can automatically match ()s for you +and knows how to automatically indent Lisp code. Because Emacs is +itself largely written in a Lisp dialect, Elisp, it has quite a bit of +support for editing Lisp code. Emacs is also deeply embedded into the +history of Lisp and the culture of Lisp hackers: the original Emacs and +its immediate predecessors, TECMACS and TMACS, were written by Lispers +at the Massachusetts Institute of Technology (MIT). The editors on the +Lisp Machines were versions of Emacs written entirely in Lisp. The +first two Lisp Machine Emacs, following the hacker tradition of +recursive acronyms, were EINE and ZWEI, which stood for EINE Is Not +Emacs and ZWEI Was EINE Initially. Later ones used a descendant of +ZWEI, named, more prosaically, ZMACS. + + +File: pcl.info, Node: 2-1, Next: 2-2, Prev: Chapter 2, Up: Chapter 2 + +Choosing a Lisp Implementation +============================== + +The first thing you have to do is to choose a Lisp implementation. This +may seem like a strange thing to have to do for folks used to languages +such as Perl, Python, Visual Basic (VB), C#, and Java. The difference +between Common Lisp and these languages is that Common Lisp is defined +by its standard-there is neither a single implementation controlled by a +benevolent dictator, as with Perl and Python, nor a canonical +implementation controlled by a single company, as with VB, C#, and Java. +Anyone who wants to read the standard and implement the language is free +to do so. Furthermore, changes to the standard have to be made in +accordance with a process controlled by the standards body American +National Standards Institute (ANSI). That process is designed to keep +any one entity, such as a single vendor, from being able to arbitrarily +change the standard (1) . Thus, the Common Lisp standard is a contract +between any Common Lisp vendor and Common Lisp programmers. The +contract tells you that if you write a program that uses the features of +the language the way they're described in the standard, you can count on +your program behaving the same in any conforming implementation. 
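+
+For instance, the standard function LISP-IMPLEMENTATION-TYPE is part
+of that contract: it exists in every conforming implementation, even
+though the string it returns is, by design, different in each one.
+Typed at the Lisp prompt you'll meet in the next few sections, the
+exchange looks like this in SBCL, one of the implementations discussed
+in a moment; the value will, of course, vary with your implementation:
+
+     CL-USER> (lisp-implementation-type)
+     "SBCL"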
+ +On the other hand, the standard may not cover everything you may want to +do in your programs-some things were intentionally left unspecified in +order to allow continuing experimentation by implementers in areas where +there wasn't consensus about the best way for the language to support +certain features. So every implementation offers some features above +and beyond what's specified in the standard. Depending on what kind of +programming you're going to be doing, it may make sense to just pick one +implementation that has the extra features you need and use that. On +the other hand, if we're delivering Lisp source to be used by others, +such as libraries, you'll want-as far as possible-to write portable +Common Lisp. For writing code that should be mostly portable but that +needs facilities not defined by the standard, Common Lisp provides a +flexible way to write code "conditionalized" on the features available +in a particular implementation. You'll see an example of this kind of +code in Chapter 15 when we develop a simple library that smoothes over +some differences between how different Lisp implementations deal with +filenames. + +For the moment, however, the most important characteristic of an +implementation is whether it runs on our favorite operating system. The +folks at Franz, makers of Allegro Common Lisp, are making available a +trial version of their product for use with this book that runs on +Linux, Windows, and OS X. Folks looking for an open-source +implementation have several options. SBCL (2) is a high-quality +open-source implementation that compiles to native code and runs on a +wide variety of Unixes, including Linux and OS X. SBCL is derived from +CMUCL (3) , which is a Common Lisp developed at Carnegie Mellon +University, and, like CMUCL, is largely in the public domain, except a +few sections licensed under Berkeley Software Distribution (BSD) style +licenses. CMUCL itself is another fine choice, though SBCL tends to be +easier to install and now supports 21-bit Unicode (4) . For OS X users, +OpenMCL is an excellent choice-it compiles to machine code, supports +threads, and has quite good integration with OS X's Carbon and Cocoa +toolkits. Other open-source and commercial implementations are +available. See Chapter 32 for resources from which you can get more +information. + +All the Lisp code in this book should work in any conforming Common Lisp +implementation unless otherwise noted, and SLIME will smooth out some of +the differences between implementations by providing us with a common +interface for interacting with Lisp. The output shown in this book is +from Allegro running on GNU/Linux; in some cases, other Lisp's may +generate slightly different error messages or debugger output. + + ---------- Footnotes ---------- + + (1) Practically speaking, there's very little likelihood of the +language standard itself being revised-while there are a small handful +of warts that folks might like to clean up, the ANSI process isn't +amenable to opening an existing standard for minor tweaks, and none of +the warts that might be cleaned up actually cause anyone any serious +difficulty. 
The future of Common Lisp standardization is likely to +proceed via de facto standards, much like the "standardization" of Perl +and Python-as different implementers experiment with application +programming interfaces (APIs) and libraries for doing things not +specified in the language standard, other implementers may adopt them or +people will develop portability libraries to smooth over the differences +between implementations for features not specified in the language +standard. + + (2) Steel Bank Common Lisp + + (3) CMU Common Lisp + + (4) SBCL forked from CMUCL in order to focus on cleaning up the +internals and making it easier to maintain. But the fork has been +amiable; bug fixes tend to propagate between the two projects, and +there's talk that someday they will merge back together. + + +File: pcl.info, Node: 2-2, Next: 2-3, Prev: 2-1, Up: Chapter 2 + +Getting Up and Running with Lisp in a Box +========================================= + +Since the Lisp in a Box packaging is designed to get new Lispers up and +running in a first-rate Lisp development environment with minimum +hassle, all you need to do to get it running is to grab the appropriate +package for your operating system and the preferred Lisp from the Lisp +in a Box Web site listed in Chapter 32 and then follow the installation +instructions. + +Since Lisp in a Box uses Emacs as its editor, you'll need to know at +least a bit about how to use it. Perhaps the best way to get started +with Emacs is to work through its built-in tutorial. To start the +tutorial, select the first item of the Help menu, Emacs tutorial. Or +press the Ctrl key, type h, release the Ctrl key, and then press t. +Most Emacs commands are accessible via such key combinations; because +key combinations are so common, Emacs users have a notation for +describing key combinations that avoids having to constantly write out +combinations such as "Press the Ctrl key, type h, release the Ctrl key, +and then press t." Keys to be pressed together-a so-called key +chord-are written together and separated by a hyphen. Keys, or key +chords, to be pressed in sequence are separated by spaces. In a key +chord, C represents the Ctrl key and M represents the Meta key (also +known as Alt). Thus, we could write the key combination we just +described that starts the tutorial like so: C-h t. + +The tutorial describes other useful commands and the key combinations +that invoke them. Emacs also comes with extensive online documentation +using its own built-in hypertext documentation browser, Info. To read +the manual, type C-h i. The Info system comes with its own tutorial, +accessible simply by pressing h while reading the manual. Finally, +Emacs provides quite a few ways to get help, all bound to key combos +starting with C-h. Typing C-h ? brings up a complete list. Two of the +most useful, besides the tutorial, are C-h k, which lets us type any key +combo and tells us what command it invokes, and C-h w, which lets us +enter the name of a command and tells us what key combination invokes +it. + +The other crucial bit of Emacs terminology, for folks who refuse to work +through the tutorial, is the notion of a ‘buffer’. While working in +Emacs, each file you edit will be represented by a different buffer, +only one of which is "current" at any given time. The current buffer +receives all input-whatever you type and any commands you invoke. +Buffers are also used to represent interactions with programs such as +Common Lisp. 
Thus, one common action you'll take is to "switch +buffers," which means to make a different buffer the current buffer so +you can edit a particular file or interact with a particular program. +The command ‘switch-to-buffer’, bound to the key combination C-x b, +prompts for the name of a buffer in the area at the bottom of the Emacs +frame. When entering a buffer name, hitting Tab will complete the name +based on the characters typed so far or will show a list of possible +completions. The prompt also suggests a default buffer, which you can +accept just by hitting Return. You can also switch buffers by selecting +a buffer from the Buffers menu. + +In certain contexts, other key combinations may be available for +switching to certain buffers. For instance, when editing Lisp source +files, the key combo C-c C-z switches to the buffer where you interact +with Lisp. + + +File: pcl.info, Node: 2-3, Next: 2-4, Prev: 2-2, Up: Chapter 2 + +Free Your Mind: Interactive Programming +======================================= + +When you start Lisp in a Box, you should see a buffer containing a +prompt that looks like this: + + CL-USER> + +This is the Lisp prompt. Like a Unix or DOS shell prompt, the Lisp +prompt is a place where you can type expressions that will cause things +to happen. However, instead of reading and interpreting a line of shell +commands, Lisp reads Lisp expressions, evaluates them according to the +rules of Lisp, and prints the result. Then it does it again with the +next expression you type. That endless cycle of reading, evaluating, +and printing is why it's called the ‘read-eval-print loop’, or REPL +for short. It's also referred to as the ‘top-level’, the +‘top-level listener’, or the ‘Lisp listener’. + +From within the environment provided by the REPL, you can define and +redefine program elements such as variables, functions, classes, and +methods; evaluate any Lisp expression; load files containing Lisp source +code or compiled code; compile whole files or individual functions; +enter the debugger; step through code; and inspect the state of +individual Lisp objects. + +All those facilities are built into the language, accessible via +functions defined in the language standard. If you had to, you could +build a pretty reasonable programming environment out of just the REPL +and any text editor that knows how to properly indent Lisp code. But +for the true Lisp programming experience, you need an environment, such +as SLIME, that lets you interact with Lisp both via the REPL and while +editing source files. For instance, you don't want to have to cut and +paste a function definition from a source file to the REPL or have to +load a whole file just because you changed one function; your Lisp +environment should let us evaluate or compile both individual +expressions and whole files directly from your editor. + + +File: pcl.info, Node: 2-4, Next: 2-5, Prev: 2-3, Up: Chapter 2 + +Experimenting in the REPL +========================= + +To try the REPL, you need a Lisp expression that can be read, evaluated, +and printed. One of the simplest kinds of Lisp expressions is a number. +At the Lisp prompt, you can type 10 followed by Return and should see +something like this: + + CL-USER> 10 + 10 + +The first 10 is the one you typed. The Lisp reader, the R in REPL, +reads the text "10" and creates a Lisp object representing the number +10. This object is a ‘self-evaluating object’, which means that +when given to the evaluator, the E in REPL, it evaluates to itself. 
+This value is then given to the printer, which prints the 10 on the line +by itself. While that may seem like a lot of work just to get back to +where you started, things get a bit more interesting when you give Lisp +something meatier to chew on. For instance, you can type (+ 2 3) at the +Lisp prompt. + + CL-USER> (+ 2 3) + 5 + +Anything in parentheses is a list, in this case a list of three +elements, the symbol +, and the numbers 2 and 3. Lisp, in general, +evaluates lists by treating the first element as the name of a function +and the rest of the elements as expressions to be evaluated to yield the +arguments to the function. In this case, the symbol + names a function +that performs addition. 2 and 3 evaluate to themselves and are then +passed to the addition function, which returns 5. The value 5 is passed +to the printer, which prints it. Lisp can evaluate a list expression in +other ways, but we needn't get into them right away. First we have to +write. . . + + +File: pcl.info, Node: 2-5, Next: 2-6, Prev: 2-4, Up: Chapter 2 + +"Hello, World," Lisp Style +========================== + +No programming book is complete without a "hello, world" (1) program. +As it turns out, it's trivially easy to get the REPL to print "hello, +world." + + CL-USER> "hello, world" + "hello, world" + +This works because strings, like numbers, have a literal syntax that's +understood by the Lisp reader and are self-evaluating objects: Lisp +reads the double-quoted string and instantiates a string object in +memory that, when evaluated, evaluates to itself and is then printed in +the same literal syntax. The quotation marks aren't part of the string +object in memory-they're just the syntax that tells the reader to read a +string. The printer puts them back on when it prints the string because +it tries to print objects in the same syntax the reader understands. + +However, this may not really qualify as a "hello, world" ‘program’. +It's more like the "hello, world" ‘value’. + +You can take a step toward a real program by writing some code that as a +side effect prints the string "hello, world" to standard output. Common +Lisp provides a couple ways to emit output, but the most flexible is the +‘FORMAT’ function. ‘FORMAT’ takes a variable number of +arguments, but the only two required arguments are the place to send the +output and a string. You'll see in the next chapter how the string can +contain embedded directives that allow you to interpolate subsequent +arguments into the string, à la ‘printf’ or Python's string-%. As +long as the string doesn't contain an ~, it will be emitted as-is. If +you pass t as its first argument, it sends its output to standard +output. So a ‘FORMAT’ expression that will print "hello, world" +looks like this (2) : + + CL-USER> (format t "hello, world") + hello, world + NIL + +One thing to note about the result of the ‘FORMAT’ expression is the +‘NIL’ on the line after the "hello, world" output. That ‘NIL’ +is the result of evaluating the ‘FORMAT’ expression, printed by the +REPL. (‘NIL’ is Lisp's version of false and/or null. More on that +in Chapter 4.) Unlike the other expressions we've seen so far, a +‘FORMAT’ expression is more interesting for its side effect-printing +to standard output in this case-than for its return value. But every +expression in Lisp evaluates to some result (3) . + +However, it's still arguable whether you've yet written a true +"program." But you're getting there. 
And you're seeing the bottom-up +style of programming supported by the REPL: you can experiment with +different approaches and build a solution from parts you've already +tested. Now that you have a simple expression that does what you want, +you just need to package it in a function. Functions are one of the +basic program building blocks in Lisp and can be defined with a +‘DEFUN’ expression such as this: + + CL-USER> (defun hello-world () (format t "hello, world")) + HELLO-WORLD + +The ‘hello-world’ after the ‘DEFUN’ is the name of the function. +In Chapter 4 we'll look at exactly what characters can be used in a +name, but for now suffice it to say that lots of characters, such as -, +that are illegal in names in other languages are legal in Common Lisp. +It's standard Lisp style-not to mention more in line with normal English +typography-to form compound names with hyphens, such as +‘hello-world’, rather than with underscores, as in +‘hello_world’, or with inner caps such as ‘helloWorld’. The ()s +after the name delimit the parameter list, which is empty in this case +because the function takes no arguments. The rest is the body of the +function. + +At one level, this expression, like all the others you've seen, is just +another expression to be read, evaluated, and printed by the REPL. The +return value in this case is the name of the function you just defined +(4) . But like the ‘FORMAT’ expression, this expression is more +interesting for the side effects it has than for its return value. +Unlike the ‘FORMAT’ expression, however, the side effects are +invisible: when this expression is evaluated, a new function that takes +no arguments and with the body (format t "hello, world") is created and +given the name HELLO-WORLD. + +Once you've defined the function, you can call it like this: + + CL-USER> (hello-world) + hello, world + NIL + +You can see that the output is just the same as when you evaluated the +‘FORMAT’ expression directly, including the ‘NIL’ value printed +by the REPL. Functions in Common Lisp automatically return the value of +the last expression evaluated. + + ---------- Footnotes ---------- + + (1) The venerable "hello, world" predates even the classic Kernighan +and Ritchie C book that played a big role in its popularization. The +original "hello, world" seems to have come from Brian Kernighan's "A +Tutorial Introduction to the Language B" that was part of the ‘Bell +Laboratories Computing Science Technical Report #8: The Programming +Language B’ published in January 1973. (It's available online at +http://cm.bell-labs.com/cm/cs/who/dmr/bintro.html.) + + (2) These are some other expressions that also print the string +"hello, world": + + (3) Well, as you'll see when I discuss returning multiple values, +it's technically possible to write expressions that evaluate to no +value, but even such expressions are treated as returning NIL when +evaluated in a context that expects a value. + + (4) I'll discuss in Chapter 4 why the name has been converted to all +uppercase. + + +File: pcl.info, Node: 2-6, Next: Chapter 3, Prev: 2-5, Up: Chapter 2 + +Saving Your Work +================ + +You could argue that this is a complete "hello, world" program of sorts. +However, it still has a problem. If you exit Lisp and restart, the +function definition will be gone. Having written such a fine function, +you'll want to save your work. + +Easy enough. You just need to create a file in which to save the +definition. 
In Emacs you can create a new file by typing C-x C-f and +then, when Emacs prompts you, entering the name of the file you want to +create. It doesn't matter particularly where you put the file. It's +customary to name Common Lisp source files with a .lisp extension, +though some folks use .cl instead. + +Once you've created the file, you can type the definition you previously +entered at the REPL. Some things to note are that after you type the +opening parenthesis and the word ‘DEFUN’, at the bottom of the Emacs +window, SLIME will tell you the arguments expected. The exact form will +depend somewhat on what Common Lisp implementation you're using, but +it'll probably look something like this: + + (defun name varlist &rest body) + +The message will disappear as you start to type each new element but +will reappear each time you enter a space. When you're entering the +definition in the file, you might choose to break the definition across +two lines after the parameter list. If you hit Return and then Tab, +SLIME will automatically indent the second line appropriately, like this +(1) : + + (defun hello-world () + (format t "hello, world")) + +SLIME will also help match up the parentheses-as you type a closing +parenthesis, it will flash the corresponding opening parenthesis. Or +you can just type C-c C-q to invoke the command +‘slime-close-parens-at-point’, which will insert as many closing +parentheses as necessary to match all the currently open parentheses. + +Now you can get this definition into your Lisp environment in several +ways. The easiest is to type C-c C-c with the cursor anywhere in or +immediately after the ‘DEFUN’ form, which runs the command +‘slime-compile-defun’, which in turn sends the definition to Lisp to +be evaluated and compiled. To make sure this is working, you can make +some change to hello-world, recompile it, and then go back to the REPL, +using C-c C-z or C-x b, and call it again. For instance, you could make +it a bit more grammatical. + + (defun hello-world () + (format t "Hello, world!")) + +Next, recompile with C-c C-c and then type C-c C-z to switch to the REPL +to try the new version. + + CL-USER> (hello-world) + Hello, world! + NIL + +You'll also probably want to save the file you've been working on; in +the hello.lisp buffer, type C-x C-s to invoke the Emacs command +‘save-buffer’. + +Now to try reloading this function from the source file, you'll need to +quit Lisp and restart. To quit you can use a SLIME shortcut: at the +REPL, type a comma. At the bottom of the Emacs window, you will be +prompted for a command. Type ‘quit’ (or ‘sayoonara’), and then +hit Enter. This will quit Lisp and close all the buffers created by +SLIME such as the REPL buffer (2) . Now restart SLIME by typing ‘M-x +slime’. + +Just for grins, you can try to invoke ‘hello-world’. + + CL-USER> (hello-world) + +At that point SLIME will pop up a new buffer that starts with something +that looks like this: + + attempt to call `HELLO-WORLD' which is an undefined function. + [Condition of type UNDEFINED-FUNCTION] + + Restarts: + 0: [TRY-AGAIN] Try calling HELLO-WORLD again. + 1: [RETURN-VALUE] Return a value instead of calling HELLO-WORLD. + 2: [USE-VALUE] Try calling a function other than HELLO-WORLD. + 3: [STORE-VALUE] Setf the symbol-function of HELLO-WORLD and call it again. + 4: [ABORT] Abort handling SLIME request. + 5: [ABORT] Abort entirely from this process. 
+ + Backtrace: + 0: (SWANK::DEBUG-IN-EMACS #) + 1: ((FLET SWANK:SWANK-DEBUGGER-HOOK SWANK::DEBUG-IT)) + 2: (SWANK:SWANK-DEBUGGER-HOOK # #) + 3: (ERROR #) + 4: (EVAL (HELLO-WORLD)) + 5: (SWANK::EVAL-REGION "(hello-world) + " T) + +Blammo! What happened? Well, you tried to invoke a function that +doesn't exist. But despite the burst of output, Lisp is actually +handling this situation gracefully. Unlike Java or Python, Common Lisp +doesn't just bail-throwing an exception and unwinding the stack. And it +definitely doesn't dump core just because you tried to invoke a missing +function. Instead Lisp drops you into the debugger. + +While you're in the debugger you still have full access to Lisp, so you +can evaluate expressions to examine the state of our program and maybe +even fix things. For now don't worry about that; just type q to exit +the debugger and get back to the REPL. The debugger buffer will go away, +and the REPL will show this: + + CL-USER> (hello-world) + ; Evaluation aborted + CL-USER> + +There's obviously more that can be done from within the debugger than +just abort-we'll see, for instance, in Chapter 19 how the debugger +integrates with the error handling system. For now, however, the +important thing to know is that you can always get out of it, and back +to the REPL, by typing ‘q’. + +Back at the REPL you can try again. Things blew up because Lisp didn't +know the definition of hello-world. So you need to let Lisp know about +the definition we saved in the file hello.lisp. You have several ways +you could do this. You could switch back to the buffer containing the +file (type C-x b and then enter hello.lisp when prompted) and recompile +the definition as you did before with C-c C-c. Or you can load the +whole file, which would be a more convenient approach if the file +contained a bunch of definitions, using the LOAD function at the REPL +like this: + + CL-USER> (load "hello.lisp") + ; Loading /home/peter/my-lisp-programs/hello.lisp + T + +The ‘T’ means everything loaded correctly (3) . Loading a file with +LOAD is essentially equivalent to typing each of the expressions in the +file at the REPL in the order they appear in the file, so after the call +to ‘LOAD’, hello-world should be defined: + + CL-USER> (hello-world) + Hello, world! + NIL + +Another way to load a file's worth of definitions is to compile the file +first with ‘COMPILE-FILE’ and then ‘LOAD’ the resulting compiled +file, called a ‘FASL file’, which is short for ‘fast-load file’. +‘COMPILE-FILE’ returns the name of the FASL file, so we can compile +and load from the REPL like this: + + CL-USER> (load (compile-file "hello.lisp")) + ;;; Compiling file hello.lisp + ;;; Writing fasl file hello.fasl + ;;; Fasl write complete + v; Fast loading /home/peter/my-lisp-programs/hello.fasl + T + +SLIME also provides support for loading and compiling files without +using the REPL. When you're in a source code buffer, you can use C-c C-l +to load the file with ‘slime-load-file’. Emacs will prompt for the +name of a file to load with the name of the current file already filled +in; you can just hit Enter. Or you can type C-c C-k to compile and load +the file represented by the current buffer. In some Common Lisp +implementations, compiling code this way will make it quite a bit +faster; in others, it won't, typically because they always compile +everything. + +This should be enough to give you a flavor of how Lisp programming +works. 
Of course I haven't covered all the tricks and techniques yet, +but you've seen the essential elements-interacting with the REPL trying +things out, loading and testing new code, tweaking and debugging. +Serious Lisp hackers often keep a Lisp image running for days on end, +adding, redefining, and testing bits of their program incrementally. + +Also, even when the Lisp app is deployed, there's often still a way to +get to a REPL. You'll see in Chapter 26 how you can use the REPL and +SLIME to interact with the Lisp that's running a Web server at the same +time as it's serving up Web pages. It's even possible to use SLIME to +connect to a Lisp running on a different machine, allowing you-for +instance-to debug a remote server just like a local one. + +An even more impressive instance of remote debugging occurred on NASA's +1998 Deep Space 1 mission. A half year after the space craft launched, +a bit of Lisp code was going to control the spacecraft for two days +while conducting a sequence of experiments. Unfortunately, a subtle +race condition in the code had escaped detection during ground testing +and was already in space. When the bug manifested in the wild-100 +million miles away from Earth-the team was able to diagnose and fix the +running code, allowing the experiments to complete (4) . One of the +programmers described it as follows: + + Debugging a program running on a $100M piece of hardware that is + 100 million miles away is an interesting experience. Having a + read-eval-print loop running on the spacecraft proved invaluable in + finding and fixing the problem. + +You're not quite ready to send any Lisp code into deep space, but in the +next chapter you'll take a crack at writing a program a bit more +interesting than "hello, world." + + ---------- Footnotes ---------- + + (1) You could also have entered the definition as two lines at the +REPL, as the REPL reads whole expressions, not lines. + + (2) SLIME shortcuts aren't part of Common Lisp-they're commands to +SLIME. + + (3) If for some reason the ‘LOAD’ doesn't go cleanly, you'll get +another error and drop back into the debugger. If this happens, the +most likely reason is that Lisp can't find the file, probably because +its idea of the current working directory isn't the same as where the +file is located. In that case, you can quit the debugger by typing q +and then use the SLIME shortcut cd to change Lisp's idea of the current +directory-type a comma and then cd when prompted for a command and then +the name of the directory where hello.lisp was saved. + + (4) http://www.flownet.com/gat/jpl-lisp.html + + +File: pcl.info, Node: Chapter 3, Next: Chapter 4, Prev: Chapter 2, Up: Top + +3. Practical: A Simple Database +=============================== + +Obviously, before you can start building real software in Lisp, you'll +have to learn the language. But let's face it-you may be thinking, +"'Practical Common Lisp,' isn't that an oxymoron? Why should you be +expected to bother learning all the details of a language unless it's +actually good for something you care about?" So I'll start by giving +you a small example of what you can do with Common Lisp. In this +chapter you'll write a simple database for keeping track of CDs. You'll +use similar techniques in Chapter 27 when you build a database of MP3s +for our streaming MP3 server. 
In fact, you could think of this as part +of the MP3 software project-after all, in order to have a bunch of MP3s +to listen to, it might be helpful to be able to keep track of which CDs +you have and which ones you need to rip. + + In this chapter, I'll cover just enough Lisp as we go along for you +to understand how the code works. But I'll gloss over quite a few +details. For now you needn't sweat the small stuff-the next several +chapters will cover all the Common Lisp constructs used here, and more, +in a much more systematic way. + + One terminology note: I'll discuss a handful of Lisp operators in +this chapter. In Chapter 4, you'll learn that Common Lisp provides +three distinct kinds of operators: functions, macros, and special +operators. For the purposes of this chapter, you don't really need to +know the difference. I will, however, refer to different operators as +functions or macros or special operators as appropriate, rather than +trying to hide the details behind the word operator. For now you can +treat function, macro, and special operator as all more or less +equivalent. (1) + + Also, keep in mind that I won't bust out all the most sophisticated +Common Lisp techniques for your very first post-"hello, world" program. +The point of this chapter isn't that this is how you would write a +database in Lisp; rather, the point is for you to get an idea of what +programming in Lisp is like and to see how even a relatively simple Lisp +program can be quite featureful. + +* Menu: + +* 3-1:: CDs and Records +* 3-2:: Filing CDs +* 3-3:: Looking at the Database Contents +* 3-4:: Improving the User Interaction +* 3-5:: Saving and Loading the Database +* 3-6:: Querying the Database +* 3-7:: Updating Existing Records-Another Use for WHERE +* 3-8:: Removing Duplication and Winning Big +* 3-9:: Wrapping Up + + ---------- Footnotes ---------- + + (1) Before I proceed, however, it's crucially important that you +forget anything you may know about #define-style "macros" as implemented +in the C pre-processor. Lisp macros are a totally different beast. + + +File: pcl.info, Node: 3-1, Next: 3-2, Prev: Chapter 3, Up: Chapter 3 + +CDs and Records +=============== + +To keep track of CDs that need to be ripped to MP3s and which CDs should +be ripped first, each record in the database will contain the title and +artist of the CD, a rating of how much the user likes it, and a flag +saying whether it has been ripped. So, to start with, you'll need a way +to represent a single database record (in other words, one CD). Common +Lisp gives you lots of choices of data structures from a simple +four-item list to a user-defined class, using the Common Lisp Object +System (CLOS). + + For now you can stay at the simple end of the spectrum and use a +list. You can make a list with the LIST function, which, appropriately +enough, returns a list of its arguments. + + CL-USER> (list 1 2 3) + (1 2 3) + + You could use a four-item list, mapping a given position in the list +to a given field in the record. However, another flavor of list-called +a property list, or plist for short-is even more convenient. A plist is +a list where every other element, starting with the first, is a symbol +that describes what the next element in the list is. I won't get into +all the details of exactly what a symbol is right now; basically it's a +name. For the symbols that name the fields in the CD database, you can +use a particular kind of symbol, called a keyword symbol. 
A keyword is +any name that starts with a colon (:), for instance, :foo. Here's an +example of a plist using the keyword symbols :a, :b, and :c as property +names: + + CL-USER> (list :a 1 :b 2 :c 3) + (:A 1 :B 2 :C 3) + + Note that you can create a property list with the same LIST function +as you use to create other lists; it's the contents that make it a +plist. + + The thing that makes plists a convenient way to represent the records +in a database is the function GETF, which takes a plist and a symbol and +returns the value in the plist following the symbol, making a plist a +sort of poor man's hash table. Lisp has real hash tables too, but +plists are sufficient for your needs here and can more easily be saved +to a file, which will come in handy later. + + CL-USER> (getf (list :a 1 :b 2 :c 3) :a) + 1 + CL-USER> (getf (list :a 1 :b 2 :c 3) :c) + 3 + + Given all that, you can easily enough write a function make-cd that +will take the four fields as arguments and return a plist representing +that CD. + + (defun make-cd (title artist rating ripped) + (list :title title :artist artist :rating rating :ripped ripped)) + + The word DEFUN tells us that this form is defining a new function. +The name of the function is make-cd. After the name comes the parameter +list. This function has four parameters: title, artist, rating, and +ripped. Everything after the parameter list is the body of the +function. In this case the body is just one form, a call to LIST. When +make-cd is called, the arguments passed to the call will be bound to the +variables in the parameter list. For instance, to make a record for the +CD Roses by Kathy Mattea, you might call make-cd like this: + + CL-USER> (make-cd "Roses" "Kathy Mattea" 7 t) + (:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 7 :RIPPED T) + + +File: pcl.info, Node: 3-2, Next: 3-3, Prev: 3-1, Up: Chapter 3 + +Filing CDs +========== + +A single record, however, does not a database make. You need some +larger construct to hold the records. Again, for simplicity's sake, a +list seems like a good choice. Also for simplicity you can use a global +variable, *db*, which you can define with the DEFVAR macro. The +asterisks (*) in the name are a Lisp naming convention for global +variables. (1) + + (defvar *db* nil) + + You can use the PUSH macro to add items to *db*. But it's probably a +good idea to abstract things a tiny bit, so you should define a function +add-record that adds a record to the database. + + (defun add-record (cd) (push cd *db*)) + + Now you can use add-record and make-cd together to add CDs to the +database. + + CL-USER> (add-record (make-cd "Roses" "Kathy Mattea" 7 t)) + ((:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 7 :RIPPED T)) + CL-USER> (add-record (make-cd "Fly" "Dixie Chicks" 8 t)) + ((:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T) + (:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 7 :RIPPED T)) + CL-USER> (add-record (make-cd "Home" "Dixie Chicks" 9 t)) + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T) + (:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 7 :RIPPED T)) + + The stuff printed by the REPL after each call to add-record is the +return value, which is the value returned by the last expression in the +function body, the PUSH. And PUSH returns the new value of the variable +it's modifying. So what you're actually seeing is the value of the +database after the record has been added. 
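+
+   (A hedged aside, not part of the book's code: because a function
+returns the value of its last form, a one-line variation of add-record
+can return just the record that was added rather than the whole
+database, which keeps the REPL output shorter as *db* grows.)
+
+     (defun add-record (cd)
+       (push cd *db*)   ; still pushes onto the global database
+       cd)              ; last form, so this is the return value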
+ + ---------- Footnotes ---------- + + (1) Using a global variable also has some drawbacks-for instance, you +can have only one database at a time. In Chapter 27, with more of the +language under your belt, you'll be ready to build a more flexible +database. You'll also see, in Chapter 6, how even using a global +variable is more flexible in Common Lisp than it may be in other +languages. + + +File: pcl.info, Node: 3-3, Next: 3-4, Prev: 3-2, Up: Chapter 3 + +Looking at the Database Contents +================================ + +You can also see the current value of *db* whenever you want by typing +*db* at the REPL. + + CL-USER> *db* + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T) + (:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 7 :RIPPED T)) + + However, that's not a very satisfying way of looking at the output. +You can write a dump-db function that dumps out the database in a more +human-readable format, like this: + + TITLE: Home + ARTIST: Dixie Chicks + RATING: 9 + RIPPED: T + + TITLE: Fly + ARTIST: Dixie Chicks + RATING: 8 + RIPPED: T + + TITLE: Roses + ARTIST: Kathy Mattea + RATING: 7 + RIPPED: T + + The function looks like this: + + (defun dump-db () + (dolist (cd *db*) + (format t "~{~a:~10t~a~%~}~%" cd))) + + This function works by looping over all the elements of *db* with the +DOLIST macro, binding each element to the variable cd in turn. For each +value of cd, you use the FORMAT function to print it. + + Admittedly, the FORMAT call is a little cryptic. However, FORMAT +isn't particularly more complicated than C or Perl's printf function or +Python's string-% operator. In Chapter 18 I'll discuss FORMAT in +greater detail. For now we can take this call bit by bit. As you saw +in Chapter 2, FORMAT takes at least two arguments, the first being the +stream where it sends its output; t is shorthand for the stream +*standard-output*. + + The second argument to FORMAT is a format string that can contain +both literal text and directives telling FORMAT things such as how to +interpolate the rest of its arguments. Format directives start with ~ +(much the way printf's directives start with %). FORMAT understands +dozens of directives, each with their own set of options. (1) However, +for now I'll just focus on the ones you need to write dump-db. + + The ~a directive is the aesthetic directive; it means to consume one +argument and output it in a human-readable form. This will render +keywords without the leading : and strings without quotation marks. For +instance: + + CL-USER> (format t "~a" "Dixie Chicks") + Dixie Chicks + NIL + + or: + + CL-USER> (format t "~a" :title) + TITLE + NIL + + The ~t directive is for tabulating. The ~10t tells FORMAT to emit +enough spaces to move to the tenth column before processing the next ~a. +A ~t doesn't consume any arguments. + + CL-USER> (format t "~a:~10t~a" :artist "Dixie Chicks") + ARTIST: Dixie Chicks + NIL + + Now things get slightly more complicated. When FORMAT sees ~{ the +next argument to be consumed must be a list. FORMAT loops over that +list, processing the directives between the ~{ and ~}, consuming as many +elements of the list as needed each time through the list. In dump-db, +the FORMAT loop will consume one keyword and one value from the list +each time through the loop. The ~% directive doesn't consume any +arguments but tells FORMAT to emit a newline. 
Then after the ~} ends +the loop, the last ~% tells FORMAT to emit one more newline to put a +blank line between each CD. + + Technically, you could have also used FORMAT to loop over the +database itself, turning our dump-db function into a one-liner. + + (defun dump-db () + (format t "~{~{~a:~10t~a~%~}~%~}" *db*)) + + That's either very cool or very scary depending on your point of +view. + + ---------- Footnotes ---------- + + (1) One of the coolest FORMAT directives is the ~R directive. Ever +want to know how to say a really big number in English words? Lisp +knows. Evaluate this: + + (format nil "~r" 1606938044258990275541962092) + + and you should get back (wrapped for legibility): + + +File: pcl.info, Node: 3-4, Next: 3-5, Prev: 3-3, Up: Chapter 3 + +Improving the User Interaction +============================== + +While our add-record function works fine for adding records, it's a bit +Lispy for the casual user. And if they want to add a bunch of records, +it's not very convenient. So you may want to write a function to prompt +the user for information about a set of CDs. Right away you know you'll +need some way to prompt the user for a piece of information and read it. +So let's write that. + + (defun prompt-read (prompt) + (format *query-io* "~a: " prompt) + (force-output *query-io*) + (read-line *query-io*)) + + You use your old friend FORMAT to emit a prompt. Note that there's +no ~% in the format string, so the cursor will stay on the same line. +The call to FORCE-OUTPUT is necessary in some implementations to ensure +that Lisp doesn't wait for a newline before it prints the prompt. + + Then you can read a single line of text with the aptly named +READ-LINE function. The variable *query-io* is a global variable (which +you can tell because of the * naming convention for global variables) +that contains the input stream connected to the terminal. The return +value of prompt-read will be the value of the last form, the call to +READ-LINE, which returns the string it read (without the trailing +newline.) + + You can combine your existing make-cd function with prompt-read to +build a function that makes a new CD record from data it gets by +prompting for each value in turn. + + (defun prompt-for-cd () + (make-cd + (prompt-read "Title") + (prompt-read "Artist") + (prompt-read "Rating") + (prompt-read "Ripped [y/n]"))) + + That's almost right. Except prompt-read returns a string, which, +while fine for the Title and Artist fields, isn't so great for the +Rating and Ripped fields, which should be a number and a boolean. +Depending on how sophisticated a user interface you want, you can go to +arbitrary lengths to validate the data the user enters. For now let's +lean toward the quick and dirty: you can wrap the prompt-read for the +rating in a call to Lisp's PARSE-INTEGER function, like this: + + (parse-integer (prompt-read "Rating")) + + Unfortunately, the default behavior of PARSE-INTEGER is to signal an +error if it can't parse an integer out of the string or if there's any +non-numeric junk in the string. However, it takes an optional keyword +argument :junk-allowed, which tells it to relax a bit. + + (parse-integer (prompt-read "Rating") :junk-allowed t) + + But there's still one problem: if it can't find an integer amidst all +the junk, PARSE-INTEGER will return NIL rather than a number. In +keeping with the quick-and-dirty approach, you may just want to call +that 0 and continue. Lisp's OR macro is just the thing you need here. 
+It's similar to the "short-circuiting" || in Perl, Python, Java, and C; +it takes a series of expressions, evaluates them one at a time, and +returns the first non-nil value (or NIL if they're all NIL). So you can +use the following: + + (or (parse-integer (prompt-read "Rating") :junk-allowed t) 0) + + to get a default value of 0. + + Fixing the code to prompt for Ripped is quite a bit simpler. You can +just use the Common Lisp function Y-OR-N-P. + + (y-or-n-p "Ripped [y/n]: ") + + In fact, this will be the most robust part of prompt-for-cd, as +Y-OR-N-P will reprompt the user if they enter something that doesn't +start with y, Y, n, or N. + + Putting those pieces together you get a reasonably robust +prompt-for-cd function. + + (defun prompt-for-cd () + (make-cd + (prompt-read "Title") + (prompt-read "Artist") + (or (parse-integer (prompt-read "Rating") :junk-allowed t) 0) + (y-or-n-p "Ripped [y/n]: "))) + + Finally, you can finish the "add a bunch of CDs" interface by +wrapping prompt-for-cd in a function that loops until the user is done. +You can use the simple form of the LOOP macro, which repeatedly executes +a body of expressions until it's exited by a call to RETURN. For +example: + + (defun add-cds () + (loop (add-record (prompt-for-cd)) + (if (not (y-or-n-p "Another? [y/n]: ")) (return)))) + + Now you can use add-cds to add some more CDs to the database. + + CL-USER> (add-cds) + Title: Rockin' the Suburbs + Artist: Ben Folds + Rating: 6 + Ripped [y/n]: y + Another? [y/n]: y + Title: Give Us a Break + Artist: Limpopo + Rating: 10 + Ripped [y/n]: y + Another? [y/n]: y + Title: Lyle Lovett + Artist: Lyle Lovett + Rating: 9 + Ripped [y/n]: y + Another? [y/n]: n + NIL + + +File: pcl.info, Node: 3-5, Next: 3-6, Prev: 3-4, Up: Chapter 3 + +Saving and Loading the Database +=============================== + +Having a convenient way to add records to the database is nice. But +it's not so nice that the user is going to be very happy if they have to +reenter all the records every time they quit and restart Lisp. Luckily, +with the data structures you're using to represent the data, it's +trivially easy to save the data to a file and reload it later. Here's a +save-db function that takes a filename as an argument and saves the +current state of the database: + + (defun save-db (filename) + (with-open-file (out filename + :direction :output + :if-exists :supersede) + (with-standard-io-syntax + (print *db* out)))) + + The WITH-OPEN-FILE macro opens a file, binds the stream to a +variable, executes a set of expressions, and then closes the file. It +also makes sure the file is closed even if something goes wrong while +evaluating the body. The list directly after WITH-OPEN-FILE isn't a +function call but rather part of the syntax defined by WITH-OPEN-FILE. +It contains the name of the variable that will hold the file stream to +which you'll write within the body of WITH-OPEN-FILE, a value that must +be a file name, and then some options that control how the file is +opened. Here you specify that you're opening the file for writing with +:direction :output and that you want to overwrite an existing file of +the same name if it exists with :if-exists :supersede. + + Once you have the file open, all you have to do is print the contents +of the database with (print *db* out). Unlike FORMAT, PRINT prints Lisp +objects in a form that can be read back in by the Lisp reader. 
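+
+   To see that difference at the REPL, compare PRINT's readable output
+with FORMAT's ~a output (a hedged sketch; PRINT emits a fresh newline
+before the object and also returns it, so the readable form appears
+twice, once as output and once as the return value):
+
+     CL-USER> (print (list :title "Roses" :rating 7))
+
+     (:TITLE "Roses" :RATING 7)
+     (:TITLE "Roses" :RATING 7)
+     CL-USER> (format t "~a" (list :title "Roses" :rating 7))
+     (TITLE Roses RATING 7)
+     NIL
+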
The +macro WITH-STANDARD-IO-SYNTAX ensures that certain variables that affect +the behavior of PRINT are set to their standard values. You'll use the +same macro when you read the data back in to make sure the Lisp reader +and printer are operating compatibly. + + The argument to save-db should be a string containing the name of the +file where the user wants to save the database. The exact form of the +string will depend on what operating system they're using. For +instance, on a Unix box they should be able to call save-db like this: + + CL-USER> (save-db "~/my-cds.db") + ((:TITLE "Lyle Lovett" :ARTIST "Lyle Lovett" :RATING 9 :RIPPED T) + (:TITLE "Give Us a Break" :ARTIST "Limpopo" :RATING 10 :RIPPED T) + (:TITLE "Rockin' the Suburbs" :ARTIST "Ben Folds" :RATING 6 :RIPPED + T) + (:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T) + (:TITLE "Roses" :ARTIST "Kathy Mattea" :RATING 9 :RIPPED T)) + + On Windows, the filename might be something like "c:/my-cds.db" or +"c:\\my-cds.db." (1) + + You can open this file in any text editor to see what it looks like. +You should see something a lot like what the REPL prints if you type +*db*. + + The function to load the database back in is similar. + + (defun load-db (filename) + (with-open-file (in filename) + (with-standard-io-syntax + (setf *db* (read in))))) + + This time you don't need to specify :direction in the options to +WITH-OPEN-FILE, since you want the default of :input. And instead of +printing, you use the function READ to read from the stream in. This is +the same reader used by the REPL and can read any Lisp expression you +could type at the REPL prompt. However, in this case, you're just +reading and saving the expression, not evaluating it. Again, the +WITH-STANDARD-IO-SYNTAX macro ensures that READ is using the same basic +syntax that save-db did when it PRINTed the data. + + The SETF macro is Common Lisp's main assignment operator. It sets +its first argument to the result of evaluating its second argument. So +in load-db the *db* variable will contain the object read from the file, +namely, the list of lists written by save-db. You do need to be careful +about one thing-load-db clobbers whatever was in *db* before the call. +So if you've added records with add-record or add-cds that haven't been +saved with save-db, you'll lose them. + + ---------- Footnotes ---------- + + (1) Windows actually understands forward slashes in filenames even +though it normally uses a backslash as the directory separator. This is +convenient since otherwise you have to write double backslashes because +backslash is the escape character in Lisp strings. + + +File: pcl.info, Node: 3-6, Next: 3-7, Prev: 3-5, Up: Chapter 3 + +Querying the Database +===================== + +Now that you have a way to save and reload the database to go along with +a convenient user interface for adding new records, you soon may have +enough records that you won't want to be dumping out the whole database +just to look at what's in it. What you need is a way to query the +database. You might like, for instance, to be able to write something +like this: + + (select :artist "Dixie Chicks") + + and get a list of all the records where the artist is the Dixie +Chicks. Again, it turns out that the choice of saving the records in a +list will pay off. + + The function REMOVE-IF-NOT takes a predicate and a list and returns a +list containing only the elements of the original list that match the +predicate. 
In other words, it has removed all the elements that don't +match the predicate. However, REMOVE-IF-NOT doesn't really remove +anything-it creates a new list, leaving the original list untouched. +It's like running grep over a file. The predicate argument can be any +function that accepts a single argument and returns a boolean value-NIL +for false and anything else for true. + + For instance, if you wanted to extract all the even elements from a +list of numbers, you could use REMOVE-IF-NOT as follows: + + CL-USER> (remove-if-not #'evenp '(1 2 3 4 5 6 7 8 9 10)) + (2 4 6 8 10) + + In this case, the predicate is the function EVENP, which returns true +if its argument is an even number. The funny notation #' is shorthand +for "Get me the function with the following name." Without the #', Lisp +would treat evenp as the name of a variable and look up the value of the +variable, not the function. + + You can also pass REMOVE-IF-NOT an anonymous function. For instance, +if EVENP didn't exist, you could write the previous expression as the +following: + + CL-USER> (remove-if-not #'(lambda (x) (= 0 (mod x 2))) '(1 2 3 4 5 6 7 8 9 10)) + (2 4 6 8 10) + + In this case, the predicate is this anonymous function: + + (lambda (x) (= 0 (mod x 2))) + + which checks that its argument is equal to 0 modulus 2 (in other +words, is even). If you wanted to extract only the odd numbers using an +anonymous function, you'd write this: + + CL-USER> (remove-if-not #'(lambda (x) (= 1 (mod x 2))) '(1 2 3 4 5 6 7 8 9 10)) + (1 3 5 7 9) + + Note that lambda isn't the name of the function-it's the indicator +you're defining an anonymous function. (1) Other than the lack of a +name, however, a LAMBDA expression looks a lot like a DEFUN: the word +lambda is followed by a parameter list, which is followed by the body of +the function. + + To select all the Dixie Chicks' albums in the database using +REMOVE-IF-NOT, you need a function that returns true when the artist +field of a record is "Dixie Chicks". Remember that we chose the plist +representation for the database records because the function GETF can +extract named fields from a plist. So assuming cd is the name of a +variable holding a single database record, you can use the expression +(getf cd :artist) to extract the name of the artist. The function +EQUAL, when given string arguments, compares them character by +character. So (equal (getf cd :artist) "Dixie Chicks") will test +whether the artist field of a given CD is equal to "Dixie Chicks". All +you need to do is wrap that expression in a LAMBDA form to make an +anonymous function and pass it to REMOVE-IF-NOT. + + CL-USER> (remove-if-not + #'(lambda (cd) (equal (getf cd :artist) "Dixie Chicks")) *db*) + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T)) + + Now suppose you want to wrap that whole expression in a function that +takes the name of the artist as an argument. You can write that like +this: + + (defun select-by-artist (artist) + (remove-if-not + #'(lambda (cd) (equal (getf cd :artist) artist)) + *db*)) + + Note how the anonymous function, which contains code that won't run +until it's invoked in REMOVE-IF-NOT, can nonetheless refer to the +variable artist. In this case the anonymous function doesn't just save +you from having to write a regular function-it lets you write a function +that derives part of its meaning-the value of artist-from the context in +which it's embedded. + + So that's select-by-artist. 
However, selecting by artist is only one +of the kinds of queries you might like to support. You could write +several more functions, such as select-by-title, select-by-rating, +select-by-title-and-artist, and so on. But they'd all be about the same +except for the contents of the anonymous function. You can instead make +a more general select function that takes a function as an argument. + + (defun select (selector-fn) + (remove-if-not selector-fn *db*)) + + So what happened to the #'? Well, in this case you don't want +REMOVE-IF-NOT to use the function named selector-fn. You want it to use +the anonymous function that was passed as an argument to select in the +variable selector-fn. Though, the #' comes back in the call to select. + + CL-USER> (select #'(lambda (cd) (equal (getf cd :artist) "Dixie Chicks"))) + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T)) + + But that's really quite gross-looking. Luckily, you can wrap up the +creation of the anonymous function. + + (defun artist-selector (artist) + #'(lambda (cd) (equal (getf cd :artist) artist))) + + This is a function that returns a function and one that references a +variable that-it seems-won't exist after artist-selector returns. (2) +It may seem odd now, but it actually works just the way you'd want-if +you call artist-selector with an argument of "Dixie Chicks", you get an +anonymous function that matches CDs whose :artist field is "Dixie +Chicks", and if you call it with "Lyle Lovett", you get a different +function that will match against an :artist field of "Lyle Lovett". So +now you can rewrite the call to select like this: + + CL-USER> (select (artist-selector "Dixie Chicks")) + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 9 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 8 :RIPPED T)) + + Now you just need some more functions to generate selectors. But +just as you don't want to have to write select-by-title, +select-by-rating, and so on, because they would all be quite similar, +you're not going to want to write a bunch of nearly identical +selector-function generators, one for each field. Why not write one +general-purpose selector-function generator, a function that, depending +on what arguments you pass it, will generate a selector function for +different fields or maybe even a combination of fields? You can write +such a function, but first you need a crash course in a feature called +keyword parameters. + + In the functions you've written so far, you've specified a simple +list of parameters, which are bound to the corresponding arguments in +the call to the function. For instance, the following function: + + (defun foo (a b c) (list a b c)) + + has three parameters, a, b, and c, and must be called with three +arguments. But sometimes you may want to write a function that can be +called with varying numbers of arguments. Keyword parameters are one +way to achieve this. A version of foo that uses keyword parameters +might look like this: + + (defun foo (&key a b c) (list a b c)) + + The only difference is the &key at the beginning of the argument +list. However, the calls to this new foo will look quite different. 
+These are all legal calls with the result to the right of the ==>: + + (foo :a 1 :b 2 :c 3) ==> (1 2 3) + (foo :c 3 :b 2 :a 1) ==> (1 2 3) + (foo :a 1 :c 3) ==> (1 NIL 3) + (foo) ==> (NIL NIL NIL) + + As these examples show, the value of the variables a, b, and c are +bound to the values that follow the corresponding keyword. And if a +particular keyword isn't present in the call, the corresponding variable +is set to NIL. I'm glossing over a bunch of details of how keyword +parameters are specified and how they relate to other kinds of +parameters, but you need to know one more detail. + + Normally if a function is called with no argument for a particular +keyword parameter, the parameter will have the value NIL. However, +sometimes you'll want to be able to distinguish between a NIL that was +explicitly passed as the argument to a keyword parameter and the default +value NIL. To allow this, when you specify a keyword parameter you can +replace the simple name with a list consisting of the name of the +parameter, a default value, and another parameter name, called a +supplied-p parameter. The supplied-p parameter will be set to true or +false depending on whether an argument was actually passed for that +keyword parameter in a particular call to the function. Here's a +version of foo that uses this feature: + + (defun foo (&key a (b 20) (c 30 c-p)) (list a b c c-p)) + + Now the same calls from earlier yield these results: + + (foo :a 1 :b 2 :c 3) ==> (1 2 3 T) + (foo :c 3 :b 2 :a 1) ==> (1 2 3 T) + (foo :a 1 :c 3) ==> (1 20 3 T) + (foo) ==> (NIL 20 30 NIL) + + The general selector-function generator, which you can call where for +reasons that will soon become apparent if you're familiar with SQL +databases, is a function that takes four keyword parameters +corresponding to the fields in our CD records and generates a selector +function that selects any CDs that match all the values given to where. +For instance, it will let you say things like this: + + (select (where :artist "Dixie Chicks")) + + or this: + + (select (where :rating 10 :ripped nil)) + + The function looks like this: + + (defun where (&key title artist rating (ripped nil ripped-p)) + #'(lambda (cd) + (and + (if title (equal (getf cd :title) title) t) + (if artist (equal (getf cd :artist) artist) t) + (if rating (equal (getf cd :rating) rating) t) + (if ripped-p (equal (getf cd :ripped) ripped) t)))) + + This function returns an anonymous function that returns the logical +AND of one clause per field in our CD records. Each clause checks if +the appropriate argument was passed in and then either compares it to +the value in the corresponding field in the CD record or returns t, +Lisp's version of truth, if the parameter wasn't passed in. Thus, the +selector function will return t only for CDs that match all the +arguments passed to where. (3) Note that you need to use a three-item +list to specify the keyword parameter ripped because you need to know +whether the caller actually passed :ripped nil, meaning, "Select CDs +whose ripped field is nil," or whether they left out :ripped altogether, +meaning "I don't care what the value of the ripped field is." + + ---------- Footnotes ---------- + + (1) The word lambda is used in Lisp because of an early connection to +the lambda calculus, a mathematical formalism invented for studying +mathematical functions. + + (2) The technical term for a function that references a variable in +its enclosing scope is a closure because the function "closes over" the +variable. 
I'll discuss closures in more detail in Chapter 6. + + (3) Note that in Lisp, an IF form, like everything else, is an +expression that returns a value. It's actually more like the ternary +operator (?:) in Perl, Java, and C in that this is legal in those +languages: + + some_var = some_boolean ? value1 : value2; + + while this isn't: + + some_var = if (some_boolean) value1; else value2; + + because in those languages, if is a statement, not an expression. + + +File: pcl.info, Node: 3-7, Next: 3-8, Prev: 3-6, Up: Chapter 3 + +Updating Existing Records-Another Use for WHERE +=============================================== + +Now that you've got nice generalized select and where functions, you're +in a good position to write the next feature that every database needs-a +way to update particular records. In SQL the update command is used to +update a set of records matching a particular where clause. That seems +like a good model, especially since you've already got a where-clause +generator. In fact, the update function is mostly just the application +of a few ideas you've already seen: using a passed-in selector function +to choose the records to update and using keyword arguments to specify +the values to change. The main new bit is the use of a function MAPCAR +that maps over a list, *db* in this case, and returns a new list +containing the results of calling a function on each item in the +original list. + + (defun update (selector-fn &key title artist rating (ripped nil ripped-p)) + (setf *db* + (mapcar + #'(lambda (row) + (when (funcall selector-fn row) + (if title (setf (getf row :title) title)) + (if artist (setf (getf row :artist) artist)) + (if rating (setf (getf row :rating) rating)) + (if ripped-p (setf (getf row :ripped) ripped))) + row) *db*))) + + One other new bit here is the use of SETF on a complex form such as +(getf row :title). I'll discuss SETF in greater detail in Chapter 6, +but for now you just need to know that it's a general assignment +operator that can be used to assign lots of "places" other than just +variables. (It's a coincidence that SETF and GETF have such similar +names-they don't have any special relationship.) For now it's enough to +know that after (setf (getf row :title) title), the plist referenced by +row will have the value of the variable title following the property +name :title. With this update function if you decide that you really +dig the Dixie Chicks and that all their albums should go to 11, you can +evaluate the following form: + + CL-USER> (update (where :artist "Dixie Chicks") :rating 11) + NIL + + And it is so. + + CL-USER> (select (where :artist "Dixie Chicks")) + ((:TITLE "Home" :ARTIST "Dixie Chicks" :RATING 11 :RIPPED T) + (:TITLE "Fly" :ARTIST "Dixie Chicks" :RATING 11 :RIPPED T)) + + You can even more easily add a function to delete rows from the +database. + + (defun delete-rows (selector-fn) + (setf *db* (remove-if selector-fn *db*))) + + The function REMOVE-IF is the complement of REMOVE-IF-NOT; it returns +a list with all the elements that do match the predicate removed. Like +REMOVE-IF-NOT, it doesn't actually affect the list it's passed but by +saving the result back into *db*, delete-rows (1) actually changes the +contents of the database. (2) + + ---------- Footnotes ---------- + + (1) You need to use the name delete-rows rather than the more obvious +delete because there's already a function in Common Lisp called DELETE. 
+The Lisp package system gives you a way to deal with such naming +conflicts, so you could have a function named delete if you wanted. But +I'm not ready to explain packages just yet. + + (2) If you're worried that this code creates a memory leak, rest +assured: Lisp was the language that invented garbage collection (and +heap allocation for that matter). The memory used by the old value of +*db* will be automatically reclaimed, assuming no one else is holding on +to a reference to it, which none of this code is. + + +File: pcl.info, Node: 3-8, Next: 3-9, Prev: 3-7, Up: Chapter 3 + +Removing Duplication and Winning Big +==================================== + +So far all the database code supporting insert, select, update, and +delete, not to mention a command-line user interface for adding new +records and dumping out the contents, is just a little more than 50 +lines. Total. (1) + + Yet there's still some annoying code duplication. And it turns out +you can remove the duplication and make the code more flexible at the +same time. The duplication I'm thinking of is in the where function. +The body of the where function is a bunch of clauses like this, one per +field: + + (if title (equal (getf cd :title) title) t) + + Right now it's not so bad, but like all code duplication it has the +same cost: if you want to change how it works, you have to change +multiple copies. And if you change the fields in a CD, you'll have to +add or remove clauses to where. And update suffers from the same kind +of duplication. It's doubly annoying since the whole point of the where +function is to dynamically generate a bit of code that checks the values +you care about; why should it have to do work at runtime checking +whether title was even passed in? + + Imagine that you were trying to optimize this code and discovered +that it was spending too much time checking whether title and the rest +of the keyword parameters to where were even set? (2) If you really +wanted to remove all those runtime checks, you could go through a +program and find all the places you call where and look at exactly what +arguments you're passing. Then you could replace each call to where +with an anonymous function that does only the computation necessary. +For instance, if you found this snippet of code: + + (select (where :title "Give Us a Break" :ripped t)) + + you could change it to this: + + (select + #'(lambda (cd) + (and (equal (getf cd :title) "Give Us a Break") + (equal (getf cd :ripped) t)))) + + Note that the anonymous function is different from the one that where +would have returned; you're not trying to save the call to where but +rather to provide a more efficient selector function. This anonymous +function has clauses only for the fields that you actually care about at +this call site, so it doesn't do any extra work the way a function +returned by where might. + + You can probably imagine going through all your source code and +fixing up all the calls to where in this way. But you can probably also +imagine that it would be a huge pain. If there were enough of them, and +it was important enough, it might even be worthwhile to write some kind +of preprocessor that converts where calls to the code you'd write by +hand. + + The Lisp feature that makes this trivially easy is its macro system. +I can't emphasize enough that the Common Lisp macro shares essentially +nothing but the name with the text-based macros found in C and C++. 
+Where the C pre-processor operates by textual substitution and +understands almost nothing of the structure of C and C++, a Lisp macro +is essentially a code generator that gets run for you automatically by +the compiler. (3) When a Lisp expression contains a call to a macro, +instead of evaluating the arguments and passing them to the function, +the Lisp compiler passes the arguments, unevaluated, to the macro code, +which returns a new Lisp expression that is then evaluated in place of +the original macro call. + + I'll start with a simple, and silly, example and then show how you +can replace the where function with a where macro. Before I can write +this example macro, I need to quickly introduce one new function: +REVERSE takes a list as an argument and returns a new list that is its +reverse. So (reverse '(1 2 3)) evaluates to (3 2 1). Now let's create +a macro. + + (defmacro backwards (expr) (reverse expr)) + + The main syntactic difference between a function and a macro is that +you define a macro with DEFMACRO instead of DEFUN. After that a macro +definition consists of a name, just like a function, a parameter list, +and a body of expressions, both also like a function. However, a macro +has a totally different effect. You can use this macro as follows: + + CL-USER> (backwards ("hello, world" t format)) + hello, world + NIL + + How did that work? When the REPL started to evaluate the backwards +expression, it recognized that backwards is the name of a macro. So it +left the expression ("hello, world" t format) unevaluated, which is good +because it isn't a legal Lisp form. It then passed that list to the +backwards code. The code in backwards passed the list to REVERSE, which +returned the list (format t "hello, world"). backwards then passed that +value back out to the REPL, which then evaluated it in place of the +original expression. + + The backwards macro thus defines a new language that's a lot like +Lisp-just backward-that you can drop into anytime simply by wrapping a +backward Lisp expression in a call to the backwards macro. And, in a +compiled Lisp program, that new language is just as efficient as normal +Lisp because all the macro code-the code that generates the new +expression-runs at compile time. In other words, the compiler will +generate exactly the same code whether you write (backwards ("hello, +world" t format)) or (format t "hello, world"). + + So how does that help with the code duplication in where? Well, you +can write a macro that generates exactly the code you need for each +particular call to where. Again, the best approach is to build our code +bottom up. In the hand-optimized selector function, you had an +expression of the following form for each actual field referred to in +the original call to where: + + (equal (getf cd field) value) + + So let's write a function that, given the name of a field and a +value, returns such an expression. Since an expression is just a list, +you might think you could write something like this: + + (defun make-comparison-expr (field value) ; wrong + (list equal (list getf cd field) value)) + + However, there's one trick here: as you know, when Lisp sees a simple +name such as field or value other than as the first element of a list, +it assumes it's the name of a variable and looks up its value. That's +fine for field and value; it's exactly what you want. But it will treat +equal, getf, and cd the same way, which isn't what you want. 
+However,
+you also know how to stop Lisp from evaluating a form: stick a single
+forward quote (') in front of it. So if you write make-comparison-expr
+like this, it will do what you want:
+
+     (defun make-comparison-expr (field value)
+       (list 'equal (list 'getf 'cd field) value))
+
+   You can test it out in the REPL.
+
+     CL-USER> (make-comparison-expr :rating 10)
+     (EQUAL (GETF CD :RATING) 10)
+     CL-USER> (make-comparison-expr :title "Give Us a Break")
+     (EQUAL (GETF CD :TITLE) "Give Us a Break")
+
+   It turns out that there's an even better way to do it. What you'd
+really like is a way to write an expression that's mostly not evaluated
+and then have some way to pick out a few expressions that you do want
+evaluated. And, of course, there's just such a mechanism. A back quote
+(`) before an expression stops evaluation just like a forward quote.
+
+     CL-USER> `(1 2 3)
+     (1 2 3)
+     CL-USER> '(1 2 3)
+     (1 2 3)
+
+   However, in a back-quoted expression, any subexpression that's
+preceded by a comma is evaluated. Notice the effect of the comma in the
+second expression:
+
+     `(1 2 (+ 1 2)) ==> (1 2 (+ 1 2))
+     `(1 2 ,(+ 1 2)) ==> (1 2 3)
+
+   Using a back quote, you can write make-comparison-expr like this:
+
+     (defun make-comparison-expr (field value)
+       `(equal (getf cd ,field) ,value))
+
+   Now if you look back to the hand-optimized selector function, you can
+see that the body of the function consisted of one comparison expression
+per field/value pair, all wrapped in an AND expression. Assume for the
+moment that you'll arrange for the arguments to the where macro to be
+passed as a single list. You'll need a function that can take the
+elements of such a list pairwise and collect the results of calling
+make-comparison-expr on each pair. To implement that function, you can
+dip into the bag of advanced Lisp tricks and pull out the mighty and
+powerful LOOP macro.
+
+     (defun make-comparisons-list (fields)
+       (loop while fields
+             collecting (make-comparison-expr (pop fields) (pop fields))))
+
+   A full discussion of LOOP will have to wait until Chapter 22; for now
+just note that this LOOP expression does exactly what you need: it loops
+while there are elements left in the fields list, popping off two at a
+time, passing them to make-comparison-expr, and collecting the results
+to be returned at the end of the loop. The POP macro performs the
+inverse operation of the PUSH macro you used to add records to *db*.
+
+   Now you just need to wrap up the list returned by
+make-comparisons-list in an AND and an anonymous function, which you can
+do in the where macro itself. Using a back quote to make a template
+that you fill in by interpolating the value of make-comparisons-list,
+it's trivial.
+
+     (defmacro where (&rest clauses)
+       `#'(lambda (cd) (and ,@(make-comparisons-list clauses))))
+
+   This macro uses a variant of , (namely, the ,@) before the call to
+make-comparisons-list. The ,@ "splices" the value of the following
+expression-which must evaluate to a list-into the enclosing list. You
+can see the difference between , and ,@ in the following two expressions:
+
+     `(and ,(list 1 2 3)) ==> (AND (1 2 3))
+     `(and ,@(list 1 2 3)) ==> (AND 1 2 3)
+
+   You can also use ,@ to splice into the middle of a list.
+
+     `(and ,@(list 1 2 3) 4) ==> (AND 1 2 3 4)
+
+   The other important feature of the where macro is the use of &rest in
+the argument list. Like &key, &rest modifies the way arguments are
+parsed.
With a &rest in its parameter list, a function or macro can +take an arbitrary number of arguments, which are collected into a single +list that becomes the value of the variable whose name follows the +&rest. So if you call where like this: + + (where :title "Give Us a Break" :ripped t) + + the variable clauses will contain the list. + + (:title "Give Us a Break" :ripped t) + + This list is passed to make-comparisons-list, which returns a list of +comparison expressions. You can see exactly what code a call to where +will generate using the function MACROEXPAND-1. If you pass +MACROEXPAND-1, a form representing a macro call, it will call the macro +code with appropriate arguments and return the expansion. So you can +check out the previous where call like this: + + CL-USER> (macroexpand-1 '(where :title "Give Us a Break" :ripped t)) + #'(LAMBDA (CD) + (AND (EQUAL (GETF CD :TITLE) "Give Us a Break") + (EQUAL (GETF CD :RIPPED) T))) + T + + Looks good. Let's try it for real. + + CL-USER> (select (where :title "Give Us a Break" :ripped t)) + ((:TITLE "Give Us a Break" :ARTIST "Limpopo" :RATING 10 :RIPPED T)) + + It works. And the where macro with its two helper functions is +actually one line shorter than the old where function. And it's more +general in that it's no longer tied to the specific fields in our CD +records. + + ---------- Footnotes ---------- + + (1) A friend of mine was once interviewing an engineer for a +programming job and asked him a typical interview question: how do you +know when a function or method is too big? Well, said the candidate, I +don't like any method to be bigger than my head. You mean you can't +keep all the details in your head? No, I mean I put my head up against +my monitor, and the code shouldn't be bigger than my head. + + (2) It's unlikely that the cost of checking whether keyword +parameters had been passed would be a detectible drag on performance +since checking whether a variable is NIL is going to be pretty cheap. +On the other hand, these functions returned by where are going to be +right in the middle of the inner loop of any select, update, or +delete-rows call, as they have to be called once per entry in the +database. Anyway, for illustrative purposes, this will have to do. + + (3) Macros are also run by the interpreter-however, it's easier to +understand the point of macros when you think about compiled code. As +with everything else in this chapter, I'll cover this in greater detail +in future chapters. + + +File: pcl.info, Node: 3-9, Next: Chapter 4, Prev: 3-7, Up: Chapter 3 + +Wrapping Up +=========== + +Now, an interesting thing has happened. You removed duplication and +made the code more efficient and more general at the same time. That's +often the way it goes with a well-chosen macro. This makes sense +because a macro is just another mechanism for creating +abstractions-abstraction at the syntactic level, and abstractions are by +definition more concise ways of expressing underlying generalities. Now +the only code in the mini-database that's specific to CDs and the fields +in them is in the make-cd, prompt-for-cd, and add-cd functions. In +fact, our new where macro would work with any plist-based database. + + However, this is still far from being a complete database. You can +probably think of plenty of features to add, such as supporting multiple +tables or more elaborate queries. In Chapter 27 we'll build an MP3 +database that incorporates some of those features. 
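+
+   To see that the where macro really isn't tied to CD records, here is
+a hedged sketch against a made-up plist table (the *books* variable and
+its fields are hypothetical, not part of the book's code); since select
+is hardwired to *db*, the sketch calls REMOVE-IF-NOT directly:
+
+     (defvar *books*
+       '((:title "SICP" :author "Abelson" :read t)
+         (:title "PAIP" :author "Norvig" :read nil)))
+
+     ;; The same where macro, applied to a completely different table.
+     (remove-if-not (where :author "Norvig") *books*)
+     ;; ==> ((:TITLE "PAIP" :AUTHOR "Norvig" :READ NIL))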
+ + The point of this chapter was to give you a quick introduction to +just a handful of Lisp's features and show how they're used to write +code that's a bit more interesting than "hello, world." In the next +chapter we'll begin a more systematic overview of Lisp. + + +File: pcl.info, Node: Chapter 4, Next: Chapter 5, Prev: Chapter 3, Up: Top + +4. Syntax and Semantics +======================= + +After that whirlwind tour, we'll settle down for a few chapters to take +a more systematic look at the features you've used so far. I'll start +with an overview of the basic elements of Lisp's syntax and semantics, +which means, of course, that I must first address that burning question. +. . + +* Menu: + +* 4-1:: What's with All the Parentheses? +* 4-2:: Breaking Open the Black Box +* 4-3:: S-expressions +* 4-4:: S-expressions As Lisp Forms +* 4-5:: Function Calls +* 4-6:: Special Operators +* 4-7:: Macros +* 4-8:: Truth, Falsehood, and Equality +* 4-9:: Formatting Lisp Code + + +File: pcl.info, Node: 4-1, Next: 4-2, Prev: Chapter 4, Up: Chapter 4 + +What's with All the Parentheses? +================================ + +Lisp's syntax is quite a bit different from the syntax of languages +descended from Algol. The two most immediately obvious characteristics +are the extensive use of parentheses and prefix notation. For whatever +reason, a lot of folks are put off by this syntax. Lisp's detractors +tend to describe the syntax as "weird" and "annoying." Lisp, they say, +must stand for Lots of Irritating Superfluous Parentheses. Lisp folks, +on the other hand, tend to consider Lisp's syntax one of its great +virtues. How is it that what's so off-putting to one group is a source +of delight to another? + + I can't really make the complete case for Lisp's syntax until I've +explained Lisp's macros a bit more thoroughly, but I can start with an +historical tidbit that suggests it may be worth keeping an open mind: +when John McCarthy first invented Lisp, he intended to implement a more +Algol-like syntax, which he called M-expressions. However, he never got +around to it. He explained why not in his article "History of Lisp." +(1) + + The project of defining M-expressions precisely and compiling them + or at least translating them into S-expressions was neither + finalized nor explicitly abandoned. It just receded into the + indefinite future, and a new generation of programmers appeared who + preferred [S-expressions] to any FORTRAN-like or ALGOL-like + notation that could be devised. + + In other words, the people who have actually used Lisp over the past +45 years have liked the syntax and have found that it makes the language +more powerful. In the next few chapters, you'll begin to see why. + + ---------- Footnotes ---------- + + (1) http://www-formal.stanford.edu/jmc/history/lisp/node3.html + + +File: pcl.info, Node: 4-2, Next: 4-3, Prev: 4-1, Up: Chapter 4 + +Breaking Open the Black Box +=========================== + +Before we look at the specifics of Lisp's syntax and semantics, it's +worth taking a moment to look at how they're defined and how this +differs from many other languages. + + In most programming languages, the language processor-whether an +interpreter or a compiler-operates as a black box: you shove a sequence +of characters representing the text of a program into the black box, and +it-depending on whether it's an interpreter or a compiler-either +executes the behaviors indicated or produces a compiled version of the +program that will execute the behaviors when it's run. 
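+
+   Common Lisp, as the next paragraphs describe, slices that box
+differently, and both halves are reachable from the REPL. Here is a
+hedged sketch using the standard READ-FROM-STRING and EVAL functions
+(the 7 is READ-FROM-STRING's second return value, the position where
+reading stopped):
+
+     CL-USER> (read-from-string "(+ 1 2)") ; text to Lisp objects: the reader
+     (+ 1 2)
+     7
+     CL-USER> (eval '(+ 1 2))              ; objects to behavior: the evaluator
+     3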
+ + Inside the black box, of course, language processors are usually +divided into subsystems that are each responsible for one part of the +task of translating a program text into behavior or object code. A +typical division is to split the processor into three phases, each of +which feeds into the next: a lexical analyzer breaks up the stream of +characters into tokens and feeds them to a parser that builds a tree +representing the expressions in the program, according to the language's +grammar. This tree-called an abstract syntax tree-is then fed to an +evaluator that either interprets it directly or compiles it into some +other language such as machine code. Because the language processor is +a black box, the data structures used by the processor, such as the +tokens and abstract syntax trees, are of interest only to the language +implementer. + + In Common Lisp things are sliced up a bit differently, with +consequences for both the implementer and for how the language is +defined. Instead of a single black box that goes from text to program +behavior in one step, Common Lisp defines two black boxes, one that +translates text into Lisp objects and another that implements the +semantics of the language in terms of those objects. The first box is +called the reader, and the second is called the evaluator. (1) + + Each black box defines one level of syntax. The reader defines how +strings of characters can be translated into Lisp objects called +s-expressions. (2) Since the s-expression syntax includes syntax for +lists of arbitrary objects, including other lists, s-expressions can +represent arbitrary tree expressions, much like the abstract syntax tree +generated by the parsers for non-Lisp languages. + + The evaluator then defines a syntax of Lisp forms that can be built +out of s-expressions. Not all s-expressions are legal Lisp forms any +more than all sequences of characters are legal s-expressions. For +instance, both (foo 1 2) and ("foo" 1 2) are s-expressions, but only the +former can be a Lisp form since a list that starts with a string has no +meaning as a Lisp form. + + This split of the black box has a couple of consequences. One is +that you can use s-expressions, as you saw in Chapter 3, as an +externalizable data format for data other than source code, using READ +to read it and PRINT to print it. (3) The other consequence is that +since the semantics of the language are defined in terms of trees of +objects rather than strings of characters, it's easier to generate code +within the language than it would be if you had to generate code as +text. Generating code completely from scratch is only marginally +easier-building up lists vs. building up strings is about the same +amount of work. The real win, however, is that you can generate code by +manipulating existing data. This is the basis for Lisp's macros, which +I'll discuss in much more detail in future chapters. For now I'll focus +on the two levels of syntax defined by Common Lisp: the syntax of +s-expressions understood by the reader and the syntax of Lisp forms +understood by the evaluator. + + ---------- Footnotes ---------- + + (1) Lisp implementers, like implementers of any language, have many +ways they can implement an evaluator, ranging from a "pure" interpreter +that interprets the objects given to the evaluator directly to a +compiler that translates the objects into machine code that it then +runs. 
In the middle are implementations that compile the input into an +intermediate form such as bytecodes for a virtual machine and then +interprets the bytecodes. Most Common Lisp implementations these days +use some form of compilation even when evaluating code at run time. + + (2) Sometimes the phrase s-expression refers to the textual +representation and sometimes to the objects that result from reading the +textual representation. Usually either it's clear from context which is +meant or the distinction isn't that important. + + (3) Not all Lisp objects can be written out in a way that can be read +back in. But anything you can READ can be printed back out "readably" +with PRINT. + + +File: pcl.info, Node: 4-3, Next: 4-4, Prev: 4-2, Up: Chapter 4 + +S-expressions +============= + +The basic elements of s-expressions are lists and atoms. Lists are +delimited by parentheses and can contain any number of +whitespace-separated elements. Atoms are everything else. (1) The +elements of lists are themselves s-expressions (in other words, atoms or +nested lists). Comments-which aren't, technically speaking, +s-expressions-start with a semicolon, extend to the end of a line, and +are treated essentially like whitespace. + + And that's pretty much it. Since lists are syntactically so trivial, +the only remaining syntactic rules you need to know are those governing +the form of different kinds of atoms. In this section I'll describe the +rules for the most commonly used kinds of atoms: numbers, strings, and +names. After that, I'll cover how s-expressions composed of these +elements can be evaluated as Lisp forms. + + Numbers are fairly straightforward: any sequence of digits-possibly +prefaced with a sign (+ or -), containing a decimal point (.) or a +solidus (/), or ending with an exponent marker-is read as a number. For +example: + + 123 ; the integer one hundred twenty-three + 3/7 ; the ratio three-sevenths + 1.0 ; the floating-point number one in default precision + 1.0e0 ; another way to write the same floating-point number + 1.0d0 ; the floating-point number one in "double" precision + 1.0e-4 ; the floating-point equivalent to one-ten-thousandth + +42 ; the integer forty-two + -42 ; the integer negative forty-two + -1/4 ; the ratio negative one-quarter + -2/8 ; another way to write negative one-quarter + 246/2 ; another way to write the integer one hundred twenty-three + + These different forms represent different kinds of numbers: integers, +ratios, and floating point. Lisp also supports complex numbers, which +have their own notation and which I'll discuss in Chapter 10. + + As some of these examples suggest, you can notate the same number in +many ways. But regardless of how you write them, all rationals-integers +and ratios-are represented internally in "simplified" form. In other +words, the objects that represent -2/8 or 246/2 aren't distinct from the +objects that represent -1/4 and 123. Similarly, 1.0 and 1.0e0 are just +different ways of writing the same number. On the other hand, 1.0, +1.0d0, and 1 can all denote different objects because the different +floating-point representations and integers are different types. We'll +save the details about the characteristics of different kinds of numbers +for Chapter 10. + + Strings literals, as you saw in the previous chapter, are enclosed in +double quotes. Within a string a backslash (\) escapes the next +character, causing it to be included in the string regardless of what it +is. 
+The only two characters that must be escaped within a string are
+double quotes and the backslash itself. All other characters can be
+included in a string literal without escaping, regardless of their
+meaning outside a string. Some example string literals are as follows:
+
+     "foo"      ; the string containing the characters f, o, and o.
+     "fo\o"     ; the same string
+     "fo\\o"    ; the string containing the characters f, o, \, and o.
+     "fo\"o"    ; the string containing the characters f, o, ", and o.
+
+   Names used in Lisp programs, such as FORMAT, hello-world, and *db*,
+are represented by objects called symbols. The reader knows nothing
+about how a given name is going to be used-whether it's the name of a
+variable, a function, or something else. It just reads a sequence of
+characters and builds an object to represent the name. (2) Almost any
+character can appear in a name. Whitespace characters can't, though,
+because the elements of lists are separated by whitespace. Digits can
+appear in names as long as the name as a whole can't be interpreted as a
+number. Similarly, names can contain periods, but the reader can't read
+a name that consists only of periods. Ten characters that serve other
+syntactic purposes can't appear in names: open and close parentheses,
+double and single quotes, backtick, comma, colon, semicolon, backslash,
+and vertical bar. And even those characters can be used if you're
+willing to escape them by preceding the character to be escaped with a
+backslash or by surrounding the part of the name containing characters
+that need escaping with vertical bars.
+
+   Two important characteristics of the way the reader translates names
+to symbol objects have to do with how it treats the case of letters in
+names and how it ensures that the same name is always read as the same
+symbol. While reading names, the reader converts all unescaped
+characters in a name to their uppercase equivalents. Thus, the reader
+will read foo, Foo, and FOO as the same symbol: FOO. However, \f\o\o and
+|foo| will both be read as foo, which is a different object than the
+symbol FOO. This is why when you define a function at the REPL and it
+prints the name of the function, it's been converted to uppercase.
+Standard style, these days, is to write code in all lowercase and let
+the reader change names to uppercase. (3)
+
+   To ensure that the same textual name is always read as the same
+symbol, the reader interns symbols-after it has read the name and
+converted it to all uppercase, the reader looks in a table called a
+package for an existing symbol with the same name. If it can't find
+one, it creates a new symbol and adds it to the table. Otherwise, it
+returns the symbol already in the table. Thus, anywhere the same name
+appears in any s-expression, the same object will be used to represent
+it. (4)
+
+   Because names can contain many more characters in Lisp than they can
+in Algol-derived languages, certain naming conventions are distinct to
+Lisp, such as the use of hyphenated names like hello-world. Another
+important convention is that global variables are given names that start
+and end with *. Similarly, constants are given names starting and
+ending in +. And some programmers will name particularly low-level
+functions with names that start with % or even %%. The names defined in
+the language standard use only the alphabetic characters (A-Z) plus *,
++, -, /, 1, 2, <, =, >, and &.
+
+   The syntax for lists, numbers, strings, and symbols can describe a
+good percentage of Lisp programs.
Other rules describe notations for +literal vectors, individual characters, and arrays, which I'll cover +when I talk about the associated data types in Chapters 10 and 11. For +now the key thing to understand is how you can combine numbers, strings, +and symbols with parentheses-delimited lists to build s-expressions +representing arbitrary trees of objects. Some simple examples look like +this: + + x ; the symbol X + () ; the empty list + (1 2 3) ; a list of three numbers + ("foo" "bar") ; a list of two strings + (x y z) ; a list of three symbols + (x 1 "foo") ; a list of a symbol, a number, and a string + (+ (* 2 3) 4) ; a list of a symbol, a list, and a number. + + An only slightly more complex example is the following four-item list +that contains two symbols, the empty list, and another list, itself +containing two symbols and a string: + + (defun hello-world () + (format t "hello, world")) + + ---------- Footnotes ---------- + + (1) The empty list, (), which can also be written NIL, is both an +atom and a list. + + (2) In fact, as you'll see later, names aren't intrinsically tied to +any one kind of thing. You can use the same name, depending on context, +to refer to both a variable and a function, not to mention several other +possibilities. + + (3) The case-converting behavior of the reader can, in fact, be +customized, but understanding when and how to change it requires a much +deeper discussion of the relation between names, symbols, and other +program elements than I'm ready to get into just yet. + + (4) I'll discuss the relation between symbols and packages in more +detail in Chapter 21. + + +File: pcl.info, Node: 4-4, Next: 4-5, Prev: 4-3, Up: Chapter 4 + +S-expressions As Lisp Forms +=========================== + +After the reader has translated a bunch of text into s-expressions, the +s-expressions can then be evaluated as Lisp code. Or some of them +can-not every s-expressions that the reader can read can necessarily be +evaluated as Lisp code. Common Lisp's evaluation rule defines a second +level of syntax that determines which s-expressions can be treated as +Lisp forms. (1) The syntactic rules at this level are quite simple. +Any atom-any nonlist or the empty list-is a legal Lisp form as is any +list that has a symbol as its first element. (2) + + Of course, the interesting thing about Lisp forms isn't their syntax +but how they're evaluated. For purposes of discussion, you can think of +the evaluator as a function that takes as an argument a syntactically +well-formed Lisp form and returns a value, which we can call the value +of the form. Of course, when the evaluator is a compiler, this is a bit +of a simplification-in that case, the evaluator is given an expression +and generates code that will compute the appropriate value when it's +run. But this simplification lets me describe the semantics of Common +Lisp in terms of how the different kinds of Lisp forms are evaluated by +this notional function. + + The simplest Lisp forms, atoms, can be divided into two categories: +symbols and everything else. A symbol, evaluated as a form, is +considered the name of a variable and evaluates to the current value of +the variable. (3) I'll discuss in Chapter 6 how variables get their +values in the first place. You should also note that certain +"variables" are that old oxymoron of programming: "constant variables." +For instance, the symbol PI names a constant variable whose value is the +best possible floating-point approximation to the mathematical constant +pi. 
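+
+   For example, typing the name of that constant variable at the REPL
+evaluates it as a form.  The exact printed value and floating-point
+precision vary from implementation to implementation, but the
+interaction will look roughly like this:
+
+     CL-USER> pi                  ; a symbol, evaluated as a variable
+     3.141592653589793d0          ; exact value varies by implementation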
+ + All other atoms-numbers and strings are the kinds you've seen so +far-are self-evaluating objects. This means when such an expression is +passed to the notional evaluation function, it's simply returned. You +saw examples of self-evaluating objects in Chapter 2 when you typed 10 +and "hello, world" at the REPL. + + It's also possible for symbols to be self-evaluating in the sense +that the variables they name can be assigned the value of the symbol +itself. Two important constants that are defined this way are T and +NIL, the canonical true and false values. I'll discuss their role as +booleans in the section "Truth, Falsehood, and Equality." + + Another class of self-evaluating symbols are the keyword +symbols-symbols whose names start with :. When the reader interns such +a name, it automatically defines a constant variable with the name and +with the symbol as the value. + + Things get more interesting when we consider how lists are evaluated. +All legal list forms start with a symbol, but three kinds of list forms +are evaluated in three quite different ways. To determine what kind of +form a given list is, the evaluator must determine whether the symbol +that starts the list is the name of a function, a macro, or a special +operator. If the symbol hasn't been defined yet-as may be the case if +you're compiling code that contains references to functions that will be +defined later-it's assumed to be a function name. (4) I'll refer to the +three kinds of forms as function call forms, macro forms, and special +forms. + + ---------- Footnotes ---------- + + (1) Of course, other levels of correctness exist in Lisp, as in other +languages. For instance, the s-expression that results from reading +(foo 1 2) is syntactically well-formed but can be evaluated only if foo +is the name of a function or macro. + + (2) One other rarely used kind of Lisp form is a list whose first +element is a lambda form. I'll discuss this kind of form in Chapter 5. + + (3) One other possibility exists-it's possible to define symbol +macros that are evaluated slightly differently. We won't worry about +them. + + (4) In Common Lisp a symbol can name both an operator-function, +macro, or special operator-and a variable. This is one of the major +differences between Common Lisp and Scheme. The difference is sometimes +described as Common Lisp being a Lisp-2 vs. Scheme being a Lisp-1-a +Lisp-2 has two namespaces, one for operators and one for variables, but +a Lisp-1 uses a single namespace. Both choices have advantages, and +partisans can debate endlessly which is better. + + +File: pcl.info, Node: 4-5, Next: 4-6, Prev: 4-4, Up: Chapter 4 + +Function Calls +============== + +The evaluation rule for function call forms is simple: evaluate the +remaining elements of the list as Lisp forms and pass the resulting +values to the named function. This rule obviously places some +additional syntactic constraints on a function call form: all the +elements of the list after the first must themselves be well-formed Lisp +forms. 
In other words, the basic syntax of a function call form is as +follows, where each of the arguments is itself a Lisp form: + + (function-name argument*) + + Thus, the following expression is evaluated by first evaluating 1, +then evaluating 2, and then passing the resulting values to the + +function, which returns 3: + + (+ 1 2) + + A more complex expression such as the following is evaluated in +similar fashion except that evaluating the arguments (+ 1 2) and (- 3 4) +entails first evaluating their arguments and applying the appropriate +functions to them: + + (* (+ 1 2) (- 3 4)) + + Eventually, the values 3 and -1 are passed to the * function, which +returns -3. + + As these examples show, functions are used for many of the things +that require special syntax in other languages. This helps keep Lisp's +syntax regular. + + +File: pcl.info, Node: 4-6, Next: 4-7, Prev: 4-5, Up: Chapter 4 + +Special Operators +================= + +That said, not all operations can be defined as functions. Because all +the arguments to a function are evaluated before the function is called, +there's no way to write a function that behaves like the IF operator you +used in Chapter 3. To see why, consider this form: + + (if x (format t "yes") (format t "no")) + + If IF were a function, the evaluator would evaluate the argument +expressions from left to right. The symbol x would be evaluated as a +variable yielding some value; then (format t "yes") would be evaluated +as a function call, yielding NIL after printing "yes" to standard +output. Then (format t "no") would be evaluated, printing "no" and also +yielding NIL. Only after all three expressions were evaluated would the +resulting values be passed to IF, too late for it to control which of +the two FORMAT expressions gets evaluated. + + To solve this problem, Common Lisp defines a couple dozen so-called +special operators, IF being one, that do things that functions can't do. +There are 25 in all, but only a small handful are used directly in +day-to-day programming. (1) + + When the first element of a list is a symbol naming a special +operator, the rest of the expressions are evaluated according to the +rule for that operator. + + The rule for IF is pretty easy: evaluate the first expression. If it +evaluates to non-NIL, then evaluate the next expression and return its +value. Otherwise, return the value of evaluating the third expression +or NIL if the third expression is omitted. In other words, the basic +form of an IF expression is as follows: + + (if test-form then-form [ else-form ]) + + The test-form will always be evaluated and then one or the other of +the then-form or else-form. + + An even simpler special operator is QUOTE, which takes a single +expression as its "argument" and simply returns it, unevaluated. For +instance, the following evaluates to the list (+ 1 2), not the value 3: + + (quote (+ 1 2)) + + There's nothing special about this list; you can manipulate it just +like any list you could create with the LIST function. (2) + + QUOTE is used commonly enough that a special syntax for it is built +into the reader. Instead of writing the following: + + (quote (+ 1 2)) + you can write this: + + '(+ 1 2) + + This syntax is a small extension of the s-expression syntax +understood by the reader. From the point of view of the evaluator, both +those expressions will look the same: a list whose first element is the +symbol QUOTE and whose second element is the list (+ 1 2). 
(3) + + In general, the special operators implement features of the language +that require some special processing by the evaluator. For instance, +several special operators manipulate the environment in which other +forms will be evaluated. One of these, which I'll discuss in detail in +Chapter 6, is LET, which is used to create new variable bindings. The +following form evaluates to 10 because the second x is evaluated in an +environment where it's the name of a variable established by the LET +with the value 10: + + (let ((x 10)) x) + + ---------- Footnotes ---------- + + (1) The others provide useful, but somewhat esoteric, features. I'll +discuss them as the features they support come up. + + (2) Well, one difference exists-literal objects such as quoted lists, +but also including double-quoted strings, literal arrays, and vectors +(whose syntax you'll see later), must not be modified. Consequently, +any lists you plan to manipulate you should create with LIST. + + (3) This syntax is an example of a reader macro. Reader macros +modify the syntax the reader uses to translate text into Lisp objects. +It is, in fact, possible to define your own reader macros, but that's a +rarely used facility of the language. When most Lispers talk about +"extending the syntax" of the language, they're talking about regular +macros, as I'll discuss in a moment. + + +File: pcl.info, Node: 4-7, Next: 4-8, Prev: 4-6, Up: Chapter 4 + +Macros +====== + +While special operators extend the syntax of Common Lisp beyond what can +be expressed with just function calls, the set of special operators is +fixed by the language standard. Macros, on the other hand, give users +of the language a way to extend its syntax. As you saw in Chapter 3, a +macro is a function that takes s-expressions as arguments and returns a +Lisp form that's then evaluated in place of the macro form. The +evaluation of a macro form proceeds in two phases: First, the elements +of the macro form are passed, unevaluated, to the macro function. +Second, the form returned by the macro function-called its expansion-is +evaluated according to the normal evaluation rules. + + It's important to keep the two phases of evaluating a macro form +clear in your mind. It's easy to lose track when you're typing +expressions at the REPL because the two phases happen one after another +and the value of the second phase is immediately returned. But when +Lisp code is compiled, the two phases happen at completely different +times, so it's important to keep clear what's happening when. For +instance, when you compile a whole file of source code with the function +COMPILE-FILE, all the macro forms in the file are recursively expanded +until the code consists of nothing but function call forms and special +forms. This macroless code is then compiled into a FASL file that the +LOAD function knows how to load. The compiled code, however, isn't +executed until the file is loaded. Because macros generate their +expansion at compile time, they can do relatively large amounts of work +generating their expansion without having to pay for it when the file is +loaded or the functions defined in the file are called. + + Since the evaluator doesn't evaluate the elements of the macro form +before passing them to the macro function, they don't need to be +well-formed Lisp forms. Each macro assigns a meaning to the +s-expressions in the macro form by virtue of how it uses them to +generate its expansion. In other words, each macro defines its own +local syntax. 
For instance, the backwards macro from Chapter 3 defines +a syntax in which an expression is a legal backwards form if it's a list +that's the reverse of a legal Lisp form. + + I'll talk quite a bit more about macros throughout this book. For +now the important thing for you to realize is that macros-while +syntactically similar to function calls-serve quite a different purpose, +providing a hook into the compiler. (1) + + ---------- Footnotes ---------- + + (1) People without experience using Lisp's macros or, worse yet, +bearing the scars of C preprocessor-inflicted wounds, tend to get +nervous when they realize that macro calls look like regular function +calls. This turns out not to be a problem in practice for several +reasons. One is that macro forms are usually formatted differently than +function calls. For instance, you write the following: + + (dolist (x foo) + (print x)) + + rather than this: + + (dolist (x foo) (print x)) + + or + + (dolist (x foo) + (print x)) + + the way you would if DOLIST was a function. A good Lisp environment +will automatically format macro calls correctly, even for user-defined +macros. + + And even if a DOLIST form was written on a single line, there are +several clues that it's a macro: For one, the expression (x foo) is +meaningful by itself only if x is the name of a function or macro. +Combine that with the later occurrence of x as a variable, and it's +pretty suggestive that DOLIST is a macro that's creating a binding for a +variable named x. Naming conventions also help-looping constructs, +which are invariably macros-are frequently given names starting with do. + + +File: pcl.info, Node: 4-8, Next: 4-9, Prev: 4-7, Up: Chapter 4 + +Truth, Falsehood, and Equality +============================== + +Two last bits of basic knowledge you need to get under your belt are +Common Lisp's notion of truth and falsehood and what it means for two +Lisp objects to be "equal." Truth and falsehood are-in this +realm-straightforward: the symbol NIL is the only false value, and +everything else is true. The symbol T is the canonical true value and +can be used when you need to return a non-NIL value and don't have +anything else handy. The only tricky thing about NIL is that it's the +only object that's both an atom and a list: in addition to falsehood, +it's also used to represent the empty list. (1) This equivalence +between NIL and the empty list is built into the reader: if the reader +sees (), it reads it as the symbol NIL. They're completely +interchangeable. And because NIL, as I mentioned previously, is the +name of a constant variable with the symbol NIL as its value, the +expressions nil, (), 'nil, and '() all evaluate to the same thing-the +unquoted forms are evaluated as a reference to the constant variable +whose value is the symbol NIL, but in the quoted forms the QUOTE special +operator evaluates to the symbol directly. For the same reason, both t +and 't will evaluate to the same thing: the symbol T. + + Using phrases such as "the same thing" of course begs the question of +what it means for two values to be "the same." As you'll see in future +chapters, Common Lisp provides a number of type-specific equality +predicates: = is used to compare numbers, CHAR= to compare characters, +and so on. In this section I'll discuss the four "generic" equality +predicates-functions that can be passed any two Lisp objects and will +return true if they're equivalent and false otherwise. They are, in +order of discrimination, EQ, EQL, EQUAL, and EQUALP. 
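+
+   As a rough preview of how that discrimination loosens, here is what
+the four predicates return for a few representative values; the next
+few paragraphs explain why:
+
+     (eq 'foo 'foo)       ==> T    ; one interned symbol, one object
+     (eql 1 1.0)          ==> NIL  ; an integer and a float differ
+     (equal "foo" "foo")  ==> T    ; same characters
+     (equalp "FOO" "foo") ==> T    ; same characters, ignoring case
+     (equalp 1 1.0)       ==> T    ; same mathematical value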
+ + EQ tests for "object identity"-two objects are EQ if they're +identical. Unfortunately, the object identity of numbers and characters +depends on how those data types are implemented in a particular Lisp. +Thus, EQ may consider two numbers or two characters with the same value +to be equivalent, or it may not. Implementations have enough leeway +that the expression (eq 3 3) can legally evaluate to either true or +false. More to the point, (eq x x) can evaluate to either true or false +if the value of x happens to be a number or character. + + Thus, you should never use EQ to compare values that may be numbers +or characters. It may seem to work in a predictable way for certain +values in a particular implementation, but you have no guarantee that it +will work the same way if you switch implementations. And switching +implementations may mean simply upgrading your implementation to a new +version-if your Lisp implementer changes how they represent numbers or +characters, the behavior of EQ could very well change as well. + + Thus, Common Lisp defines EQL to behave like EQ except that it also +is guaranteed to consider two objects of the same class representing the +same numeric or character value to be equivalent. Thus, (eql 1 1) is +guaranteed to be true. And (eql 1 1.0) is guaranteed to be false since +the integer value 1 and the floating-point value are instances of +different classes. + + There are two schools of thought about when to use EQ and when to use +EQL: The "use EQ when possible" camp argues you should use EQ when you +know you aren't going to be com-paring numbers or characters because (a) +it's a way to indicate that you aren't going to be comparing numbers or +characters and (b) it will be marginally more efficient since EQ doesn't +have to check whether its arguments are numbers or characters. + + The "always use EQL" camp says you should never use EQ because (a) +the potential gain in clarity is lost because every time someone reading +your code-including you-sees an EQ, they have to stop and check whether +it's being used correctly (in other words, that it's never going to be +called upon to compare numbers or characters) and (b) that the +efficiency difference between EQ and EQL is in the noise compared to +real performance bottlenecks. + + The code in this book is written in the "always use EQL" style. (2) + + The other two equality predicates, EQUAL and EQUALP, are general in +the sense that they can operate on all types of objects, but they're +much less fundamental than EQ or EQL. They each define a slightly less +discriminating notion of equivalence than EQL, allowing different +objects to be considered equivalent. There's nothing special about the +particular notions of equivalence these functions implement except that +they've been found to be handy by Lisp programmers in the past. If +these predicates don't suit your needs, you can always define your own +predicate function that compares different types of objects in the way +you need. + + EQUAL loosens the discrimination of EQL to consider lists equivalent +if they have the same structure and contents, recursively, according to +EQUAL. EQUAL also considers strings equivalent if they contain the same +characters. It also defines a looser definition of equivalence than EQL +for bit vectors and pathnames, two data types I'll discuss in future +chapters. For all other types, it falls back on EQL. + + EQUALP is similar to EQUAL except it's even less discriminating. 
It +considers two strings equivalent if they contain the same characters, +ignoring differences in case. It also considers two characters +equivalent if they differ only in case. Numbers are equivalent under +EQUALP if they represent the same mathematical value. Thus, (equalp 1 +1.0) is true. Lists with EQUALP elements are EQUALP; likewise, arrays +with EQUALP elements are EQUALP. As with EQUAL, there are a few other +data types that I haven't covered yet for which EQUALP can consider two +objects equivalent that neither EQL nor EQUAL will. For all other data +types, EQUALP falls back on EQL. + + ---------- Footnotes ---------- + + (1) Using the empty list as false is a reflection of Lisp's heritage +as a list-processing language much as the use of the integer 0 as false +in C is a reflection of its heritage as a bit-twiddling language. Not +all Lisps handle boolean values the same way. Another of the many +subtle differences upon which a good Common Lisp vs. Scheme flame war +can rage for days is Scheme's use of a distinct false value #f, which +isn't the same value as either the symbol nil or the empty list, which +are also distinct from each other. + + (2) Even the language standard is a bit ambivalent about which of EQ +or EQL should be preferred. Object identity is defined by EQ, but the +standard defines the phrase the same when talking about objects to mean +EQL unless another predicate is explicitly mentioned. Thus, if you want +to be 100 percent technically correct, you can say that (- 3 2) and (- 4 +3) evaluate to "the same" object but not that they evaluate to +"identical" objects. This is, admittedly, a bit of an +angels-on-pinheads kind of issue. + + +File: pcl.info, Node: 4-9, Next: Chapter 5, Prev: 4-7, Up: Chapter 4 + +Formatting Lisp Code +==================== + +While code formatting is, strictly speaking, neither a syntactic nor a +semantic matter, proper formatting is important to reading and writing +code fluently and idiomatically. The key to formatting Lisp code is to +indent it properly. The indentation should reflect the structure of the +code so that you don't need to count parentheses to see what goes with +what. In general, each new level of nesting gets indented a bit more, +and, if line breaks are necessary, items at the same level of nesting +are lined up. Thus, a function call that needs to be broken up across +multiple lines might be written like this: + + (some-function arg-with-a-long-name + another-arg-with-an-even-longer-name) + + Macro and special forms that implement control constructs are +typically indented a little differently: the "body" elements are +indented two spaces relative to the opening parenthesis of the form. +Thus: + + (defun print-list (list) + (dolist (i list) + (format t "item: ~a~%" i))) + + However, you don't need to worry too much about these rules because a +proper Lisp environment such as SLIME will take care of it for you. In +fact, one of the advantages of Lisp's regular syntax is that it's fairly +easy for software such as editors to know how to indent it. Since the +indentation is supposed to reflect the structure of the code and the +structure is marked by parentheses, it's easy to let the editor indent +your code for you. + + In SLIME, hitting Tab at the beginning of each line will cause it to +be indented appropriately, or you can re-indent a whole expression by +positioning the cursor on the opening parenthesis and typing C-M-q. 
Or +you can re-indent the whole body of a function from anywhere within it +by typing C-c M-q. + + Indeed, experienced Lisp programmers tend to rely on their editor +handling indenting automatically, not just to make their code look nice +but to detect typos: once you get used to how code is supposed to be +indented, a misplaced parenthesis will be instantly recognizable by the +weird indentation your editor gives you. For example, suppose you were +writing a function that was supposed to look like this: + + (defun foo () + (if (test) + (do-one-thing) + (do-another-thing))) + + Now suppose you accidentally left off the closing parenthesis after +test. Because you don't bother counting parentheses, you quite likely +would have added an extra parenthesis at the end of the DEFUN form, +giving you this code: + + (defun foo () + (if (test + (do-one-thing) + (do-another-thing)))) + + However, if you had been indenting by hitting Tab at the beginning of +each line, you wouldn't have code like that. Instead you'd have this: + + (defun foo () + (if (test + (do-one-thing) + (do-another-thing)))) + + Seeing the then and else clauses indented way out under the condition +rather than just indented slightly relative to the IF shows you +immediately that something is awry. + + Another important formatting rule is that closing parentheses are +always put on the same line as the last element of the list they're +closing. That is, don't write this: + + (defun foo () + (dotimes (i 10) + (format t "~d. hello~%" i) + ) + ) + + but instead write this: + + (defun foo () + (dotimes (i 10) + (format t "~d. hello~%" i))) + + The string of )))s at the end may seem forbidding, but as long your +code is properly indented the parentheses should fade away-no need to +give them undue prominence by spreading them across several lines. + + Finally, comments should be prefaced with one to four semicolons +depending on the scope of the comment as follows: + + ;;;; Four semicolons are used for a file header comment. + + ;;; A comment with three semicolons will usually be a paragraph + ;;; comment that applies to a large section of code that follows, + + (defun foo (x) + (dotimes (i x) + ;; Two semicolons indicate this comment applies to the code + ;; that follows. Note that this comment is indented the same + ;; as the code that follows. + (some-function-call) + (another i) ; this comment applies to this line only + (and-another) ; and this is for this line + (baz))) + + Now you're ready to start looking in greater detail at the major +building blocks of Lisp programs, functions, variables, and macros. Up +next: functions. + + +File: pcl.info, Node: Chapter 5, Next: Chapter 6, Prev: Chapter 4, Up: Top + +5. Functions +============ + +After the rules of syntax and semantics, the three most basic components +of all Lisp programs are functions, variables and macros. You used all +three while building the database in Chapter 3, but I glossed over a lot +of the details of how they work and how to best use them. I'll devote +the next few chapters to these three topics, starting with functions, +which-like their counterparts in other languages-provide the basic +mechanism for abstracting, well, functionality. + + The bulk of Lisp itself consists of functions. More than three +quarters of the names defined in the language standard name functions. +All the built-in data types are defined purely in terms of what +functions operate on them. 
Even Lisp's powerful object system is built +upon a conceptual extension to functions, generic functions, which I'll +cover in Chapter 16. + + And, despite the importance of macros to The Lisp Way, in the end all +real functionality is provided by functions. Macros run at compile +time, so the code they generate-the code that will actually make up the +program after all the macros are expanded-will consist entirely of calls +to functions and special operators. Not to mention, macros themselves +are also functions, albeit functions that are used to generate code +rather than to perform the actions of the program. (1) + +* Menu: + +* 5-1:: Defining New Functions +* 5-2:: Function Parameter Lists +* 5-3:: Optional Parameters +* 5-4:: Rest Parameters +* 5-5:: Keyword Parameters +* 5-6:: Mixing Different Parameter Types +* 5-7:: Function Return Values +* 5-8:: Functions As Data, a.k.a. Higher-Order Functions +* 5-9:: Anonymous Functions + + ---------- Footnotes ---------- + + (1) Despite the importance of functions in Common Lisp, it isn't +really accurate to describe it as a functional language. It's true some +of Common Lisp's features, such as its list manipulation functions, are +designed to be used in a body-form* style and that Lisp has a prominent +place in the history of functional programming-McCarthy introduced many +ideas that are now considered important in functional programming-but +Common Lisp was intentionally designed to support many different styles +of programming. In the Lisp family, Scheme is the nearest thing to a +"pure" functional language, and even it has several features that +disqualify it from absolute purity compared to languages such as Haskell +and ML. + + +File: pcl.info, Node: 5-1, Next: 5-2, Prev: Chapter 5, Up: Chapter 5 + +Defining New Functions +====================== + +Normally functions are defined using the DEFUN macro. The basic +skeleton of a DEFUN looks like this: + + (defun name (parameter*) + "Optional documentation string." + body-form*) + + Any symbol can be used as a function name. (1) Usually function +names contain only alphabetic characters and hyphens, but other +characters are allowed and are used in certain naming conventions. For +instance, functions that convert one kind of value to another sometimes +use -> in the name. For example, a function to convert strings to +widgets might be called string->widget. The most important naming +convention is the one mentioned in Chapter 2, which is that you +construct compound names with hyphens rather than underscores or inner +caps. Thus, frob-widget is better Lisp style than either frob_widget or +frobWidget. + + A function's parameter list defines the variables that will be used +to hold the arguments passed to the function when it's called. (2) If +the function takes no arguments, the list is empty, written as (). +Different flavors of parameters handle required, optional, multiple, and +keyword arguments. I'll discuss the details in the next section. + + If a string literal follows the parameter list, it's a documentation +string that should describe the purpose of the function. When the +function is defined, the documentation string will be associated with +the name of the function and can later be obtained using the +DOCUMENTATION function. (3) + + Finally, the body of a DEFUN consists of any number of Lisp +expressions. They will be evaluated in order when the function is +called and the value of the last expression is returned as the value of +the function. 
Or the RETURN-FROM special operator can be used to return +immediately from anywhere in a function, as I'll discuss in a moment. + + In Chapter 2 we wrote a hello-world function, which looked like this: + + (defun hello-world () (format t "hello, world")) + + You can now analyze the parts of this function. Its name is +hello-world, its parameter list is empty so it takes no arguments, it +has no documentation string, and its body consists of one expression. + + (format t "hello, world") + + The following is a slightly more complex function: + + (defun verbose-sum (x y) + "Sum any two numbers after printing a message." + (format t "Summing ~d and ~d.~%" x y) + (+ x y)) + + This function is named verbose-sum, takes two arguments that will be +bound to the parameters x and y, has a documentation string, and has a +body consisting of two expressions. The value returned by the call to + +becomes the return value of verbose-sum. + + ---------- Footnotes ---------- + + (1) Well, almost any symbol. It's undefined what happens if you use +any of the names defined in the language standard as a name for one of +your own functions. However, as you'll see in Chapter 21, the Lisp +package system allows you to create names in different namespaces, so +this isn't really an issue. + + (2) Parameter lists are sometimes also called lambda lists because of +the historical relationship between Lisp's notion of functions and the +lambda calculus. + + (3) For example, the following: + + (documentation 'foo 'function) + + returns the documentation string for the function foo. Note, +however, that documentation strings are intended for human consumption, +not programmatic access. A Lisp implementation isn't required to store +them and is allowed to discard them at any time, so portable programs +shouldn't depend on their presence. In some implementations an +implementation-defined variable needs to be set before it will store +documentation strings. + + +File: pcl.info, Node: 5-2, Next: 5-3, Prev: 5-1, Up: Chapter 5 + +Function Parameter Lists +======================== + +There's not a lot more to say about function names or documentation +strings, and it will take a good portion of the rest of this book to +describe all the things you can do in the body of a function, which +leaves us with the parameter list. + + The basic purpose of a parameter list is, of course, to declare the +variables that will receive the arguments passed to the function. When +a parameter list is a simple list of variable names-as in +verbose-sum-the parameters are called required parameters. When a +function is called, it must be supplied with one argument for every +required parameter. Each parameter is bound to the corresponding +argument. If a function is called with too few or too many arguments, +Lisp will signal an error. + + However, Common Lisp's parameter lists also give you more flexible +ways of mapping the arguments in a function call to the function's +parameters. In addition to required parameters, a function can have +optional parameters. Or a function can have a single parameter that's +bound to a list containing any extra arguments. And, finally, arguments +can be mapped to parameters using keywords rather than position. Thus, +Common Lisp's parameter lists provide a convenient solution to several +common coding problems. 
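+
+   To make the required-parameter rule concrete: verbose-sum from the
+previous section has two required parameters, so every call must supply
+exactly two arguments.  The exact error message for a bad call depends
+on the implementation, but a REPL session will look roughly like this:
+
+     CL-USER> (verbose-sum 1 2)
+     Summing 1 and 2.
+     3
+     CL-USER> (verbose-sum 1)
+     ;; signals an error: wrong number of arguments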
+ + +File: pcl.info, Node: 5-3, Next: 5-4, Prev: 5-2, Up: Chapter 5 + +Optional Parameters +=================== + +While many functions, like verbose-sum, need only required parameters, +not all functions are quite so simple. Sometimes a function will have a +parameter that only certain callers will care about, perhaps because +there's a reasonable default value. An example is a function that +creates a data structure that can grow as needed. Since the data +structure can grow, it doesn't matter-from a correctness point of +view-what the initial size is. But callers who have a good idea how +many items they're going to put into the data structure may be able to +improve performance by specifying a specific initial size. Most +callers, though, would probably rather let the code that implements the +data structure pick a good general-purpose value. In Common Lisp you +can accommodate both kinds of callers by using an optional parameter; +callers who don't care will get a reasonable default, and other callers +can provide a specific value. (1) + + To define a function with optional parameters, after the names of any +required parameters, place the symbol &optional followed by the names of +the optional parameters. A simple example looks like this: + + (defun foo (a b &optional c d) (list a b c d)) + + When the function is called, arguments are first bound to the +required parameters. After all the required parameters have been given +values, if there are any arguments left, their values are assigned to +the optional parameters. If the arguments run out before the optional +parameters do, the remaining optional parameters are bound to the value +NIL. Thus, the function defined previously gives the following results: + + (foo 1 2) ==> (1 2 NIL NIL) + (foo 1 2 3) ==> (1 2 3 NIL) + (foo 1 2 3 4) ==> (1 2 3 4) + + Lisp will still check that an appropriate number of arguments are +passed to the function-in this case between two and four, inclusive-and +will signal an error if the function is called with too few or too many. + + Of course, you'll often want a different default value than NIL. You +can specify the default value by replacing the parameter name with a +list containing a name and an expression. The expression will be +evaluated only if the caller doesn't pass enough arguments to provide a +value for the optional parameter. The common case is simply to provide +a value as the expression. + + (defun foo (a &optional (b 10)) (list a b)) + + This function requires one argument that will be bound to the +parameter a. The second parameter, b, will take either the value of the +second argument, if there is one, or 10. + + (foo 1 2) ==> (1 2) + (foo 1) ==> (1 10) + + Sometimes, however, you may need more flexibility in choosing the +default value. You may want to compute a default value based on other +parameters. And you can-the default-value expression can refer to +parameters that occur earlier in the parameter list. If you were +writing a function that returned some sort of representation of a +rectangle and you wanted to make it especially convenient to make +squares, you might use an argument list like this: + + (defun make-rectangle (width &optional (height width)) ...) + + which would cause the height parameter to take the same value as the +width parameter unless explicitly specified. + + Occasionally, it's useful to know whether the value of an optional +argument was supplied by the caller or is the default value. 
Rather +than writing code to check whether the value of the parameter is the +default (which doesn't work anyway, if the caller happens to explicitly +pass the default value), you can add another variable name to the +parameter specifier after the default-value expression. This variable +will be bound to true if the caller actually supplied an argument for +this parameter and NIL otherwise. By convention, these variables are +usually named the same as the actual parameter with a "-supplied-p" on +the end. For example: + + (defun foo (a b &optional (c 3 c-supplied-p)) + (list a b c c-supplied-p)) + + This gives results like this: + + (foo 1 2) ==> (1 2 3 NIL) + (foo 1 2 3) ==> (1 2 3 T) + (foo 1 2 4) ==> (1 2 4 T) + + ---------- Footnotes ---------- + + (1) In languages that don't support optional parameters directly, +programmers typically find ways to simulate them. One technique is to +use distinguished "no-value" values that the caller can pass to indicate +they want the default value of a given parameter. In C, for example, +it's common to use NULL as such a distinguished value. However, such a +protocol between the function and its callers is ad hoc-in some +functions or for some arguments NULL may be the distinguished value +while in other functions or for other arguments the magic value may be +-1 or some #defined constant. + + +File: pcl.info, Node: 5-4, Next: 5-5, Prev: 5-3, Up: Chapter 5 + +Rest Parameters +=============== + +Optional parameters are just the thing when you have discrete parameters +for which the caller may or may not want to provide values. But some +functions need to take a variable number of arguments. Several of the +built-in functions you've seen already work this way. FORMAT has two +required arguments, the stream and the control string. But after that +it needs a variable number of arguments depending on how many values +need to be interpolated into the control string. The + function also +takes a variable number of arguments-there's no particular reason to +limit it to summing just two numbers; it will sum any number of values. +(It even works with zero arguments, returning 0, the identity under +addition.) The following are all legal calls of those two functions: + + (format t "hello, world") + (format t "hello, ~a" name) + (format t "x: ~d y: ~d" x y) + (+) + (+ 1) + (+ 1 2) + (+ 1 2 3) + + Obviously, you could write functions taking a variable number of +arguments by simply giving them a lot of optional parameters. But that +would be incredibly painful-just writing the parameter list would be bad +enough, and that doesn't get into dealing with all the parameters in the +body of the function. To do it properly, you'd have to have as many +optional parameters as the number of arguments that can legally be +passed in a function call. This number is implementation dependent but +guaranteed to be at least 50. And in current implementations it ranges +from 4,096 to 536,870,911. (1) Blech. That kind of mind-bending tedium +is definitely not The Lisp Way. + + Instead, Lisp lets you include a catchall parameter after the symbol +&rest. If a function includes a &rest parameter, any arguments +remaining after values have been doled out to all the required and +optional parameters are gathered up into a list that becomes the value +of the &rest parameter. Thus, the parameter lists for FORMAT and + +probably look something like this: + + (defun format (stream string &rest values) ...) + (defun + (&rest numbers) ...) 
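+
+   A minimal sketch of a user-defined &rest function (the name foo is
+just for illustration) makes the gathering visible; the function simply
+returns the list bound to its &rest parameter:
+
+     (defun foo (&rest numbers) numbers)
+
+     (foo 1 2 3)  ==> (1 2 3)
+     (foo)        ==> NIL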
+ + ---------- Footnotes ---------- + + (1) The constant CALL-ARGUMENTS-LIMIT tells you the +implementation-specific value. + + +File: pcl.info, Node: 5-5, Next: 5-6, Prev: 5-4, Up: Chapter 5 + +Keyword Parameters +================== + +Optional and rest parameters give you quite a bit of flexibility, but +neither is going to help you out much in the following situation: +Suppose you have a function that takes four optional parameters. Now +suppose that most of the places the function is called, the caller wants +to provide a value for only one of the four parameters and, further, +that the callers are evenly divided as to which parameter they will use. + + The callers who want to provide a value for the first parameter are +fine-they just pass the one optional argument and leave off the rest. +But all the other callers have to pass some value for between one and +three arguments they don't care about. Isn't that exactly the problem +optional parameters were designed to solve? + + Of course it is. The problem is that optional parameters are still +positional-if the caller wants to pass an explicit value for the fourth +optional parameter, it turns the first three optional parameters into +required parameters for that caller. Luckily, another parameter flavor, +keyword parameters, allow the caller to specify which values go with +which parameters. + + To give a function keyword parameters, after any required, &optional, +and &rest parameters you include the symbol &key and then any number of +keyword parameter specifiers, which work like optional parameter +specifiers. Here's a function that has only keyword parameters: + + (defun foo (&key a b c) (list a b c)) + + When this function is called, each keyword parameters is bound to the +value immediately following a keyword of the same name. Recall from +Chapter 4 that keywords are names that start with a colon and that +they're automatically defined as self-evaluating constants. + + If a given keyword doesn't appear in the argument list, then the +corresponding parameter is assigned its default value, just like an +optional parameter. Because the keyword arguments are labeled, they can +be passed in any order as long as they follow any required arguments. +For instance, foo can be invoked as follows: + + (foo) ==> (NIL NIL NIL) + (foo :a 1) ==> (1 NIL NIL) + (foo :b 1) ==> (NIL 1 NIL) + (foo :c 1) ==> (NIL NIL 1) + (foo :a 1 :c 3) ==> (1 NIL 3) + (foo :a 1 :b 2 :c 3) ==> (1 2 3) + (foo :a 1 :c 3 :b 2) ==> (1 2 3) + + As with optional parameters, keyword parameters can provide a default +value form and the name of a supplied-p variable. In both keyword and +optional parameters, the default value form can refer to parameters that +appear earlier in the parameter list. + + (defun foo (&key (a 0) (b 0 b-supplied-p) (c (+ a b))) + (list a b c b-supplied-p)) + + (foo :a 1) ==> (1 0 1 NIL) + (foo :b 1) ==> (0 1 1 T) + (foo :b 1 :c 4) ==> (0 1 4 T) + (foo :a 2 :b 1 :c 4) ==> (2 1 4 T) + + Also, if for some reason you want the keyword the caller uses to +specify the parameter to be different from the name of the actual +parameter, you can replace the parameter name with another list +containing the keyword to use when calling the function and the name to +be used for the parameter. 
The following definition of foo: + + (defun foo (&key ((:apple a)) ((:box b) 0) ((:charlie c) 0 c-supplied-p)) + (list a b c c-supplied-p)) + + lets the caller call it like this: + + (foo :apple 10 :box 20 :charlie 30) ==> (10 20 30 T) + + This style is mostly useful if you want to completely decouple the +public API of the function from the internal details, usually because +you want to use short variable names internally but descriptive keywords +in the API. It's not, however, very frequently used. + + +File: pcl.info, Node: 5-6, Next: 5-7, Prev: 5-5, Up: Chapter 5 + +Mixing Different Parameter Types +================================ + +It's possible, but rare, to use all four flavors of parameters in a +single function. Whenever more than one flavor of parameter is used, +they must be declared in the order I've discussed them: first the names +of the required parameters, then the optional parameters, then the rest +parameter, and finally the keyword parameters. Typically, however, in +functions that use multiple flavors of parameters, you'll combine +required parameters with one other flavor or possibly combine &optional +and &rest parameters. The other two combinations, either &optional or +&rest parameters combined with &key parameters, can lead to somewhat +surprising behavior. + + Combining &optional and &key parameters yields surprising enough +results that you should probably avoid it altogether. The problem is +that if a caller doesn't supply values for all the optional parameters, +then those parameters will eat up the keywords and values intended for +the keyword parameters. For instance, this function unwisely mixes +&optional and &key parameters: + + (defun foo (x &optional y &key z) (list x y z)) + + If called like this, it works fine: + + (foo 1 2 :z 3) ==> (1 2 3) + + And this is also fine: + + (foo 1) ==> (1 nil nil) + + But this will signal an error: + + (foo 1 :z 3) ==> ERROR + + This is because the keyword :z is taken as a value to fill the +optional y parameter, leaving only the argument 3 to be processed. At +that point, Lisp will be expecting either a keyword/value pair or +nothing and will complain. Perhaps even worse, if the function had had +two &optional parameters, this last call would have resulted in the +values :z and 3 being bound to the two &optional parameters and the &key +parameter z getting the default value NIL with no indication that +anything was amiss. + + In general, if you find yourself writing a function that uses both +&optional and &key parameters, you should probably just change it to use +all &key parameters-they're more flexible, and you can always add new +keyword parameters without disturbing existing callers of the function. +You can also remove keyword parameters, as long as no one is using them. +(1) In general, using keyword parameters helps make code much easier to +maintain and evolve-if you need to add some new behavior to a function +that requires new parameters, you can add keyword parameters without +having to touch, or even recompile, any existing code that calls the +function. + + You can safely combine &rest and &key parameters, but the behavior +may be a bit surprising initially. Normally the presence of either +&rest or &key in a parameter list causes all the values remaining after +the required and &optional parameters have been filled in to be +processed in a particular way-either gathered into a list for a &rest +parameter or assigned to the appropriate &key parameters based on the +keywords. 
If both &rest and &key appear in a parameter list, then both +things happen-all the remaining values, which include the keywords +themselves, are gathered into a list that's bound to the &rest +parameter, and the appropriate values are also bound to the &key +parameters. So, given this function: + + (defun foo (&rest rest &key a b c) (list rest a b c)) + + you get this result: + + (foo :a 1 :b 2 :c 3) ==> ((:A 1 :B 2 :C 3) 1 2 3) + + ---------- Footnotes ---------- + + (1) Four standard functions take both &optional and &key +arguments-READ-FROM-STRING, PARSE-NAMESTRING, WRITE-LINE, and +WRITE-STRING. They were left that way during standardization for +backward compatibility with earlier Lisp dialects. READ-FROM-STRING +tends to be the one that catches new Lisp programmers most frequently-a +call such as (read-from-string s :start 10) seems to ignore the :start +keyword argument, reading from index 0 instead of 10. That's because +READ-FROM-STRING also has two &optional parameters that swallowed up the +arguments :start and 10. + + +File: pcl.info, Node: 5-7, Next: 5-8, Prev: 5-6, Up: Chapter 5 + +Function Return Values +====================== + +All the functions you've written so far have used the default behavior +of returning the value of the last expression evaluated as their own +return value. This is the most common way to return a value from a +function. + + However, sometimes it's convenient to be able to return from the +middle of a function such as when you want to break out of nested +control constructs. In such cases you can use the RETURN-FROM special +operator to immediately return any value from the function. + + You'll see in Chapter 20 that RETURN-FROM is actually not tied to +functions at all; it's used to return from a block of code defined with +the BLOCK special operator. However, DEFUN automatically wraps the +whole function body in a block with the same name as the function. So, +evaluating a RETURN-FROM with the name of the function and the value you +want to return will cause the function to immediately exit with that +value. RETURN-FROM is a special operator whose first "argument" is the +name of the block from which to return. This name isn't evaluated and +thus isn't quoted. + + The following function uses nested loops to find the first pair of +numbers, each less than 10, whose product is greater than the argument, +and it uses RETURN-FROM to return the pair as soon as it finds it: + + (defun foo (n) + (dotimes (i 10) + (dotimes (j 10) + (when (> (* i j) n) + (return-from foo (list i j)))))) + + Admittedly, having to specify the name of the function you're +returning from is a bit of a pain-for one thing, if you change the +function's name, you'll need to change the name used in the RETURN-FROM +as well. (1) But it's also the case that explicit RETURN-FROMs are used +much less frequently in Lisp than return statements in C-derived +languages, because all Lisp expressions, including control constructs +such as loops and conditionals, evaluate to a value. So it's not much +of a problem in practice. + + ---------- Footnotes ---------- + + (1) Another macro, RETURN, doesn't require a name. However, you +can't use it instead of RETURN-FROM to avoid having to specify the +function name; it's syntactic sugar for returning from a block named +NIL. I'll cover it, along with the details of BLOCK and RETURN-FROM, in +Chapter 20. + + +File: pcl.info, Node: 5-8, Next: 5-9, Prev: 5-7, Up: Chapter 5 + +Functions As Data, a.k.a. 
Higher-Order Functions +================================================ + +While the main way you use functions is to call them by name, a number +of situations exist where it's useful to be able treat functions as +data. For instance, if you can pass one function as an argument to +another, you can write a general-purpose sorting function while allowing +the caller to provide a function that's responsible for comparing any +two elements. Then the same underlying algorithm can be used with many +different comparison functions. Similarly, callbacks and hooks depend +on being able to store references to code in order to run it later. +Since functions are already the standard way to abstract bits of code, +it makes sense to allow functions to be treated as data. (1) + + In Lisp, functions are just another kind of object. When you define +a function with DEFUN, you're really doing two things: creating a new +function object and giving it a name. It's also possible, as you saw in +Chapter 3, to use LAMBDA expressions to create a function without giving +it a name. The actual representation of a function object, whether +named or anonymous, is opaque-in a native-compiling Lisp, it probably +consists mostly of machine code. The only things you need to know are +how to get hold of it and how to invoke it once you've got it. + + The special operator FUNCTION provides the mechanism for getting at a +function object. It takes a single argument and returns the function +with that name. The name isn't quoted. Thus, if you've defined a +function foo, like so: + + CL-USER> (defun foo (x) (* 2 x)) + FOO + + you can get the function object like this: (2) + + CL-USER> (function foo) + # + + In fact, you've already used FUNCTION, but it was in disguise. The +syntax #', which you used in Chapter 3, is syntactic sugar for FUNCTION, +just the way ' is syntactic sugar for QUOTE. (3) Thus, you can also get +the function object for foo like this: + + CL-USER> #'foo + # + + Once you've got the function object, there's really only one thing +you can do with it-invoke it. Common Lisp provides two functions for +invoking a function through a function object: FUNCALL and APPLY. (4) +They differ only in how they obtain the arguments to pass to the +function. + + FUNCALL is the one to use when you know the number of arguments +you're going to pass to the function at the time you write the code. +The first argument to FUNCALL is the function object to be invoked, and +the rest of the arguments are passed onto that function. Thus, the +following two expressions are equivalent: + + (foo 1 2 3) === (funcall #'foo 1 2 3) + + However, there's little point in using FUNCALL to call a function +whose name you know when you write the code. In fact, the previous two +expressions will quite likely compile to exactly the same machine +instructions. + + The following function demonstrates a more apt use of FUNCALL. It +accepts a function object as an argument and plots a simple ASCII-art +histogram of the values returned by the argument function when it's +invoked on the values from min to max, stepping by step. + + (defun plot (fn min max step) + (loop for i from min to max by step do + (loop repeat (funcall fn i) do (format t "*")) + (format t "~%"))) + + The FUNCALL expression computes the value of the function for each +value of i. The inner LOOP uses that computed value to determine how +many times to print an asterisk to standard output. 
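+
+   Built-in functions use the same technique.  For instance, the
+standard SORT function takes the comparison function mentioned at the
+start of this section as an ordinary argument.  (SORT is allowed to
+reuse the structure of the list you pass it, so a freshly consed list
+is used here.)
+
+     CL-USER> (sort (list 3 1 2) #'<)
+     (1 2 3)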
+ + Note that you don't use FUNCTION or #' to get the function value of +fn; you want it to be interpreted as a variable because it's the +variable's value that will be the function object. You can call plot +with any function that takes a single numeric argument, such as the +built-in function EXP that returns the value of e raised to the power of +its argument. + + CL-USER> (plot #'exp 0 4 1/2) + * + * + ** + **** + ******* + ************ + ******************** + ********************************* + ****************************************************** + NIL + + FUNCALL, however, doesn't do you any good when the argument list is +known only at runtime. For instance, to stick with the plot function +for another moment, suppose you've obtained a list containing a function +object, a minimum and maximum value, and a step value. In other words, +the list contains the values you want to pass as arguments to plot. +Suppose this list is in the variable plot-data. You could invoke plot +on the values in that list like this: + + (plot (first plot-data) (second plot-data) (third plot-data) (fourth plot-data)) + + This works fine, but it's pretty annoying to have to explicitly +unpack the arguments just so you can pass them to plot. + + That's where APPLY comes in. Like FUNCALL, the first argument to +APPLY is a function object. But after the function object, instead of +individual arguments, it expects a list. It then applies the function +to the values in the list. This allows you to write the following +instead: + + (apply #'plot plot-data) + + As a further convenience, APPLY can also accept "loose" arguments as +long as the last argument is a list. Thus, if plot-data contained just +the min, max, and step values, you could still use APPLY like this to +plot the EXP function over that range: + + (apply #'plot #'exp plot-data) + + APPLY doesn't care about whether the function being applied takes +&optional, &rest, or &key arguments-the argument list produced by +combining any loose arguments with the final list must be a legal +argument list for the function with enough arguments for all the +required parameters and only appropriate keyword parameters. + + ---------- Footnotes ---------- + + (1) Lisp, of course, isn't the only language to treat functions as +data. C uses function pointers, Perl uses subroutine references, Python +uses a scheme similar to Lisp, and C# introduces delegates, essentially +typed function pointers, as an improvement over Java's rather clunky +reflection and anonymous class mechanisms. + + (2) The exact printed representation of a function object will differ +from implementation to implementation. + + (3) The best way to think of FUNCTION is as a special kind of +quotation. QUOTEing a symbol prevents it from being evaluated at all, +resulting in the symbol itself rather than the value of the variable +named by that symbol. FUNCTION also circumvents the normal evaluation +rule but, instead of preventing the symbol from being evaluated at all, +causes it to be evaluated as the name of a function, just the way it +would if it were used as the function name in a function call +expression. + + (4) There's actually a third, the special operator +MULTIPLE-VALUE-CALL, but I'll save that for when I discuss expressions +that return multiple values in Chapter 20. 
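+
+   To make the "loose" argument rule concrete, here is one more small
+sketch using the + function; the first two arguments are passed
+individually and the remaining two come from the list:
+
+     (apply #'+ 1 2 '(3 4))  ==> 10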
+ + +File: pcl.info, Node: 5-9, Next: Chapter 6, Prev: 5-7, Up: Chapter 5 + +Anonymous Functions +=================== + +Once you start writing, or even simply using, functions that accept +other functions as arguments, you're bound to discover that sometimes +it's annoying to have to define and name a whole separate function +that's used in only one place, especially when you never call it by +name. + + When it seems like overkill to define a new function with DEFUN, you +can create an "anonymous" function using a LAMBDA expression. As +discussed in Chapter 3, a LAMBDA expression looks like this: + + (lambda (parameters) body) + + One way to think of LAMBDA expressions is as a special kind of +function name where the name itself directly describes what the function +does. This explains why you can use a LAMBDA expression in the place of +a function name with #'. + + (funcall #'(lambda (x y) (+ x y)) 2 3) ==> 5 + + You can even use a LAMBDA expression as the "name" of a function in a +function call expression. If you wanted, you could write the previous +FUNCALL expression more concisely. + + ((lambda (x y) (+ x y)) 2 3) ==> 5 + + But this is almost never done; it's merely worth noting that it's +legal in order to emphasize that LAMBDA expressions can be used anywhere +a normal function name can be. (1) + + Anonymous functions can be useful when you need to pass a function as +an argument to another function and the function you need to pass is +simple enough to express inline. For instance, suppose you wanted to +plot the function 2x. You could define the following function: + + (defun double (x) (* 2 x)) + + which you could then pass to plot. + + CL-USER> (plot #'double 0 10 1) + + ** + **** + ****** + ******** + ********** + ************ + ************** + **************** + ****************** + ******************** + NIL + + But it's easier, and arguably clearer, to write this: + + CL-USER> (plot #'(lambda (x) (* 2 x)) 0 10 1) + + ** + **** + ****** + ******** + ********** + ************ + ************** + **************** + ****************** + ******************** + NIL + + The other important use of LAMBDA expressions is in making closures, +functions that capture part of the environment where they're created. +You used closures a bit in Chapter 3, but the details of how closures +work and what they're used for is really more about how variables work +than functions, so I'll save that discussion for the next chapter. + + ---------- Footnotes ---------- + + (1) In Common Lisp it's also possible to use a LAMBDA expression as +an argument to FUNCALL (or some other function that takes a function +argument such as SORT or MAPCAR) with no #' before it, like this: + + (funcall (lambda (x y) (+ x y)) 2 3) + + This is legal and is equivalent to the version with the #' but for a +tricky reason. Historically LAMBDA expressions by themselves weren't +expressions that could be evaluated. That is LAMBDA wasn't the name of +a function, macro, or special operator. Rather, a list starting with +the symbol LAMBDA was a special syntactic construct that Lisp recognized +as a kind of function name. + + But if that were still true, then (funcall (lambda (...) ...)) +would be illegal because FUNCALL is a function and the normal evaluation +rule for a function call would require that the LAMBDA expression be +evaluated. 
However, late in the ANSI standardization process, in order +to make it possible to implement ISLISP, another Lisp dialect being +standardized at the same time, strictly as a user-level compatibility +layer on top of Common Lisp, a LAMBDA macro was defined that expands +into a call to FUNCTION wrapped around the LAMBDA expression. In other +words, the following LAMBDA expression: + + (lambda () 42) + + expands into the following when it occurs in a context where it +evaluated: + + (function (lambda () 42)) ; or #'(lambda () 42) + + This makes its use in a value position, such as an argument to +FUNCALL, legal. In other words, it's pure syntactic sugar. Most folks +either always use #' before LAMBDA expressions in value positions or +never do. In this book, I always use #'. + + +File: pcl.info, Node: Chapter 6, Next: Chapter 7, Prev: Chapter 5, Up: Top + +6. Variables +============ + +The next basic building block we need to look at are variables. Common +Lisp supports two kinds of variables: lexical and dynamic. (1) These +two types correspond roughly to "local" and "global" variables in other +languages. However, the correspondence is only approximate. On one +hand, some languages' "local" variables are in fact much like Common +Lisp's dynamic variables. (2) And on the other, some languages' local +variables are lexically scoped without providing all the capabilities +provided by Common Lisp's lexical variables. In particular, not all +languages that provide lexically scoped variables support closures. + + To make matters a bit more confusing, many of the forms that deal +with variables can be used with both lexical and dynamic variables. So +I'll start by discussing a few aspects of Lisp's variables that apply to +both kinds and then cover the specific characteristics of lexical and +dynamic variables. Then I'll discuss Common Lisp's general-purpose +assignment operator, SETF, which is used to assign new values to +variables and just about every other place that can hold a value. + +* Menu: + +* 6-1:: Variable Basics +* 6-2:: Lexical Variables and Closures +* 6-3:: Dynamic, a.k.a. Special, Variables +* 6-4:: Constants +* 6-5:: Assignment +* 6-6:: Generalized Assignment +* 6-7:: Other Ways to Modify Places + + ---------- Footnotes ---------- + + (1) Dynamic variables are also sometimes called special variables for +reasons you'll see later in this chapter. It's important to be aware of +this synonym, as some folks (and Lisp implementations) use one term +while others use the other. + + (2) Early Lisps tended to use dynamic variables for local variables, +at least when interpreted. Elisp, the Lisp dialect used in Emacs, is a +bit of a throwback in this respect, continuing to support only dynamic +variables. Other languages have recapitulated this transition from +dynamic to lexical variables-Perl's local variables, for instance, are +dynamic while its my variables, introduced in Perl 5, are lexical. +Python never had true dynamic variables but only introduced true lexical +scoping in version 2.2. (Python's lexical variables are still somewhat +limited compared to Lisp's because of the conflation of assignment and +binding in the language's syntax.) + + +File: pcl.info, Node: 6-1, Next: 6-2, Prev: Chapter 6, Up: Chapter 6 + +Variable Basics +=============== + +As in other languages, in Common Lisp variables are named places that +can hold a value. However, in Common Lisp, variables aren't typed the +way they are in languages such as Java or C++. 
That is, you don't need +to declare the type of object that each variable can hold. Instead, a +variable can hold values of any type and the values carry type +information that can be used to check types at runtime. Thus, Common +Lisp is dynamically typed-type errors are detected dynamically. For +instance, if you pass something other than a number to the + function, +Common Lisp will signal a type error. On the other hand, Common Lisp is +a strongly typed language in the sense that all type errors will be +detected-there's no way to treat an object as an instance of a class +that it's not. (1) + + All values in Common Lisp are, conceptually at least, references to +objects. (2) Consequently, assigning a variable a new value changes +what object the variable refers to but has no effect on the previously +referenced object. However, if a variable holds a reference to a +mutable object, you can use that reference to modify the object, and the +modification will be visible to any code that has a reference to the +same object. + + One way to introduce new variables you've already used is to define +function parameters. As you saw in the previous chapter, when you +define a function with DEFUN, the parameter list defines the variables +that will hold the function's arguments when it's called. For example, +this function defines three variables-x, y, and z-to hold its arguments. + + (defun foo (x y z) (+ x y z)) + + Each time a function is called, Lisp creates new bindings to hold the +arguments passed by the function's caller. A binding is the runtime +manifestation of a variable. A single variable-the thing you can point +to in the program's source code-can have many different bindings during +a run of the program. A single variable can even have multiple bindings +at the same time; parameters to a recursive function, for example, are +rebound for each call to the function. + + As with all Common Lisp variables, function parameters hold object +references. (3) Thus, you can assign a new value to a function +parameter within the body of the function, and it will not affect the +bindings created for another call to the same function. But if the +object passed to a function is mutable and you change it in the +function, the changes will be visible to the caller since both the +caller and the callee will be referencing the same object. + + Another form that introduces new variables is the LET special +operator. The skeleton of a LET form looks like this: + + (let (variable*) + body-form*) + + where each variable is a variable initialization form. Each +initialization form is either a list containing a variable name and an +initial value form or-as a shorthand for initializing the variable to +NIL-a plain variable name. The following LET form, for example, binds +the three variables x, y, and z with initial values 10, 20, and NIL: + + (let ((x 10) (y 20) z) + ...) + + When the LET form is evaluated, all the initial value forms are first +evaluated. Then new bindings are created and initialized to the +appropriate initial values before the body forms are executed. Within +the body of the LET, the variable names refer to the newly created +bindings. After the LET, the names refer to whatever, if anything, they +referred to before the LET. + + The value of the last expression in the body is returned as the value +of the LET expression. Like function parameters, variables introduced +with LET are rebound each time the LET is entered. 
(4) + + The scope of function parameters and LET variables-the area of the +program where the variable name can be used to refer to the variable's +binding-is delimited by the form that introduces the variable. This +form-the function definition or the LET-is called the binding form. As +you'll see in a bit, the two types of variables-lexical and dynamic-use +two slightly different scoping mechanisms, but in both cases the scope +is delimited by the binding form. + + If you nest binding forms that introduce variables with the same +name, then the bindings of the innermost variable shadows the outer +bindings. For instance, when the following function is called, a +binding is created for the parameter x to hold the function's argument. +Then the first LET creates a new binding with the initial value 2, and +the inner LET creates yet another binding, this one with the initial +value 3. The bars on the right mark the scope of each binding. + + (defun foo (x) + (format t "Parameter: ~a~%" x) ; |<------ x is argument + (let ((x 2)) ; | + (format t "Outer LET: ~a~%" x) ; | |<---- x is 2 + (let ((x 3)) ; | | + (format t "Inner LET: ~a~%" x)) ; | | |<-- x is 3 + (format t "Outer LET: ~a~%" x)) ; | | + (format t "Parameter: ~a~%" x)) ; | + + Each reference to x will refer to the binding with the smallest +enclosing scope. Once control leaves the scope of one binding form, the +binding from the immediately enclosing scope is unshadowed and x refers +to it instead. Thus, calling foo results in this output: + + CL-USER> (foo 1) + Parameter: 1 + Outer LET: 2 + Inner LET: 3 + Outer LET: 2 + Parameter: 1 + NIL + + In future chapters I'll discuss other constructs that also serve as +binding forms-any construct that introduces a new variable name that's +usable only within the construct is a binding form. + + For instance, in Chapter 7 you'll meet the DOTIMES loop, a basic +counting loop. It introduces a variable that holds the value of a +counter that's incremented each time through the loop. The following +loop, for example, which prints the numbers from 0 to 9, binds the +variable x: + + (dotimes (x 10) (format t "~d " x)) + + Another binding form is a variant of LET, LET*. The difference is +that in a LET, the variable names can be used only in the body of the +LET-the part of the LET after the variables list-but in a LET*, the +initial value forms for each variable can refer to variables introduced +earlier in the variables list. Thus, you can write the following: + + (let* ((x 10) + (y (+ x 10))) + (list x y)) + + but not this: + + (let ((x 10) + (y (+ x 10))) + (list x y)) + + However, you could achieve the same result with nested LETs. + + (let ((x 10)) + (let ((y (+ x 10))) + (list x y))) + + ---------- Footnotes ---------- + + (1) Actually, it's not quite true to say that all type errors will +always be detected-it's possible to use optional declarations to tell +the compiler that certain variables will always contain objects of a +particular type and to turn off runtime type checking in certain regions +of code. However, declarations of this sort are used to optimize code +after it has been developed and debugged, not during normal development. + + (2) As an optimization certain kinds of objects, such as integers +below a certain size and characters, may be represented directly in +memory where other objects would be represented by a pointer to the +actual object. 
However, since integers and characters are immutable, it +doesn't matter that there may be multiple copies of "the same" object in +different variables. This is the root of the difference between EQ and +EQL discussed in Chapter 4. + + (3) In compiler-writer terms Common Lisp functions are +"pass-by-value." However, the values that are passed are references to +objects. This is similar to how Java and Python work. + + (4) The variables in LET forms and function parameters are created by +exactly the same mechanism. In fact, in some Lisp dialects-though not +Common Lisp-LET is simply a macro that expands into a call to an +anonymous function. That is, in those dialects, the following: + + (let ((x 10)) (format t "~a" x)) + + is a macro form that expands into this: + + ((lambda (x) (format t "~a" x)) 10) + + +File: pcl.info, Node: 6-2, Next: 6-3, Prev: 6-1, Up: Chapter 6 + +Lexical Variables and Closures +============================== + +By default all binding forms in Common Lisp introduce lexically scoped +variables. Lexically scoped variables can be referred to only by code +that's textually within the binding form. Lexical scoping should be +familiar to anyone who has programmed in Java, C, Perl, or Python since +they all provide lexically scoped "local" variables. For that matter, +Algol programmers should also feel right at home, as Algol first +introduced lexical scoping in the 1960s. + + However, Common Lisp's lexical variables are lexical variables with a +twist, at least compared to the original Algol model. The twist is +provided by the combination of lexical scoping with nested functions. +By the rules of lexical scoping, only code textually within the binding +form can refer to a lexical variable. But what happens when an +anonymous function contains a reference to a lexical variable from an +enclosing scope? For instance, in this expression: + + (let ((count 0)) #'(lambda () (setf count (1+ count)))) + + the reference to count inside the LAMBDA form should be legal +according to the rules of lexical scoping. Yet the anonymous function +containing the reference will be returned as the value of the LET form +and can be invoked, via FUNCALL, by code that's not in the scope of the +LET. So what happens? As it turns out, when count is a lexical +variable, it just works. The binding of count created when the flow of +control entered the LET form will stick around for as long as needed, in +this case for as long as someone holds onto a reference to the function +object returned by the LET form. The anonymous function is called a +closure because it "closes over" the binding created by the LET. + + The key thing to understand about closures is that it's the binding, +not the value of the variable, that's captured. Thus, a closure can not +only access the value of the variables it closes over but can also +assign new values that will persist between calls to the closure. For +instance, you can capture the closure created by the previous expression +in a global variable like this: + + (defparameter *fn* (let ((count 0)) #'(lambda () (setf count (1+ count))))) + + Then each time you invoke it, the value of count will increase by +one. + + CL-USER> (funcall *fn*) + 1 + CL-USER> (funcall *fn*) + 2 + CL-USER> (funcall *fn*) + 3 + + A single closure can close over many variable bindings simply by +referring to them. Or multiple closures can capture the same binding. 
+For instance, the following expression returns a list of three closures, +one that increments the value of the closed over count binding, one that +decrements it, and one that returns the current value: + + (let ((count 0)) + (list + #'(lambda () (incf count)) + #'(lambda () (decf count)) + #'(lambda () count))) + + +File: pcl.info, Node: 6-3, Next: 6-4, Prev: 6-2, Up: Chapter 6 + +Dynamic, a.k.a. Special, Variables +================================== + +Lexically scoped bindings help keep code understandable by limiting the +scope, literally, in which a given name has meaning. This is why most +modern languages use lexical scoping for local variables. Sometimes, +however, you really want a global variable-a variable that you can refer +to from anywhere in your program. While it's true that indiscriminate +use of global variables can turn code into spaghetti nearly as quickly +as unrestrained use of goto, global variables do have legitimate uses +and exist in one form or another in almost every programming language. +(1) And as you'll see in a moment, Lisp's version of global variables, +dynamic variables, are both more useful and more manageable. + + Common Lisp provides two ways to create global variables: DEFVAR and +DEFPARAMETER. Both forms take a variable name, an initial value, and an +optional documentation string. After it has been DEFVARed or +DEFPARAMETERed, the name can be used anywhere to refer to the current +binding of the global variable. As you've seen in previous chapters, +global variables are conventionally named with names that start and end +with *. You'll see later in this section why it's quite important to +follow that naming convention. Examples of DEFVAR and DEFPARAMETER look +like this: + + (defvar *count* 0 + "Count of widgets made so far.") + + (defparameter *gap-tolerance* 0.001 + "Tolerance to be allowed in widget gaps.") + + The difference between the two forms is that DEFPARAMETER always +assigns the initial value to the named variable while DEFVAR does so +only if the variable is undefined. A DEFVAR form can also be used with +no initial value to define a global variable without giving it a value. +Such a variable is said to be unbound. + + Practically speaking, you should use DEFVAR to define variables that +will contain data you'd want to keep even if you made a change to the +source code that uses the variable. For instance, suppose the two +variables defined previously are part of an application for controlling +a widget factory. It's appropriate to define the *count* variable with +DEFVAR because the number of widgets made so far isn't invalidated just +because you make some changes to the widget-making code. (2) + + On the other hand, the variable *gap-tolerance* presumably has some +effect on the behavior of the widget-making code itself. If you decide +you need a tighter or looser tolerance and change the value in the +DEFPARAMETER form, you'd like the change to take effect when you +recompile and reload the file. + + After defining a variable with DEFVAR or DEFPARAMETER, you can refer +to it from anywhere. For instance, you might define this function to +increment the count of widgets made: + + (defun increment-widget-count () (incf *count*)) + + The advantage of global variables is that you don't have to pass them +around. 
Most languages store the standard input and output streams in +global variables for exactly this reason-you never know when you're +going to want to print something to standard out, and you don't want +every function to have to accept and pass on arguments containing those +streams just in case someone further down the line needs them. + + However, once a value, such as the standard output stream, is stored +in a global variable and you have written code that references that +global variable, it's tempting to try to temporarily modify the behavior +of that code by changing the variable's value. + + For instance, suppose you're working on a program that contains some +low-level logging functions that print to the stream in the global +variable *standard-output*. Now suppose that in part of the program you +want to capture all the output generated by those functions into a file. +You might open a file and assign the resulting stream to +*standard-output*. Now the low-level functions will send their output +to the file. + + This works fine until you forget to set *standard-output* back to the +original stream when you're done. If you forget to reset +*standard-output*, all the other code in the program that uses +*standard-output* will also send its output to the file. (3) + + What you really want, it seems, is a way to wrap a piece of code in +something that says, "All code below here-all the functions it calls, +all the functions they call, and so on, down to the lowest-level +functions-should use this value for the global variable +*standard-output*." Then when the high-level function returns, the old +value of *standard-output* should be automatically restored. + + It turns out that that's exactly what Common Lisp's other kind of +variable-dynamic variables-let you do. When you bind a dynamic +variable-for example, with a LET variable or a function parameter-the +binding that's created on entry to the binding form replaces the global +binding for the duration of the binding form. Unlike a lexical binding, +which can be referenced by code only within the lexical scope of the +binding form, a dynamic binding can be referenced by any code that's +invoked during the execution of the binding form. (4) And it turns out +that all global variables are, in fact, dynamic variables. + + Thus, if you want to temporarily redefine *standard-output*, the way +to do it is simply to rebind it, say, with a LET. + + (let ((*standard-output* *some-other-stream*)) + (stuff)) + + In any code that runs as a result of the call to stuff, references to +*standard-output* will use the binding established by the LET. And when +stuff returns and control leaves the LET, the new binding of +*standard-output* will go away and subsequent references to +*standard-output* will see the binding that was current before the LET. +At any given time, the most recently established binding shadows all +other bindings. Conceptually, each new binding for a given dynamic +variable is pushed onto a stack of bindings for that variable, and +references to the variable always use the most recent binding. As +binding forms return, the bindings they created are popped off the +stack, exposing previous bindings. (5) + + A simple example shows how this works. + + (defvar *x* 10) + (defun foo () (format t "X: ~d~%" *x*)) + + The DEFVAR creates a global binding for the variable *x* with the +value 10. The reference to *x* in foo will look up the current binding +dynamically. 
If you call foo from the top level, the global binding +created by the DEFVAR is the only binding available, so it prints 10. + + CL-USER> (foo) + X: 10 + NIL + + But you can use LET to create a new binding that temporarily shadows +the global binding, and foo will print a different value. + + CL-USER> (let ((*x* 20)) (foo)) + X: 20 + NIL + + Now call foo again, with no LET, and it again sees the global +binding. + + CL-USER> (foo) + X: 10 + NIL + + Now define another function. + + (defun bar () + (foo) + (let ((*x* 20)) (foo)) + (foo)) + + Note that the middle call to foo is wrapped in a LET that binds *x* +to the new value 20. When you run bar, you get this result: + + CL-USER> (bar) + X: 10 + X: 20 + X: 10 + NIL + + As you can see, the first call to foo sees the global binding, with +its value of 10. The middle call, however, sees the new binding, with +the value 20. But after the LET, foo once again sees the global +binding. + + As with lexical bindings, assigning a new value affects only the +current binding. To see this, you can redefine foo to include an +assignment to *x*. + + (defun foo () + (format t "Before assignment~18tX: ~d~%" *x*) + (setf *x* (+ 1 *x*)) + (format t "After assignment~18tX: ~d~%" *x*)) + + Now foo prints the value of *x*, increments it, and prints it again. +If you just run foo, you'll see this: + + CL-USER> (foo) + Before assignment X: 10 + After assignment X: 11 + NIL + + Not too surprising. Now run bar. + + CL-USER> (bar) + Before assignment X: 11 + After assignment X: 12 + Before assignment X: 20 + After assignment X: 21 + Before assignment X: 12 + After assignment X: 13 + NIL + + Notice that *x* started at 11-the earlier call to foo really did +change the global value. The first call to foo from bar increments the +global binding to 12. The middle call doesn't see the global binding +because of the LET. Then the last call can see the global binding again +and increments it from 12 to 13. + + So how does this work? How does LET know that when it binds *x* it's +supposed to create a dynamic binding rather than a normal lexical +binding? It knows because the name has been declared special. (6) The +name of every variable defined with DEFVAR and DEFPARAMETER is +automatically declared globally special. This means whenever you use +such a name in a binding form-in a LET or as a function parameter or any +other construct that creates a new variable binding-the binding that's +created will be a dynamic binding. This is why the *naming* +*convention* is so important-it'd be bad news if you used a name for +what you thought was a lexical variable and that variable happened to be +globally special. On the one hand, code you call could change the value +of the binding out from under you; on the other, you might be shadowing +a binding established by code higher up on the stack. If you always +name global variables according to the * naming convention, you'll never +accidentally use a dynamic binding where you intend to establish a +lexical binding. + + It's also possible to declare a name locally special. If, in a +binding form, you declare a name special, then the binding created for +that variable will be dynamic rather than lexical. Other code can +locally declare a name special in order to refer to the dynamic binding. +However, locally special variables are relatively rare, so you needn't +worry about them. (7) + + Dynamic bindings make global variables much more manageable, but it's +important to notice they still allow action at a distance. 
Binding a +global variable has two at a distance effects-it can change the behavior +of downstream code, and it also opens the possibility that downstream +code will assign a new value to a binding established higher up on the +stack. You should use dynamic variables only when you need to take +advantage of one or both of these characteristics. + + ---------- Footnotes ---------- + + (1) Java disguises global variables as public static fields, C uses +extern variables, and Python's module-level and Perl's package-level +variables can likewise be accessed from anywhere. + + (2) If you specifically want to reset a DEFVARed variable, you can +either set it directly with SETF or make it unbound using MAKUNBOUND and +then reevaluate the DEFVAR form. + + (3) The strategy of temporarily reassigning *standard-output* also +breaks if the system is multithreaded-if there are multiple threads of +control trying to print to different streams at the same time, they'll +all try to set the global variable to the stream they want to use, +stomping all over each other. You could use a lock to control access to +the global variable, but then you're not really getting the benefit of +multiple concurrent threads, since whatever thread is printing has to +lock out all the other threads until it's done even if they want to +print to a different stream. + + (4) The technical term for the interval during which references may +be made to a binding is its extent. Thus, scope and extent are +complementary notions-scope refers to space while extent refers to time. +Lexical variables have lexical scope but indefinite extent, meaning they +stick around for an indefinite interval, determined by how long they're +needed. Dynamic variables, by contrast, have indefinite scope since +they can be referred to from anywhere but dynamic extent. To further +confuse matters, the combination of indefinite scope and dynamic extent +is frequently referred to by the misnomer dynamic scope. + + (5) Though the standard doesn't specify how to incorporate +multithreading into Common Lisp, implementations that provide +multithreading follow the practice established on the Lisp machines and +create dynamic bindings on a per-thread basis. A reference to a global +variable will find the binding most recently established in the current +thread, or the global binding. + + (6) This is why dynamic variables are also sometimes called special +variables. + + (7) If you must know, you can look up DECLARE, SPECIAL, and LOCALLY +in the HyperSpec. + + +File: pcl.info, Node: 6-4, Next: 6-5, Prev: 6-3, Up: Chapter 6 + +Constants +========= + +One other kind of variable I haven't mentioned at all is the oxymoronic +"constant variable." All constants are global and are defined with +DEFCONSTANT. The basic form of DEFCONSTANT is like DEFPARAMETER. + + (defconstant name initial-value-form [ documentation-string ]) + + As with DEFVAR and DEFPARAMETER, DEFCONSTANT has a global effect on +the name used-thereafter the name can be used only to refer to the +constant; it can't be used as a function parameter or rebound with any +other binding form. Thus, many Lisp programmers follow a naming +convention of using names starting and ending with + for constants. +This convention is somewhat less universally followed than the *-naming +convention for globally special names but is a good idea for the same +reason. 
(1) + + Another thing to note about DEFCONSTANT is that while the language +allows you to redefine a constant by reevaluating a DEFCONSTANT with a +different initial-value-form, what exactly happens after the +redefinition isn't defined. In practice, most implementations will +require you to reevaluate any code that refers to the constant in order +to see the new value since the old value may well have been inlined. +Consequently, it's a good idea to use DEFCONSTANT only to define things +that are really constant, such as the value of NIL. For things you might +ever want to change, you should use DEFPARAMETER instead. + + ---------- Footnotes ---------- + + (1) Several key constants defined by the language itself don't follow +this convention-not least of which are T and NIL. This is occasionally +annoying when one wants to use t as a local variable name. Another is +PI, which holds the best long-float approximation of the mathematical +constant pi. + + +File: pcl.info, Node: 6-5, Next: 6-6, Prev: 6-4, Up: Chapter 6 + +Assignment +========== + +Once you've created a binding, you can do two things with it: get the +current value and set it to a new value. As you saw in Chapter 4, a +symbol evaluates to the value of the variable it names, so you can get +the current value simply by referring to the variable. To assign a new +value to a binding, you use the SETF macro, Common Lisp's +general-purpose assignment operator. The basic form of SETF is as +follows: + + (setf place value) + + Because SETF is a macro, it can examine the form of the place it's +assigning to and expand into appropriate lower-level operations to +manipulate that place. When the place is a variable, it expands into a +call to the special operator SETQ, which, as a special operator, has +access to both lexical and dynamic bindings. (1) For instance, to +assign the value 10 to the variable x, you can write this: + + (setf x 10) + + As I discussed earlier, assigning a new value to a binding has no +effect on any other bindings of that variable. And it doesn't have any +effect on the value that was stored in the binding prior to the +assignment. Thus, the SETF in this function: + + (defun foo (x) (setf x 10)) + + will have no effect on any value outside of foo. The binding that +was created when foo was called is set to 10, immediately replacing +whatever value was passed as an argument. In particular, a form such as +the following: + + (let ((y 20)) + (foo y) + (print y)) + + will print 20, not 10, as it's the value of y that's passed to foo +where it's briefly the value of the variable x before the SETF gives x a +new value. + + SETF can also assign to multiple places in sequence. For instance, +instead of the following: + + (setf x 1) + (setf y 2) + + you can write this: + + (setf x 1 y 2) + + SETF returns the newly assigned value, so you can also nest calls to +SETF as in the following expression, which assigns both x and y the same +random value: + + (setf x (setf y (random 10))) + + ---------- Footnotes ---------- + + (1) Some old-school Lispers prefer to use SETQ with variables, but +modern style tends to use SETF for all assignments. + + +File: pcl.info, Node: 6-6, Next: 6-7, Prev: 6-5, Up: Chapter 6 + +Generalized Assignment +====================== + +Variable bindings, of course, aren't the only places that can hold +values. 
Common Lisp supports composite data structures such as arrays, +hash tables, and lists, as well as user-defined data structures, all of +which consist of multiple places that can each hold a value. + + I'll cover those data structures in future chapters, but while we're +on the topic of assignment, you should note that SETF can assign any +place a value. As I cover the different composite data structures, I'll +point out which functions can serve as "SETFable places." The short +version, however, is if you need to assign a value to a place, SETF is +almost certainly the tool to use. It's even possible to extend SETF to +allow it to assign to user-defined places though I won't cover that. +(1) + + In this regard SETF is no different from the = assignment operator in +most C-derived languages. In those languages, the = operator assigns +new values to variables, array elements, and fields of classes. In +languages such as Perl and Python that support hash tables as a built-in +data type, = can also set the values of individual hash table entries. +Table 6-1 summarizes the various ways = is used in those languages. + + Table 6-1. Assignment with = in Other Languages + +[image src="tables/6-1.png"] + + SETF works the same way-the first "argument" to SETF is a place to +store the value, and the second argument provides the value. As with +the = operator in these languages, you use the same form to express the +place as you'd normally use to fetch the value. (2) Thus, the Lisp +equivalents of the assignments in Table 6-1-given that AREF is the array +access function, GETHASH does a hash table lookup, and field might be a +function that accesses a slot named field of a user-defined object-are +as follows: + + Simple variable: (setf x 10) + Array: (setf (aref a 0) 10) + Hash table: (setf (gethash 'key hash) 10) + Slot named 'field': (setf (field o) 10) + + Note that SETFing a place that's part of a larger object has the same +semantics as SETFing a variable: the place is modified without any +effect on the object that was previously stored in the place. Again, +this is similar to how = behaves in Java, Perl, and Python. (3) + + ---------- Footnotes ---------- + + (1) Look up DEFSETF, DEFINE-SETF-EXPANDER for more information. + + (2) The prevalence of Algol-derived syntax for assignment with the +"place" on the left side of the = and the new value on the right side +has spawned the terminology lvalue, short for "left value," meaning +something that can be assigned to, and rvalue, meaning something that +provides a value. A compiler hacker would say, "SETF treats its first +argument as an lvalue." + + (3) C programmers may want to think of variables and other places as +holding a pointer to the real object; assigning to a variable simply +changes what object it points to while assigning to a part of a +composite object is similar to indirecting through the pointer to the +actual object. C++ programmers should note that the behavior of = in +C++ when dealing with objects-namely, a memberwise copy-is quite +idiosyncratic. + + +File: pcl.info, Node: 6-7, Next: Chapter 7, Prev: 6-6, Up: Chapter 6 + +Other Ways to Modify Places +=========================== + +While all assignments can be expressed with SETF, certain patterns +involving assigning a new value based on the current value are +sufficiently common to warrant their own operators. 
For instance, while +you could increment a number with SETF, like this: + + (setf x (+ x 1)) + + or decrement it with this: + + (setf x (- x 1)) + + it's a bit tedious, compared to the C-style ++x and -x. Instead, you +can use the macros INCF and DECF, which increment and decrement a place +by a certain amount that defaults to 1. + + (incf x) === (setf x (+ x 1)) + (decf x) === (setf x (- x 1)) + (incf x 10) === (setf x (+ x 10)) + + INCF and DECF are examples of a kind of macro called modify macros. +Modify macros are macros built on top of SETF that modify places by +assigning a new value based on the current value of the place. The main +benefit of modify macros is that they're more concise than the same +modification written out using SETF. Additionally, modify macros are +defined in a way that makes them safe to use with places where the place +expression must be evaluated only once. A silly example is this +expression, which increments the value of an arbitrary element of an +array: + + (incf (aref *array* (random (length *array*)))) + + A naive translation of that into a SETF expression might look like +this: + + (setf (aref *array* (random (length *array*))) + (1+ (aref *array* (random (length *array*))))) + + However, that doesn't work because the two calls to RANDOM won't +necessarily return the same value-this expression will likely grab the +value of one element of the array, increment it, and then store it back +as the new value of a different element. The INCF expression, however, +does the right thing because it knows how to take apart this expression: + + (aref *array* (random (length *array*))) + + to pull out the parts that could possibly have side effects to make +sure they're evaluated only once. In this case, it would probably +expand into something more or less equivalent to this: + + (let ((tmp (random (length *array*)))) + (setf (aref *array* tmp) (1+ (aref *array* tmp)))) + + In general, modify macros are guaranteed to evaluate both their +arguments and the subforms of the place form exactly once each, in +left-to-right order. + + The macro PUSH, which you used in the mini-database to add elements +to the *db* variable, is another modify macro. You'll take a closer +look at how it and its counterparts POP and PUSHNEW work in Chapter 12 +when I talk about how lists are represented in Lisp. + + Finally, two slightly esoteric but useful modify macros are ROTATEF +and SHIFTF. ROTATEF rotates values between places. For instance, if you +have two variables, a and b, this call: + + (rotatef a b) + + swaps the values of the two variables and returns NIL. Since a and b +are variables and you don't have to worry about side effects, the +previous ROTATEF expression is equivalent to this: + + (let ((tmp a)) (setf a b b tmp) nil) + + With other kinds of places, the equivalent expression using SETF +would be quite a bit more complex. + + SHIFTF is similar except instead of rotating values it shifts them to +the left-the last argument provides a value that's moved to the +second-to-last argument while the rest of the values are moved one to +the left. The original value of the first argument is simply returned. +Thus, the following: + + (shiftf a b 10) + + is equivalent-again, since you don't have to worry about side +effects-to this: + + (let ((tmp a)) (setf a b b 10) tmp) + + Both ROTATEF and SHIFTF can be used with any number of arguments and, +like all modify macros, are guaranteed to evaluate them exactly once, in +left to right order. 
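+
+   To make these two macros a bit more concrete, here's a small sketch
+applying them to places other than plain variables-the case where, as
+noted above, the plain SETF equivalent would be messier. (The vector
+*scores* and its contents are made up purely for illustration.)
+
+     (defparameter *scores* (vector 10 20 30))
+
+     ;; Swap the first and last elements; each AREF place is read and
+     ;; written exactly once.
+     (rotatef (aref *scores* 0) (aref *scores* 2))
+     ;; *scores* is now #(30 20 10)
+
+     ;; Shift left: the first place gets the second place's old value,
+     ;; the second place gets 99, and the first place's old value, 30,
+     ;; is returned.
+     (shiftf (aref *scores* 0) (aref *scores* 1) 99)
+     ;; *scores* is now #(20 99 10)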
+ + With the basics of Common Lisp's functions and variables under your +belt, now you're ready to move onto the feature that continues to +differentiate Lisp from other languages: macros. + + +File: pcl.info, Node: Chapter 7, Next: Chapter 8, Prev: Chapter 6, Up: Top + +7. Macros: Standard Control Constructs +====================================== + +While many of the ideas that originated in Lisp, from the conditional +expression to garbage collection, have been incorporated into other +languages, the one language feature that continues to set Common Lisp +apart is its macro system. Unfortunately, the word macro describes a +lot of things in computing to which Common Lisp's macros bear only a +vague and metaphorical similarity. This causes no end of +misunderstanding when Lispers try to explain to non-Lispers what a great +feature macros are. (1) To understand Lisp's macros, you really need to +come at them fresh, without preconceptions based on other things that +also happen to be called macros. So let's start our discussion of +Lisp's macros by taking a step back and looking at various ways +languages support extensibility. + + All programmers should be used to the idea that the definition of a +language can include a standard library of functionality that's +implemented in terms of the "core" language-functionality that could +have been implemented by any programmer on top of the language if it +hadn't been defined as part of the standard library. C's standard +library, for instance, can be implemented almost entirely in portable C. +Similarly, most of the ever-growing set of classes and interfaces that +ship with Java's standard Java Development Kit (JDK) are written in +"pure" Java. + + One advantage of defining languages in terms of a core plus a +standard library is it makes them easier to understand and implement. +But the real benefit is in terms of expressiveness-since much of what +you think of as "the language" is really just a library-the language is +easy to extend. If C doesn't have a function to do some thing or +another that you need, you can write that function, and now you have a +slightly richer version of C. Similarly, in a language such as Java or +Smalltalk where almost all the interesting parts of the "language" are +defined in terms of classes, by defining new classes you extend the +language, making it more suited for writing programs to do whatever it +is you're trying to do. + + While Common Lisp supports both these methods of extending the +language, macros give Common Lisp yet another way. As I discussed +briefly in Chapter 4, each macro defines its own syntax, determining how +the s-expressions it's passed are turned into Lisp forms. With macros +as part of the core language it's possible to build new syntax-control +constructs such as WHEN, DOLIST, and LOOP as well as definitional forms +such as DEFUN and DEFPARAMETER-as part of the "standard library" rather +than having to hardwire them into the core. This has implications for +how the language itself is implemented, but as a Lisp programmer you'll +care more that it gives you another way to extend the language, making +it a better language for expressing solutions to your particular +programming problems. + + Now, it may seem that the benefits of having another way to extend +the language would be easy to recognize. 
But for some reason a lot of +folks who haven't actually used Lisp macros-folks who think nothing of +spending their days creating new functional abstractions or defining +hierarchies of classes to solve their programming problems-get spooked +by the idea of being able to define new syntactic abstractions. The +most common cause of macrophobia seems to be bad experiences with other +"macro" systems. Simple fear of the unknown no doubt plays a role, too. +To avoid triggering any macrophobic reactions, I'll ease into the +subject by discussing several of the standard control-construct macros +defined by Common Lisp. These are some of the things that, if Lisp +didn't have macros, would have to be built into the language core. When +you use them, you don't have to care that they're implemented as macros, +but they provide a good example of some of the things you can do with +macros. (2) In the next chapter, I'll show you how you can define your +own macros. + +* Menu: + +* 7-1:: WHEN and UNLESS +* 7-2:: COND +* 7-3:: AND, OR, and NOT +* 7-4:: Looping +* 7-5:: DOLIST and DOTIMES +* 7-6:: DO +* 7-7:: The Mighty LOOP + + ---------- Footnotes ---------- + + (1) To see what this misunderstanding looks like, find any longish +Usenet thread cross-posted between comp.lang.lisp and any other +comp.lang.* group with macro in the subject. A rough paraphrase goes +like this: + + Lispnik: "Lisp is the best because of its macros!"; + + Othernik: "You think Lisp is good because of macros?! But macros are +horrible and evil; Lisp must be horrible and evil." + + (2) Another important class of language constructs that are defined +using macros are all the definitional constructs such as DEFUN, +DEFPARAMETER, DEFVAR, and others. In Chapter 24 you'll define your own +definitional macros that will allow you to concisely write code for +reading and writing binary data. + + +File: pcl.info, Node: 7-1, Next: 7-2, Prev: Chapter 7, Up: Chapter 7 + +WHEN and UNLESS +=============== + +As you've already seen, the most basic form of conditional execution-if +x, do y; otherwise do z-is provided by the IF special operator, which +has this basic form: + + (if condition then-form [else-form]) + + The condition is evaluated and, if its value is non-NIL, the +then-form is evaluated and the resulting value returned. Otherwise, the +else-form, if any, is evaluated and its value returned. If condition is +NIL and there's no else-form, then the IF returns NIL. + + (if (> 2 3) "Yup" "Nope") ==> "Nope" + (if (> 2 3) "Yup") ==> NIL + (if (> 3 2) "Yup" "Nope") ==> "Yup" + + However, IF isn't actually such a great syntactic construct because +the then-form and else-form are each restricted to being a single Lisp +form. This means if you want to perform a sequence of actions in either +clause, you need to wrap them in some other syntax. For instance, +suppose in the middle of a spam-filtering program you wanted to both +file a message as spam and update the spam database when a message is +spam. You can't write this: + + (if (spam-p current-message) + (file-in-spam-folder current-message) + (update-spam-database current-message)) + + because the call to update-spam-database will be treated as the else +clause, not as part of the then clause. Another special operator, +PROGN, executes any number of forms in order and returns the value of +the last form. 
So you could get the desired behavior by writing the +following: + + (if (spam-p current-message) + (progn + (file-in-spam-folder current-message) + (update-spam-database current-message))) + + That's not too horrible. But given the number of times you'll likely +have to use this idiom, it's not hard to imagine that you'd get tired of +it after a while. "Why," you might ask yourself, "doesn't Lisp provide +a way to say what I really want, namely, 'When x is true, do this, that, +and the other thing'?" In other words, after a while you'd notice the +pattern of an IF plus a PROGN and wish for a way to abstract away the +details rather than writing them out every time. + + This is exactly what macros provide. In this case, Common Lisp comes +with a standard macro, WHEN, which lets you write this: + + (when (spam-p current-message) + (file-in-spam-folder current-message) + (update-spam-database current-message)) + + But if it wasn't built into the standard library, you could define +WHEN yourself with a macro such as this, using the backquote notation I +discussed in Chapter 3: (1) + + (defmacro when (condition &rest body) + `(if ,condition (progn ,@body))) + + A counterpart to the WHEN macro is UNLESS, which reverses the +condition, evaluating its body forms only if the condition is false. In +other words: + + (defmacro unless (condition &rest body) + `(if (not ,condition) (progn ,@body))) + + Admittedly, these are pretty trivial macros. There's no deep black +magic here; they just abstract away a few language-level bookkeeping +details, allowing you to express your true intent a bit more clearly. +But their very triviality makes an important point: because the macro +system is built right into the language, you can write trivial macros +like WHEN and UNLESS that give you small but real gains in clarity that +are then multiplied by the thousands of times you use them. In Chapters +24, 26, and 31 you'll see how macros can also be used on a larger scale, +creating whole domain-specific embedded languages. But first let's +finish our discussion of the standard control-construct macros. + + ---------- Footnotes ---------- + + (1) You can't actually feed this definition to Lisp because it's +illegal to redefine names in the COMMON-LISP package where WHEN comes +from. If you really want to try writing such a macro, you'd need to +change the name to something else, such as my-when. + + +File: pcl.info, Node: 7-2, Next: 7-3, Prev: 7-1, Up: Chapter 7 + +COND +==== + +Another time raw IF expressions can get ugly is when you have a +multibranch conditional: if a do x, else if b do y; else do z. There's +no logical problem writing such a chain of conditional expressions with +just IF, but it's not pretty. + + (if a + (do-x) + (if b + (do-y) + (do-z))) + + And it would be even worse if you needed to include multiple forms in +the then clauses, requiring PROGNs. So, not surprisingly, Common Lisp +provides a macro for expressing multibranch conditionals: COND. This is +the basic skeleton: + + (cond + (test-1 form*) + . + . + . + (test-N form*)) + + Each element of the body represents one branch of the conditional and +consists of a list containing a condition form and zero or more forms to +be evaluated if that branch is chosen. The conditions are evaluated in +the order the branches appear in the body until one of them evaluates to +true. At that point, the remaining forms in that branch are evaluated, +and the value of the last form in the branch is returned as the value of +the COND as a whole. 
If the branch contains no forms after the +condition, the value of the condition is returned instead. By +convention, the branch representing the final else clause in an +if/else-if chain is written with a condition of T. Any non-NIL value +will work, but a T serves as a useful landmark when reading the code. +Thus, you can write the previous nested IF expression using COND like +this: + + (cond (a (do-x)) + (b (do-y)) + (t (do-z))) + + +File: pcl.info, Node: 7-3, Next: 7-4, Prev: 7-2, Up: Chapter 7 + +AND, OR, and NOT +================ + +When writing the conditions in IF, WHEN, UNLESS, and COND forms, three +operators that will come in handy are the boolean logic operators, AND, +OR, and NOT. + + NOT is a function so strictly speaking doesn't belong in this +chapter, but it's closely tied to AND and OR. It takes a single argument +and inverts its truth value, returning T if the argument is NIL and NIL +otherwise. + + AND and OR, however, are macros. They implement logical conjunction +and disjunction of any number of subforms and are defined as macros so +they can short-circuit. That is, they evaluate only as many of their +subforms-in left-to-right order-as necessary to determine the overall +truth value. Thus, AND stops and returns NIL as soon as one of its +subforms evaluates to NIL. If all the subforms evaluate to non-NIL, it +returns the value of the last subform. OR, on the other hand, stops as +soon as one of its subforms evaluates to non-NIL and returns the +resulting value. If none of the subforms evaluate to true, OR returns +NIL. Here are some examples: + + (not nil) ==> T + (not (= 1 1)) ==> NIL + (and (= 1 2) (= 3 3)) ==> NIL + (or (= 1 2) (= 3 3)) ==> T + + +File: pcl.info, Node: 7-4, Next: 7-5, Prev: 7-3, Up: Chapter 7 + +Looping +======= + +Control constructs are the other main kind of looping constructs. +Common Lisp's looping facilities are-in addition to being quite powerful +and flexible-an interesting lesson in the have-your-cake-and-eat-it-too +style of programming that macros provide. + + As it turns out, none of Lisp's 25 special operators directly support +structured looping. All of Lisp's looping control constructs are macros +built on top of a pair of special operators that provide a primitive +goto facility. (1) Like many good abstractions, syntactic or otherwise, +Lisp's looping macros are built as a set of layered abstractions +starting from the base provided by those two special operators. + + At the bottom (leaving aside the special operators) is a very general +looping construct, DO. While very powerful, DO suffers, as do many +general-purpose abstractions, from being overkill for simple situations. +So Lisp also provides two other macros, DOLIST and DOTIMES, that are +less flexible than DO but provide convenient support for the common +cases of looping over the elements of a list and counting loops. While +an implementation can implement these macros however it wants, they're +typically implemented as macros that expand into an equivalent DO loop. +Thus, DO provides a basic structured looping construct on top of the +underlying primitives provided by Common Lisp's special operators, and +DOLIST and DOTIMES provide two easier-to-use, if less general, +constructs. And, as you'll see in the next chapter, you can build your +own looping constructs on top of DO for situations where DOLIST and +DOTIMES don't meet your needs. 
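+
+   To give a feel for that layering-as an illustrative sketch only,
+since an implementation is free to expand these macros however it
+likes-a counting loop written with DOTIMES:
+
+     (dotimes (i 3) (format t "~d " i))
+
+   might expand into a DO loop along these lines:
+
+     (do ((i 0 (1+ i)))
+         ((>= i 3))
+       (format t "~d " i))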
+ + Finally, the LOOP macro provides a full-blown mini-language for +expressing looping constructs in a non-Lispy, English-like (or at least +Algol-like) language. Some Lisp hackers love LOOP; others hate it. +LOOP's fans like it because it provides a concise way to express certain +commonly needed looping constructs. Its detractors dislike it because +it's not Lispy enough. But whichever side one comes down on, it's a +remarkable example of the power of macros to add new constructs to the +language. + + ---------- Footnotes ---------- + + (1) The special operators, if you must know, are TAGBODY and GO. +There's no need to discuss them now, but I'll cover them in Chapter 20. + + +File: pcl.info, Node: 7-5, Next: 7-6, Prev: 7-4, Up: Chapter 7 + +DOLIST and DOTIMES +================== + +I'll start with the easy-to-use DOLIST and DOTIMES macros. + + DOLIST loops across the items of a list, executing the loop body with +a variable holding the successive items of the list. (1) This is the +basic skeleton (leaving out some of the more esoteric options): + + (dolist (var list-form) + body-form*) + + When the loop starts, the list-form is evaluated once to produce a +list. Then the body of the loop is evaluated once for each item in the +list with the variable var holding the value of the item. For instance: + + CL-USER> (dolist (x '(1 2 3)) (print x)) + 1 + 2 + 3 + NIL + + Used this way, the DOLIST form as a whole evaluates to NIL. + + If you want to break out of a DOLIST loop before the end of the list, +you can use RETURN. + + CL-USER> (dolist (x '(1 2 3)) (print x) (if (evenp x) (return))) + 1 + 2 + NIL + + DOTIMES is the high-level looping construct for counting loops. The +basic template is much the same as DOLIST's. + + (dotimes (var count-form) + body-form*) + + The count-form must evaluate to an integer. Each time through the +loop var holds successive integers from 0 to one less than that number. +For instance: + + CL-USER> (dotimes (i 4) (print i)) + 0 + 1 + 2 + 3 + NIL + + As with DOLIST, you can use RETURN to break out of the loop early. + + Because the body of both DOLIST and DOTIMES loops can contain any +kind of expressions, you can also nest loops. For example, to print out +the times tables from 1 × 1 = 1 to 20 × 20 = 400, you can write this +pair of nested DOTIMES loops: + + (dotimes (x 20) + (dotimes (y 20) + (format t "~3d " (* (1+ x) (1+ y)))) + (format t "~%")) + + ---------- Footnotes ---------- + + (1) DOLIST is similar to Perl's foreach or Python's for. Java added +a similar kind of loop construct with the "enhanced" for loop in Java +1.5, as part of JSR-201. Notice what a difference macros make. A Lisp +programmer who notices a common pattern in their code can write a macro +to give themselves a source-level abstraction of that pattern. A Java +programmer who notices the same pattern has to convince Sun that this +particular abstraction is worth adding to the language. Then Sun has to +publish a JSR and convene an industry-wide "expert group" to hash +everything out. That process-according to Sun-takes an average of 18 +months. After that, the compiler writers all have to go upgrade their +compilers to support the new feature. And even once the Java +programmer's favorite compiler supports the new version of Java, they +probably still can't use the new feature until they're allowed to break +source compatibility with older versions of Java. So an annoyance that +Common Lisp programmers can resolve for themselves within five minutes +plagues Java programmers for years. 
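+
+   One more small sketch related to these loops: RETURN can also carry
+a value out of a DOLIST or DOTIMES, so the loop as a whole can be used
+as an expression that computes a result. (The threshold of 100 here is
+an arbitrary choice for illustration.)
+
+     CL-USER> (dotimes (i 20) (when (> (* i i) 100) (return i)))
+     11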
+ + +File: pcl.info, Node: 7-6, Next: 7-7, Prev: 7-5, Up: Chapter 7 + +DO +== + +While DOLIST and DOTIMES are convenient and easy to use, they aren't +flexible enough to use for all loops. For instance, what if you want to +step multiple variables in parallel? Or use an arbitrary expression to +test for the end of the loop? If neither DOLIST nor DOTIMES meet your +needs, you still have access to the more general DO loop. + + Where DOLIST and DOTIMES provide only one loop variable, DO lets you +bind any number of variables and gives you complete control over how +they change on each step through the loop. You also get to define the +test that determines when to end the loop and can provide a form to +evaluate at the end of the loop to generate a return value for the DO +expression as a whole. The basic template looks like this: + + (do (variable-definition*) + (end-test-form result-form*) + statement*) + + Each variable-definition introduces a variable that will be in scope +in the body of the loop. The full form of a single variable definition +is a list containing three elements. + + (var init-form step-form) + + The init-form will be evaluated at the beginning of the loop and the +resulting values bound to the variable var. Before each subsequent +iteration of the loop, the step-form will be evaluated and the new value +assigned to var. The step-form is optional; if it's left out, the +variable will keep its value from iteration to iteration unless you +explicitly assign it a new value in the loop body. As with the variable +definitions in a LET, if the init-form is left out, the variable is +bound to NIL. Also as with LET, you can use a plain variable name as +shorthand for a list containing just the name. + + At the beginning of each iteration, after all the loop variables have +been given their new values, the end-test-form is evaluated. As long as +it evaluates to NIL, the iteration proceeds, evaluating the statements +in order. + + When the end-test-form evaluates to true, the result-forms are +evaluated, and the value of the last result form is returned as the +value of the DO expression. + + At each step of the iteration the step forms for all the variables +are evaluated before assigning any of the values to the variables. This +means you can refer to any of the other loop variables in the step +forms. (1) That is, in a loop like this: + + (do ((n 0 (1+ n)) + (cur 0 next) + (next 1 (+ cur next))) + ((= 10 n) cur)) + + the step forms (1+ n), next, and (+ cur next) are all evaluated using +the old values of n, cur, and next. Only after all the step forms have +been evaluated are the variables given their new values. +(Mathematically inclined readers may notice that this is a particularly +efficient way of computing the eleventh Fibonacci number.) + + This example also illustrates another characteristic of DO-because +you can step multiple variables, you often don't need a body at all. +Other times, you may leave out the result form, particularly if you're +just using the loop as a control construct. This flexibility, however, +is the reason that DO expressions can be a bit cryptic. Where exactly +do all the parentheses go? The best way to understand a DO expression +is to keep in mind the basic template. + + (do (variable-definition*) + (end-test-form result-form*) + statement*) + + The six parentheses in that template are the only ones required by +the DO itself. 
You need one pair to enclose the variable declarations, +one pair to enclose the end test and result forms, and one pair to +enclose the whole expression. Other forms within the DO may require +their own parentheses-variable definitions are usually lists, for +instance. And the test form is often a function call. But the skeleton +of a DO loop will always be the same. Here are some example DO loops +with the skeleton in bold: + + (do ((i 0 (1+ i))) + ((>= i 4)) + (print i)) + + Notice that the result form has been omitted. This is, however, not +a particularly idiomatic use of DO, as this loop is much more simply +written using DOTIMES. (2) + + (dotimes (i 4) (print i)) + + As another example, here's the bodiless Fibonacci-computing loop: + + (do ((n 0 (1+ n)) + (cur 0 next) + (next 1 (+ cur next))) + ((= 10 n) cur)) + + Finally, the next loop demonstrates a DO loop that binds no +variables. It loops while the current time is less than the value of a +global variable, printing "Waiting" once a minute. Note that even with +no loop variables, you still need the empty variables list. + + (do () + ((> (get-universal-time) *some-future-date*)) + (format t "Waiting~%") + (sleep 60)) + + ---------- Footnotes ---------- + + (1) A variant of DO, DO*, assigns each variable its value before +evaluating the step form for subsequent variables. For more details, +consult your favorite Common Lisp reference. + + (2) The DOTIMES is also preferred because the macro expansion will +likely include declarations that allow the compiler to generate more +efficient code. + + +File: pcl.info, Node: 7-7, Next: Chapter 8, Prev: 7-6, Up: Chapter 7 + +The Mighty LOOP +=============== + +For the simple cases you have DOLIST and DOTIMES. And if they don't suit +your needs, you can fall back on the completely general DO. What more +could you want? + + Well, it turns out a handful of looping idioms come up over and over +again, such as looping over various data structures: lists, vectors, +hash tables, and packages. Or accumulating values in various ways while +looping: collecting, counting, summing, minimizing, or maximizing. If +you need a loop to do one of these things (or several at the same time), +the LOOP macro may give you an easier way to express it. + + The LOOP macro actually comes in two flavors-simple and extended. +The simple version is as simple as can be-an infinite loop that doesn't +bind any variables. The skeleton looks like this: + + (loop + body-form*) + + The forms in body are evaluated each time through the loop, which +will iterate forever unless you use RETURN to break out. For example, +you could write the previous DO loop with a simple LOOP. + + (loop + (when (> (get-universal-time) *some-future-date*) + (return)) + (format t "Waiting~%") + (sleep 60)) + + The extended LOOP is quite a different beast. It's distinguished by +the use of certain loop keywords that implement a special-purpose +language for expressing looping idioms. It's worth noting that not all +Lispers love the extended LOOP language. At least one of Common Lisp's +original designers hated it. LOOP's detractors complain that its syntax +is totally un-Lispy (in other words, not enough parentheses). LOOP's +fans counter that that's the point: complicated looping constructs are +hard enough to understand without wrapping them up in DO's cryptic +syntax. It's better, they say, to have a slightly more verbose syntax +that gives you some clues what the heck is going on. 
+ + For instance, here's an idiomatic DO loop that collects the numbers +from 1 to 10 into a list: + + (do ((nums nil) (i 1 (1+ i))) + ((> i 10) (nreverse nums)) + (push i nums)) ==> (1 2 3 4 5 6 7 8 9 10) + + A seasoned Lisper won't have any trouble understanding that code-it's +just a matter of understanding the basic form of a DO loop and +recognizing the PUSH/NREVERSE idiom for building up a list. But it's +not exactly transparent. The LOOP version, on the other hand, is almost +understandable as an English sentence. + + (loop for i from 1 to 10 collecting i) ==> (1 2 3 4 5 6 7 8 9 10) + + The following are some more examples of simple uses of LOOP. This +sums the first ten squares: + + (loop for x from 1 to 10 summing (expt x 2)) ==> 385 + + This counts the number of vowels in a string: + + (loop for x across "the quick brown fox jumps over the lazy dog" + counting (find x "aeiou")) ==> 11 + + This computes the eleventh Fibonacci number, similar to the DO loop +used earlier: + + (loop for i below 10 + and a = 0 then b + and b = 1 then (+ b a) + finally (return a)) + + The symbols across, and, below, collecting, counting, finally, for, +from, summing, then, and to are some of the loop keywords whose presence +identifies these as instances of the extended LOOP. (1) + + I'll save the details of LOOP for Chapter 22, but it's worth noting +here as another example of the way macros can be used to extend the base +language. While LOOP provides its own language for expressing looping +constructs, it doesn't cut you off from the rest of Lisp. The loop +keywords are parsed according to loop's grammar, but the rest of the +code in a LOOP is regular Lisp code. + + And it's worth pointing out one more time that while the LOOP macro +is quite a bit more complicated than macros such as WHEN or UNLESS, it +is just another macro. If it hadn't been included in the standard +library, you could implement it yourself or get a third-party library +that does. + + With that I'll conclude our tour of the basic control-construct +macros. Now you're ready to take a closer look at how to define your +own macros. + + ---------- Footnotes ---------- + + (1) Loop keywords is a bit of a misnomer since they aren't keyword +symbols. In fact, LOOP doesn't care what package the symbols are from. +When the LOOP macro parses its body, it considers any appropriately +named symbols equivalent. You could even use true keywords if you +wanted-:for, :across, and so on-because they also have the correct name. +But most folks just use plain symbols. Because the loop keywords are +used only as syntactic markers, it doesn't matter if they're used for +other purposes-as function or variable names. + + +File: pcl.info, Node: Chapter 8, Next: Chapter 9, Prev: Chapter 7, Up: Top + +8. Macros: Defining Your Own +============================ + +Now it's time to start writing your own macros. The standard macros I +covered in the previous chapter hint at some of the things you can do +with macros, but that's just the beginning. Common Lisp doesn't support +macros so every Lisp programmer can create their own variants of +standard control constructs any more than C supports functions so every +C programmer can write trivial variants of the functions in the C +standard library. Macros are part of the language to allow you to +create abstractions on top of the core language and standard library +that move you closer toward being able to directly express the things +you want to express. 
+ + Perhaps the biggest barrier to a proper understanding of macros is, +ironically, that they're so well integrated into the language. In many +ways they seem like just a funny kind of function-they're written in +Lisp, they take arguments and return results, and they allow you to +abstract away distracting details. Yet despite these many similarities, +macros operate at a different level than functions and create a totally +different kind of abstraction. + + Once you understand the difference between macros and functions, the +tight integration of macros in the language will be a huge benefit. But +in the meantime, it's a frequent source of confusion for new Lispers. +The following story, while not true in a historical or technical sense, +tries to alleviate the confusion by giving you a way to think about how +macros work. + +* Menu: + +* 8-1:: The Story of Mac: A Just-So Story +* 8-2:: Macro Expansion Time vs. Runtime +* 8-3:: DEF MACRO +* 8-4:: A Sample Macro: do-primes +* 8-5:: Macro Parameters +* 8-6:: Generating the Expansion +* 8-7:: Plugging the Leaks +* 8-8:: Macro-Writing Macros +* 8-9:: Beyond Simple Macros + + +File: pcl.info, Node: 8-1, Next: 8-2, Prev: Chapter 8, Up: Chapter 8 + +The Story of Mac: A Just-So Story +================================= + +Once upon a time, long ago, there was a company of Lisp programmers. It +was so long ago, in fact, that Lisp had no macros. Anything that +couldn't be defined with a function or done with a special operator had +to be written in full every time, which was rather a drag. +Unfortunately, the programmers in this company-though brilliant-were +also quite lazy. Often in the middle of their programs-when the tedium +of writing a bunch of code got to be too much-they would instead write a +note describing the code they needed to write at that place in the +program. Even more unfortunately, because they were lazy, the +programmers also hated to go back and actually write the code described +by the notes. Soon the company had a big stack of programs that nobody +could run because they were full of notes about code that still needed +to be written. + + In desperation, the big bosses hired a junior programmer, Mac, whose +job was to find the notes, write the required code, and insert it into +the program in place of the notes. Mac never ran the programs-they +weren't done yet, of course, so he couldn't. But even if they had been +completed, Mac wouldn't have known what inputs to feed them. So he just +wrote his code based on the contents of the notes and sent it back to +the original programmer. + + With Mac's help, all the programs were soon completed, and the +company made a ton of money selling them-so much money that the company +could double the size of its programming staff. But for some reason no +one thought to hire anyone to help Mac; soon he was single- handedly +assisting several dozen programmers. To avoid spending all his time +searching for notes in source code, Mac made a small modification to the +compiler the programmers used. Thereafter, whenever the compiler hit a +note, it would e-mail him the note and wait for him to e-mail back the +replacement code. Unfortunately, even with this change, Mac had a hard +time keeping up with the programmers. He worked as carefully as he +could, but sometimes- especially when the notes weren't clear-he would +make mistakes. + + The programmers noticed, however, that the more precisely they wrote +their notes, the more likely it was that Mac would send back correct +code. 
One day, one of the programmers, having a hard time describing in +words the code he wanted, included in one of his notes a Lisp program +that would generate the code he wanted. That was fine by Mac; he just +ran the program and sent the result to the compiler. + + The next innovation came when a programmer put a note at the top of +one of his programs containing a function definition and a comment that +said, "Mac, don't write any code here, but keep this function for later; +I'm going to use it in some of my other notes." Other notes in the same +program said things such as, "Mac, replace this note with the result of +running that other function with the symbols x and y as arguments." + + This technique caught on so quickly that within a few days, most +programs contained dozens of notes defining functions that were only +used by code in other notes. To make it easy for Mac to pick out the +notes containing only definitions that didn't require any immediate +response, the programmers tagged them with the standard preface: +"Definition for Mac, Read Only." This-as the programmers were still +quite lazy-was quickly shortened to "DEF. MAC. R/O" and then "DEFMACRO." + + Pretty soon, there was no actual English left in the notes for Mac. +All he did all day was read and respond to e-mails from the compiler +containing DEFMACRO notes and calls to the functions defined in the +DEFMACROs. Since the Lisp programs in the notes did all the real work, +keeping up with the e-mails was no problem. Mac suddenly had a lot of +time on his hands and would sit in his office daydreaming about +white-sand beaches, clear blue ocean water, and drinks with little paper +umbrellas in them. + + Several months later the programmers realized nobody had seen Mac for +quite some time. When they went to his office, they found a thin layer +of dust over everything, a desk littered with travel brochures for +various tropical locations, and the computer off. But the compiler +still worked-how could it be? It turned out Mac had made one last +change to the compiler: instead of e-mailing notes to Mac, the compiler +now saved the functions defined by DEFMACRO notes and ran them when +called for by the other notes. The programmers decided there was no +reason to tell the big bosses Mac wasn't coming to the office anymore. +So to this day, Mac draws a salary and from time to time sends the +programmers a postcard from one tropical locale or another. + + +File: pcl.info, Node: 8-2, Next: 8-3, Prev: 8-1, Up: Chapter 8 + +Macro Expansion Time vs. Runtime +================================ + +The key to understanding macros is to be quite clear about the +distinction between the code that generates code (macros) and the code +that eventually makes up the program (everything else). When you write +macros, you're writing programs that will be used by the compiler to +generate the code that will then be compiled. Only after all the macros +have been fully expanded and the resulting code compiled can the program +actually be run. The time when macros run is called macro expansion +time; this is distinct from runtime, when regular code, including the +code generated by macros, runs. + + It's important to keep this distinction firmly in mind because code +running at macro expansion time runs in a very different environment +than code running at runtime. Namely, at macro expansion time, there's +no way to access the data that will exist at runtime. 
Like Mac, who +couldn't run the programs he was working on because he didn't know what +the correct inputs were, code running at macro expansion time can deal +only with the data that's inherent in the source code. For instance, +suppose the following source code appears somewhere in a program: + + (defun foo (x) + (when (> x 10) (print 'big))) + + Normally you'd think of x as a variable that will hold the argument +passed in a call to foo. But at macro expansion time, such as when the +compiler is running the WHEN macro, the only data available is the +source code. Since the program isn't running yet, there's no call to +foo and thus no value associated with x. Instead, the values the +compiler passes to WHEN are the Lisp lists representing the source code, +namely, (> x 10) and (print 'big). Suppose that WHEN is defined, as you +saw in the previous chapter, with something like the following macro: + + (defmacro when (condition &rest body) + `(if ,condition (progn ,@body))) + + When the code in foo is compiled, the WHEN macro will be run with +those two forms as arguments. The parameter condition will be bound to +the form (> x 10), and the form (print 'big) will be collected into a +list that will become the value of the &rest body parameter. The +backquote expression will then generate this code: + + (if (> x 10) (progn (print 'big))) + + by interpolating in the value of condition and splicing the value of +body into the PROGN. + + When Lisp is interpreted, rather than compiled, the distinction +between macro expansion time and runtime is less clear because they're +temporally intertwined. Also, the language standard doesn't specify +exactly how an interpreter must handle macros-it could expand all the +macros in the form being interpreted and then interpret the resulting +code, or it could start right in on interpreting the form and expand +macros when it hits them. In either case, macros are always passed the +unevaluated Lisp objects representing the subforms of the macro form, +and the job of the macro is still to produce code that will do something +rather than to do anything directly. + + +File: pcl.info, Node: 8-3, Next: 8-4, Prev: 8-2, Up: Chapter 8 + +DEF MACRO +========= + +As you saw in Chapter 3, macros really are defined with DEFMACRO forms, +though it stands-of course-for DEFine MACRO, not Definition for Mac. +The basic skeleton of a DEFMACRO is quite similar to the skeleton of a +DEFUN. + + (defmacro name (parameter*) + "Optional documentation string." + body-form*) + + Like a function, a macro consists of a name, a parameter list, an +optional documentation string, and a body of Lisp expressions. (1) +However, as I just discussed, the job of a macro isn't to do anything +directly-its job is to generate code that will later do what you want. + + Macros can use the full power of Lisp to generate their expansion, +which means in this chapter I can only scratch the surface of what you +can do with macros. I can, however, describe a general process for +writing macros that works for all macros from the simplest to the most +complex. + + The job of a macro is to translate a macro form-in other words, a +Lisp form whose first element is the name of the macro-into code that +does a particular thing. Sometimes you write a macro starting with the +code you'd like to be able to write, that is, with an example macro +form. 
Other times you decide to write a macro after you've written the +same pattern of code several times and realize you can make your code +clearer by abstracting the pattern. + + Regardless of which end you start from, you need to figure out the +other end before you can start writing a macro: you need to know both +where you're coming from and where you're going before you can hope to +write code to do it automatically. Thus, the first step of writing a +macro is to write at least one example of a call to the macro and the +code into which that call should expand. + + Once you have an example call and the desired expansion, you're ready +for the second step: writing the actual macro code. For simple macros +this will be a trivial matter of writing a backquoted template with the +macro parameters plugged into the right places. Complex macros will be +significant programs in their own right, complete with helper functions +and data structures. + + After you've written code to translate the example call to the +appropriate expansion, you need to make sure the abstraction the macro +provides doesn't "leak" details of its implementation. Leaky macro +abstractions will work fine for certain arguments but not others or will +interact with code in the calling environment in undesirable ways. As +it turns out, macros can leak in a small handful of ways, all of which +are easily avoided as long as you know to check for them. I'll discuss +how in the section "Plugging the Leaks." + + To sum up, the steps to writing a macro are as follows: + + * Write a sample call to the macro and the code it should expand + into, or vice versa. + * Write code that generates the handwritten expansion from the + arguments in the sample call. + * Make sure the macro abstraction doesn't "leak." + + ---------- Footnotes ---------- + + (1) As with functions, macros can also contain declarations, but you +don't need to worry about those for now. + + +File: pcl.info, Node: 8-4, Next: 8-5, Prev: 8-3, Up: Chapter 8 + +A Sample Macro: do-primes +========================= + +To see how this three-step process works, you'll write a macro do-primes +that provides a looping construct similar to DOTIMES and DOLIST except +that instead of iterating over integers or elements of a list, it +iterates over successive prime numbers. This isn't meant to be an +example of a particularly useful macro-it's just a vehicle for +demonstrating the process. + + First, you'll need two utility functions, one to test whether a given +number is prime and another that returns the next prime number greater +or equal to its argument. In both cases you can use a simple, but +inefficient, brute-force approach. + + (defun primep (number) + (when (> number 1) + (loop for fac from 2 to (isqrt number) never (zerop (mod number fac))))) + + (defun next-prime (number) + (loop for n from number when (primep n) return n)) + + Now you can write the macro. Following the procedure outlined +previously, you need at least one example of a call to the macro and the +code into which it should expand. Suppose you start with the idea that +you want to be able to write this: + + (do-primes (p 0 19) + (format t "~d " p)) + + to express a loop that executes the body once each for each prime +number greater or equal to 0 and less than or equal to 19, with the +variable p holding the prime number. 
It makes sense to model this macro +on the form of the standard DOLIST and DOTIMES macros; macros that +follow the pattern of existing macros are easier to understand and use +than macros that introduce gratuitously novel syntax. + + Without the do-primes macro, you could write such a loop with DO (and +the two utility functions defined previously) like this: + + (do ((p (next-prime 0) (next-prime (1+ p)))) + ((> p 19)) + (format t "~d " p)) + + Now you're ready to start writing the macro code that will translate +from the former to the latter. + + +File: pcl.info, Node: 8-5, Next: 8-6, Prev: 8-4, Up: Chapter 8 + +Macro Parameters +================ + +Since the arguments passed to a macro are Lisp objects representing the +source code of the macro call, the first step in any macro is to extract +whatever parts of those objects are needed to compute the expansion. +For macros that simply interpolate their arguments directly into a +template, this step is trivial: simply defining the right parameters to +hold the different arguments is sufficient. + + But this approach, it seems, will not suffice for do-primes. The +first argument to the do-primes call is a list containing the name of +the loop variable, p; the lower bound, 0; and the upper bound, 19. But +if you look at the expansion, the list as a whole doesn't appear in the +expansion; the three element are split up and put in different places. + + You could define do-primes with two parameters, one to hold the list +and a &rest parameter to hold the body forms, and then take apart the +list by hand, something like this: + + (defmacro do-primes (var-and-range &rest body) + (let ((var (first var-and-range)) + (start (second var-and-range)) + (end (third var-and-range))) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var)))) + ((> ,var ,end)) + ,@body))) + + In a moment I'll explain how the body generates the correct +expansion; for now you can just note that the variables var, start, and +end each hold a value, extracted from var-and-range, that's then +interpolated into the backquote expression that generates do-primes's +expansion. + + However, you don't need to take apart var-and-range "by hand" because +macro parameter lists are what are called destructuring parameter lists. +Destructuring, as the name suggests, involves taking apart a +structure-in this case the list structure of the forms passed to a +macro. + + Within a destructuring parameter list, a simple parameter name can be +replaced with a nested parameter list. The parameters in the nested +parameter list will take their values from the elements of the +expression that would have been bound to the parameter the list +replaced. For instance, you can replace var-and-range with a list (var +start end), and the three elements of the list will automatically be +destructured into those three parameters. + + Another special feature of macro parameter lists is that you can use +&body as a synonym for &rest. Semantically &body and &rest are +equivalent, but many development environments will use the presence of a +&body parameter to modify how they indent uses of the macro-typically +&body parameters are used to hold a list of forms that make up the body +of the macro. 
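+
+   Destructuring, incidentally, isn't limited to macro parameter
+lists-the standard DESTRUCTURING-BIND macro gives you the same style of
+taking apart a list in ordinary code. As a quick illustration (the list
+here is made up and has nothing to do with do-primes itself), you could
+evaluate something like this at the REPL:
+
+     CL-USER> (destructuring-bind (var start end) '(p 0 19)
+                (list var start end))
+     (P 0 19)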
+
+   So you can streamline the definition of do-primes and give a hint to
+both human readers and your development tools about its intended use by
+defining it like this:
+
+     (defmacro do-primes ((var start end) &body body)
+       `(do ((,var (next-prime ,start) (next-prime (1+ ,var))))
+            ((> ,var ,end))
+          ,@body))
+
+   In addition to being more concise, destructuring parameter lists also
+give you automatic error checking-with do-primes defined this way, Lisp
+will be able to detect a call whose first argument isn't a three-element
+list and will give you a meaningful error message just as if you had
+called a function with too few or too many arguments. Also, in
+development environments such as SLIME that indicate what arguments are
+expected as soon as you type the name of a function or macro, if you use
+a destructuring parameter list, the environment will be able to tell you
+more specifically the syntax of the macro call. With the original
+definition, SLIME would tell you do-primes is called like this:
+
+     (do-primes var-and-range &rest body)
+
+   But with the new definition, it can tell you that a call should look
+like this:
+
+     (do-primes (var start end) &body body)
+
+   Destructuring parameter lists can contain &optional, &key, and &rest
+parameters and can contain nested destructuring lists. However, you
+don't need any of those options to write do-primes.
+
+
+File: pcl.info, Node: 8-6, Next: 8-7, Prev: 8-5, Up: Chapter 8
+
+Generating the Expansion
+========================
+
+Because do-primes is a fairly simple macro, after you've destructured
+the arguments, all that's left is to interpolate them into a template to
+get the expansion.
+
+   For simple macros like do-primes, the special backquote syntax is
+perfect. To review, a backquoted expression is similar to a quoted
+expression except you can "unquote" particular subexpressions by
+preceding them with a comma, possibly followed by an at (@) sign.
+Without an at sign, the comma causes the value of the subexpression to
+be included as is. With an at sign, the value-which must be a list-is
+"spliced" into the enclosing list.
+
+   Another useful way to think about the backquote syntax is as a
+particularly concise way of writing code that generates lists. This way
+of thinking about it has the benefit of being pretty much exactly what's
+happening under the covers-when the reader reads a backquoted
+expression, it translates it into code that generates the appropriate
+list structure. For instance, `(,a b) might be read as (list a 'b).
+The language standard doesn't specify exactly what code the reader must
+produce as long as it generates the right list structure.
+
+   Table 8-1 shows some examples of backquoted expressions along with
+equivalent list-building code and the result you'd get if you evaluated
+either the backquoted expression or the equivalent code. (1)
+
+   Table 8-1. Backquote Examples
+
+[image src="tables/8-1.png"]
+
+   It's important to note that backquote is just a convenience. But
+it's a big convenience. To appreciate how big, compare the backquoted
+version of do-primes to the following version, which uses explicit
+list-building code:
+
+     (defmacro do-primes-a ((var start end) &body body)
+       (append '(do)
+               (list (list (list var
+                                 (list 'next-prime start)
+                                 (list 'next-prime (list '1+ var)))))
+               (list (list (list '> var end)))
+               body))
+
+   As you'll see in a moment, the current implementation of do-primes
+doesn't handle certain edge cases correctly. 
But first you should +verify that it at least works for the original example. You can test it +in two ways. You can test it indirectly by simply using it-presumably, +if the resulting behavior is correct, the expansion is correct. For +instance, you can type the original example's use of do-primes to the +REPL and see that it indeed prints the right series of prime numbers. + + CL-USER> (do-primes (p 0 19) (format t "~d " p)) + 2 3 5 7 11 13 17 19 + NIL + + Or you can check the macro directly by looking at the expansion of a +particular call. The function MACROEXPAND-1 takes any Lisp expression +as an argument and returns the result of doing one level of macro +expansion. (2) Because MACROEXPAND-1 is a function, to pass it a +literal macro form you must quote it. You can use it to see the +expansion of the previous call. (3) + + CL-USER> (macroexpand-1 '(do-primes (p 0 19) (format t "~d " p))) + (DO ((P (NEXT-PRIME 0) (NEXT-PRIME (1+ P)))) + ((> P 19)) + (FORMAT T "~d " P)) + T + + Or, more conveniently, in SLIME you can check a macro's expansion by +placing the cursor on the opening parenthesis of a macro form in your +source code and typing C-c RET to invoke the Emacs function +slime-macroexpand-1, which will pass the macro form to MACROEXPAND-1 and +"pretty print" the result in a temporary buffer. + + However you get to it, you can see that the result of macro expansion +is the same as the original handwritten expansion, so it seems that +do-primes works. + + ---------- Footnotes ---------- + + (1) APPEND, which I haven't discussed yet, is a function that takes +any number of list arguments and returns the result of splicing them +together into a single list. + + (2) Another function, MACROEXPAND, keeps expanding the result as long +as the first element of the resulting expansion is the name of the +macro. However, this will often show you a much lower-level view of +what the code is doing than you want, since basic control constructs +such as DO are also implemented as macros. In other words, while it can +be educational to see what your macro ultimately expands into, it isn't +a very useful view into what your own macros are doing. + + (3) If the macro expansion is shown all on one line, it's probably +because the variable *PRINT-PRETTY* is NIL. If it is, evaluating (setf +*print-pretty* t) should make the macro expansion easier to read. + + +File: pcl.info, Node: 8-7, Next: 8-8, Prev: 8-6, Up: Chapter 8 + +Plugging the Leaks +================== + +In his essay "The Law of Leaky Abstractions," Joel Spolsky coined the +term leaky abstraction to describe an abstraction that "leaks" details +it's supposed to be abstracting away. Since writing a macro is a way of +creating an abstraction, you need to make sure your macros don't leak +needlessly. (1) + + As it turns out, a macro can leak details of its inner workings in +three ways. Luckily, it's pretty easy to tell whether a given macro +suffers from any of those leaks and to fix them. + + The current definition suffers from one of the three possible macro +leaks: namely, it evaluates the end subform too many times. Suppose you +were to call do-primes with, instead of a literal number such as 19, an +expression such as (random 100) in the end position. + + (do-primes (p 0 (random 100)) + (format t "~d " p)) + + Presumably the intent here is to loop over the primes from zero to +whatever random number is returned by (random 100). However, this isn't +what the current implementation does, as MACROEXPAND-1 shows. 
+ + CL-USER> (macroexpand-1 '(do-primes (p 0 (random 100)) (format t "~d " p))) + (DO ((P (NEXT-PRIME 0) (NEXT-PRIME (1+ P)))) + ((> P (RANDOM 100))) + (FORMAT T "~d " P)) + T + + When this expansion code is run, RANDOM will be called each time the +end test for the loop is evaluated. Thus, instead of looping until p is +greater than an initially chosen random number, this loop will iterate +until it happens to draw a random number less than or equal to the +current value of p. While the total number of iterations will still be +random, it will be drawn from a much different distribution than the +uniform distribution RANDOM returns. + + This is a leak in the abstraction because, to use the macro +correctly, the caller needs to be aware that the end form is going to be +evaluated more than once. One way to plug this leak would be to simply +define this as the behavior of do-primes. But that's not very +satisfactory-you should try to observe the Principle of Least +Astonishment when implementing macros. And programmers will typically +expect the forms they pass to macros to be evaluated no more times than +absolutely necessary. (2) Furthermore, since do-primes is built on the +model of the standard macros, DOTIMES and DOLIST, neither of which +causes any of the forms except those in the body to be evaluated more +than once, most programmers will expect do-primes to behave similarly. + + You can fix the multiple evaluation easily enough; you just need to +generate code that evaluates end once and saves the value in a variable +to be used later. Recall that in a DO loop, variables defined with an +initialization form and no step form don't change from iteration to +iteration. So you can fix the multiple evaluation problem with this +definition: + + (defmacro do-primes ((var start end) &body body) + `(do ((ending-value ,end) + (,var (next-prime ,start) (next-prime (1+ ,var)))) + ((> ,var ending-value)) + ,@body)) + + Unfortunately, this fix introduces two new leaks to the macro +abstraction. + + One new leak is similar to the multiple-evaluation leak you just +fixed. Because the initialization forms for variables in a DO loop are +evaluated in the order the variables are defined, when the macro +expansion is evaluated, the expression passed as end will be evaluated +before the expression passed as start, opposite to the order they appear +in the macro call. This leak doesn't cause any problems when start and +end are literal values like 0 and 19. But when they're forms that can +have side effects, evaluating them out of order can once again run afoul +of the Principle of Least Astonishment. + + This leak is trivially plugged by swapping the order of the two +variable definitions. + + (defmacro do-primes ((var start end) &body body) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var))) + (ending-value ,end)) + ((> ,var ending-value)) + ,@body)) + + The last leak you need to plug was created by using the variable name +ending-value. The problem is that the name, which ought to be a purely +internal detail of the macro implementation, can end up interacting with +code passed to the macro or in the context where the macro is called. +The following seemingly innocent call to do-primes doesn't work +correctly because of this leak: + + (do-primes (ending-value 0 10) + (print ending-value)) + + Neither does this one: + + (let ((ending-value 0)) + (do-primes (p 0 10) + (incf ending-value p)) + ending-value) + + Again, MACROEXPAND-1 can show you the problem. 
The first call +expands to this: + + (do ((ending-value (next-prime 0) (next-prime (1+ ending-value))) + (ending-value 10)) + ((> ending-value ending-value)) + (print ending-value)) + + Some Lisps may reject this code because ending-value is used twice as +a variable name in the same DO loop. If not rejected outright, the code +will loop forever since ending-value will never be greater than itself. + + The second problem call expands to the following: + + (let ((ending-value 0)) + (do ((p (next-prime 0) (next-prime (1+ p))) + (ending-value 10)) + ((> p ending-value)) + (incf ending-value p)) + ending-value) + + In this case the generated code is perfectly legal, but the behavior +isn't at all what you want. Because the binding of ending-value +established by the LET outside the loop is shadowed by the variable with +the same name inside the DO, the form (incf ending-value p) increments +the loop variable ending-value instead of the outer variable with the +same name, creating another infinite loop. (3) + + Clearly, what you need to patch this leak is a symbol that will never +be used outside the code generated by the macro. You could try using a +really unlikely name, but that's no guarantee. You could also protect +yourself to some extent by using packages, as described in Chapter 21. +But there's a better solution. + + The function GENSYM returns a unique symbol each time it's called. +This is a symbol that has never been read by the Lisp reader and never +will be because it isn't interned in any package. Thus, instead of +using a literal name like ending-value, you can generate a new symbol +each time do-primes is expanded. + + (defmacro do-primes ((var start end) &body body) + (let ((ending-value-name (gensym))) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var))) + (,ending-value-name ,end)) + ((> ,var ,ending-value-name)) + ,@body))) + + Note that the code that calls GENSYM isn't part of the expansion; it +runs as part of the macro expander and thus creates a new symbol each +time the macro is expanded. This may seem a bit strange at +first-ending-value-name is a variable whose value is the name of another +variable. But really it's no different from the parameter var whose +value is the name of a variable-the difference is the value of var was +created by the reader when the macro form was read, and the value of +ending-value-name is generated programmatically when the macro code +runs. + + With this definition the two previously problematic forms expand into +code that works the way you want. The first form: + + (do-primes (ending-value 0 10) + (print ending-value)) + + expands into the following: + + (do ((ending-value (next-prime 0) (next-prime (1+ ending-value))) + (#:g2141 10)) + ((> ending-value #:g2141)) + (print ending-value)) + + Now the variable used to hold the ending value is the gensymed +symbol, #:g2141. The name of the symbol, G2141, was generated by GENSYM +but isn't significant; the thing that matters is the object identity of +the symbol. Gensymed symbols are printed in the normal syntax for +uninterned symbols, with a leading #:. 
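+
+   You can see the same kind of symbols by calling GENSYM directly at
+the REPL; the numeric part of the name varies from call to call and
+from session to session, so your output might look something like this:
+
+     CL-USER> (gensym)
+     #:G2142
+     CL-USER> (gensym)
+     #:G2143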
+ + The other previously problematic form: + + (let ((ending-value 0)) + (do-primes (p 0 10) + (incf ending-value p)) + ending-value) + + looks like this if you replace the do-primes form with its expansion: + + (let ((ending-value 0)) + (do ((p (next-prime 0) (next-prime (1+ p))) + (#:g2140 10)) + ((> p #:g2140)) + (incf ending-value p)) + ending-value) + + Again, there's no leak since the ending-value variable bound by the +LET surrounding the do-primes loop is no longer shadowed by any +variables introduced in the expanded code. + + Not all literal names used in a macro expansion will necessarily +cause a problem-as you get more experience with the various binding +forms, you'll be able to determine whether a given name is being used in +a position that could cause a leak in a macro abstraction. But there's +no real downside to using a gensymed name just to be safe. + + With that fix, you've plugged all the leaks in the implementation of +do-primes. Once you've gotten a bit of macro-writing experience under +your belt, you'll learn to write macros with these kinds of leaks +preplugged. It's actually fairly simple if you follow these rules of +thumb: + + * Unless there's a particular reason to do otherwise, include any + subforms in the expansion in positions that will be evaluated in + the same order as the subforms appear in the macro call. + * Unless there's a particular reason to do otherwise, make sure + subforms are evaluated only once by creating a variable in the + expansion to hold the value of evaluating the argument form and + then using that variable anywhere else the value is needed in the + expansion. + * Use GENSYM at macro expansion time to create variable names used in + the expansion. + + ---------- Footnotes ---------- + + (1) This is from Joel on Software by Joel Spolsky, also available at +http://www.joelonsoftware.com/ articles/LeakyAbstractions.html. +Spolsky's point in the essay is that all abstractions leak to some +extent; that is, there are no perfect abstractions. But that doesn't +mean you should tolerate leaks you can easily plug. + + (2) Of course, certain forms are supposed to be evaluated more than +once, such as the forms in the body of a do-primes loop. + + (3) It may not be obvious that this loop is necessarily infinite +given the nonuniform occurrences of prime numbers. The starting point +for a proof that it is in fact infinite is Bertrand's postulate, which +says for any n > 1, there exists a prime p, n < p < 2n. From there you +can prove that for any prime number, P less than the sum of the +preceding prime numbers, the next prime, P', is also smaller than the +original sum plus P. + + +File: pcl.info, Node: 8-8, Next: 8-9, Prev: 8-7, Up: Chapter 8 + +Macro-Writing Macros +==================== + +Of course, there's no reason you should be able to take advantage of +macros only when writing functions. The job of macros is to abstract +away common syntactic patterns, and certain patterns come up again and +again in writing macros that can also benefit from being abstracted +away. + + In fact, you've already seen one such pattern-many macros will, like +the last version of do-primes, start with a LET that introduces a few +variables holding gensymed symbols to be used in the macro's expansion. +Since this is such a common pattern, why not abstract it away with its +own macro? + + In this section you'll write a macro, with-gensyms, that does just +that. 
In other words, you'll write a macro-writing macro: a macro that +generates code that generates code. While complex macro-writing macros +can be a bit confusing until you get used to keeping the various levels +of code clear in your mind, with-gensyms is fairly straightforward and +will serve as a useful but not too strenuous mental limbering exercise. + + You want to be able to write something like this: + + (defmacro do-primes ((var start end) &body body) + (with-gensyms (ending-value-name) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var))) + (,ending-value-name ,end)) + ((> ,var ,ending-value-name)) + ,@body))) + + and have it be equivalent to the previous version of do-primes. In +other words, the with-gensyms needs to expand into a LET that binds each +named variable, ending-value-name in this case, to a gensymed symbol. +That's easy enough to write with a simple backquote template. + + (defmacro with-gensyms ((&rest names) &body body) + `(let ,(loop for n in names collect `(,n (gensym))) + ,@body)) + + Note how you can use a comma to interpolate the value of the LOOP +expression. The loop generates a list of binding forms where each +binding form consists of a list containing one of the names given to +with-gensyms and the literal code (gensym). You can test what code the +LOOP expression would generate at the REPL by replacing names with a +list of symbols. + + CL-USER> (loop for n in '(a b c) collect `(,n (gensym))) + ((A (GENSYM)) (B (GENSYM)) (C (GENSYM))) + + After the list of binding forms, the body argument to with-gensyms is +spliced in as the body of the LET. Thus, in the code you wrap in a +with-gensyms you can refer to any of the variables named in the list of +variables passed to with-gensyms. + + If you macro-expand the with-gensyms form in the new definition of +do-primes, you should see something like this: + + (let ((ending-value-name (gensym))) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var))) + (,ending-value-name ,end)) + ((> ,var ,ending-value-name)) + ,@body)) + + Looks good. While this macro is fairly trivial, it's important to +keep clear about when the different macros are expanded: when you +compile the DEFMACRO of do-primes, the with-gensyms form is expanded +into the code just shown and compiled. Thus, the compiled version of +do-primes is just the same as if you had written the outer LET by hand. +When you compile a function that uses do-primes, the code generated by +with-gensyms runs generating the do-primes expansion, but with-gensyms +itself isn't needed to compile a do-primes form since it has already +been expanded, back when do-primes was compiled. + +* Menu: + +* 8-8-1:: Another classic macro-writing MACRO: ONCE-ONLY + + +File: pcl.info, Node: 8-8-1, Next: 8-8, Prev: 8-8, Up: 8-8 + +Another classic macro-writing MACRO: ONCE-ONLY +============================================== + +Another classic macro-writing macro is once-only, which is used to +generate code that evaluates certain macro arguments once only and in a +particular order. Using once-only, you could write do-primes almost as +simply as the original leaky version, like this: + + (defmacro do-primes ((var start end) &body body) + (once-only (start end) + `(do ((,var (next-prime ,start) (next-prime (1+ ,var)))) + ((> ,var ,end)) + ,@body))) + + However, the implementation of once-only is a bit too involved for a +blow-by-blow explanation, as it relies on multiple levels of backquoting +and unquoting. 
If you really want to sharpen your macro chops, you can +try to figure out how it works. It looks like this: + + (defmacro once-only ((&rest names) &body body) + (let ((gensyms (loop for n in names collect (gensym)))) + `(let (,@(loop for g in gensyms collect `(,g (gensym)))) + `(let (,,@(loop for g in gensyms for n in names collect ``(,,g ,,n))) + ,(let (,@(loop for n in names for g in gensyms collect `(,n ,g))) + ,@body))))) + + +File: pcl.info, Node: 8-9, Next: Chapter 9, Prev: 8-8, Up: Chapter 8 + +Beyond Simple Macros +==================== + +I could, of course, say a lot more about macros. All the macros you've +seen so far have been fairly simple examples that save you a bit of +typing but don't provide radical new ways of expressing things. In +upcoming chapters you'll see examples of macros that allow you to +express things in ways that would be virtually impossible without +macros. You'll start in the very next chapter, in which you'll build a +simple but effective unit test framework. + + +File: pcl.info, Node: Chapter 9, Next: Chapter 10, Prev: Chapter 8, Up: Top + +9. Practical: Building a Unit Test Framework +============================================ + +In this chapter you'll return to cutting code and develop a simple unit +testing framework for Lisp. This will give you a chance to use some of +the features you've learned about since Chapter 3, including macros and +dynamic variables, in real code. + + The main design goal of the test framework will be to make it as easy +as possible to add new tests, to run various suites of tests, and to +track down test failures. For now you'll focus on designing a framework +you can use during interactive development. + + The key feature of an automated testing framework is that the +framework is responsible for telling you whether all the tests passed. +You don't want to spend your time slogging through test output checking +answers when the computer can do it much more quickly and accurately. +Consequently, each test case must be an expression that yields a boolean +value-true or false, pass or fail. For instance, if you were writing +tests for the built-in + function, these might be reasonable test cases: +(1) + + (= (+ 1 2) 3) + (= (+ 1 2 3) 6) + (= (+ -1 -3) -4) + + Functions that have side effects will be tested slightly +differently-you'll have to call the function and then check for evidence +of the expected side effects. (2) But in the end, every test case has +to boil down to a boolean expression, thumbs up or thumbs down. + +* Menu: + +* 9-1:: Two First Tries +* 9-2:: Refactoring +* 9-3:: Fixing the Return Value +* 9-4:: Better Result Reporting +* 9-5:: An Abstraction Emerges +* 9-6:: A Hierarchy of Tests +* 9-7:: Wrapping Up + + ---------- Footnotes ---------- + + (1) This is for illustrative purposes only-obviously, writing test +cases for built-in functions such as + is a bit silly, since if such +basic things aren't working, the chances the tests will be running the +way you expect is pretty slim. On the other hand, most Common Lisps are +implemented largely in Common Lisp, so it's not crazy to imagine writing +test suites in Common Lisp to test the standard library functions. + + (2) Side effects can include such things as signaling errors; I'll +discuss Common Lisp's error handling system in Chapter 19. You may, +after reading that chapter, want to think about how to incorporate tests +that check whether a function does or does not signal a particular error +in certain situations. 
+ + +File: pcl.info, Node: 9-1, Next: 9-2, Prev: Chapter 9, Up: Chapter 9 + +Two First Tries +=============== + +If you were doing ad hoc testing, you could enter these expressions at +the REPL and check that they return T. But you want a framework that +makes it easy to organize and run these test cases whenever you want. +If you want to start with the simplest thing that could possibly work, +you can just write a function that evaluates the test cases and ANDs the +results together. + + (defun test-+ () + (and + (= (+ 1 2) 3) + (= (+ 1 2 3) 6) + (= (+ -1 -3) -4))) + + Whenever you want to run this set of test cases, you can call test-+. + + CL-USER> (test-+) + T + + As long as it returns T, you know the test cases are passing. This +way of organizing tests is also pleasantly concise-you don't have to +write a bunch of test bookkeeping code. However, as you'll discover the +first time a test case fails, the result reporting leaves something to +be desired. When test-+ returns NIL, you'll know something failed, but +you'll have no idea which test case it was. + + So let's try another simple-even simpleminded-approach. To find out +what happens to each test case, you could write something like this: + + (defun test-+ () + (format t "~:[FAIL~;pass~] ... ~a~%" (= (+ 1 2) 3) '(= (+ 1 2) 3)) + (format t "~:[FAIL~;pass~] ... ~a~%" (= (+ 1 2 3) 6) '(= (+ 1 2 3) 6)) + (format t "~:[FAIL~;pass~] ... ~a~%" (= (+ -1 -3) -4) '(= (+ -1 -3) -4))) + + Now each test case will be reported individually. The +~:[FAIL~;pass~] part of the FORMAT directive causes FORMAT to print +"FAIL" if the first format argument is false and "pass" otherwise. (1) +Then you label the result with the test expression itself. Now running +test-+ shows you exactly what's going on. + + CL-USER> (test-+) + pass ... (= (+ 1 2) 3) + pass ... (= (+ 1 2 3) 6) + pass ... (= (+ -1 -3) -4) + NIL + + This time the result reporting is more like what you want, but the +code itself is pretty gross. The repeated calls to FORMAT as well as +the tedious duplication of the test expression cry out to be refactored. +The duplication of the test expression is particularly grating because +if you mistype it, the test results will be mislabeled. + + Another problem is that you don't get a single indicator whether all +the test cases passed. It's easy enough, with only three test cases, to +scan the output looking for "FAIL"; however, when you have hundreds of +test cases, it'll be more of a hassle. + + ---------- Footnotes ---------- + + (1) I'll discuss this and other FORMAT directives in more detail in +Chapter 18. + + +File: pcl.info, Node: 9-2, Next: 9-3, Prev: 9-1, Up: Chapter 9 + +Refactoring +=========== + +What you'd really like is a way to write test functions as streamlined +as the first test-+ that return a single T or NIL value but that also +report on the results of individual test cases like the second version. +Since the second version is close to what you want in terms of +functionality, your best bet is to see if you can factor out some of the +annoying duplication. + + The simplest way to get rid of the repeated similar calls to FORMAT +is to create a new function. + + (defun report-result (result form) + (format t "~:[FAIL~;pass~] ... ~a~%" result form)) + + Now you can write test-+ with calls to report-result instead of +FORMAT. It's not a huge improvement, but at least now if you decide to +change the way you report results, there's only one place you have to +change. 
+ + (defun test-+ () + (report-result (= (+ 1 2) 3) '(= (+ 1 2) 3)) + (report-result (= (+ 1 2 3) 6) '(= (+ 1 2 3) 6)) + (report-result (= (+ -1 -3) -4) '(= (+ -1 -3) -4))) + + Next you need to get rid of the duplication of the test case +expression, with its attendant risk of mislabeling of results. What +you'd really like is to be able to treat the expression as both code (to +get the result) and data (to use as the label). Whenever you want to +treat code as data, that's a sure sign you need a macro. Or, to look at +it another way, what you need is a way to automate writing the +error-prone report-result calls. You'd like to be able to say something +like this: + + (check (= (+ 1 2) 3)) + + and have it mean the following: + + (report-result (= (+ 1 2) 3) '(= (+ 1 2) 3)) + + Writing a macro to do this translation is trivial. + + (defmacro check (form) + `(report-result ,form ',form)) + + Now you can change test-+ to use check. + + (defun test-+ () + (check (= (+ 1 2) 3)) + (check (= (+ 1 2 3) 6)) + (check (= (+ -1 -3) -4))) + + Since you're on the hunt for duplication, why not get rid of those +repeated calls to check? You can define check to take an arbitrary +number of forms and wrap them each in a call to report-result. + + (defmacro check (&body forms) + `(progn + ,@(loop for f in forms collect `(report-result ,f ',f)))) + + This definition uses a common macro idiom of wrapping a PROGN around +a series of forms in order to turn them into a single form. Notice also +how you can use , to splice in the result of an expression that returns +a list of expressions that are themselves generated with a backquote +template. + + With the new version of check you can write a new version of test-+ +like this: + + (defun test-+ () + (check + (= (+ 1 2) 3) + (= (+ 1 2 3) 6) + (= (+ -1 -3) -4))) + + that is equivalent to the following code: + + (defun test-+ () + (progn + (report-result (= (+ 1 2) 3) '(= (+ 1 2) 3)) + (report-result (= (+ 1 2 3) 6) '(= (+ 1 2 3) 6)) + (report-result (= (+ -1 -3) -4) '(= (+ -1 -3) -4)))) + + Thanks to check, this version is as concise as the first version of +test-+ but expands into code that does the same thing as the second +version. And now any changes you want to make to how test-+ behaves, +you can make by changing check. + + +File: pcl.info, Node: 9-3, Next: 9-4, Prev: 9-2, Up: Chapter 9 + +Fixing the Return Value +======================= + +You can start with fixing test-+ so its return value indicates whether +all the test cases passed. Since check is responsible for generating +the code that ultimately runs the test cases, you just need to change it +to generate code that also keeps track of the results. + + As a first step, you can make a small change to report-result so it +returns the result of the test case it's reporting. + + (defun report-result (result form) + (format t "~:[FAIL~;pass~] ... ~a~%" result form) + result) + + Now that report-result returns the result of its test case, it might +seem you could just change the PROGN to an AND to combine the results. +Unfortunately, AND doesn't do quite what you want in this case because +of its short-circuiting behavior: as soon as one test case fails, AND +will skip the rest. On the other hand, if you had a construct that +worked like AND without the short-circuiting, you could use it in the +place of PROGN, and you'd be done. Common Lisp doesn't provide such a +construct, but that's no reason you can't use it: it's a trivial matter +to write a macro to provide it yourself. 
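+
+   To see concretely why AND's short-circuiting gets in the way, imagine
+a hypothetical version of check that simply wrapped its report-result
+calls in an AND, and suppose the second test case were deliberately
+broken. The evaluation would look something like this, with the third
+case never reported at all:
+
+     CL-USER> (and
+               (report-result (= (+ 1 2) 3) '(= (+ 1 2) 3))
+               (report-result (= (+ 1 2 3) 7) '(= (+ 1 2 3) 7))
+               (report-result (= (+ -1 -3) -4) '(= (+ -1 -3) -4)))
+     pass ... (= (+ 1 2) 3)
+     FAIL ... (= (+ 1 2 3) 7)
+     NIL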
+ + Leaving test cases aside for a moment, what you want is a macro-let's +call it combine-results-that will let you say this: + + (combine-results + (foo) + (bar) + (baz)) + + and have it mean something like this: + + (let ((result t)) + (unless (foo) (setf result nil)) + (unless (bar) (setf result nil)) + (unless (baz) (setf result nil)) + result) + + The only tricky bit to writing this macro is that you need to +introduce a variable-result in the previous code-in the expansion. As +you saw in the previous chapter, using a literal name for variables in +macro expansions can introduce a leak in your macro abstraction, so +you'll need to create a unique name. This is a job for with-gensyms. +You can define combine-results like this: + + (defmacro combine-results (&body forms) + (with-gensyms (result) + `(let ((,result t)) + ,@(loop for f in forms collect `(unless ,f (setf ,result nil))) + ,result))) + + Now you can fix check by simply changing the expansion to use +combine-results instead of PROGN. + + (defmacro check (&body forms) + `(combine-results + ,@(loop for f in forms collect `(report-result ,f ',f)))) + + With that version of check, test-+ should emit the results of its +three test expressions and then return T to indicate that everything +passed. (1) + + CL-USER> (test-+) + pass ... (= (+ 1 2) 3) + pass ... (= (+ 1 2 3) 6) + pass ... (= (+ -1 -3) -4) + T + + And if you change one of the test cases so it fails, (2) the final +return value changes to NIL. + + CL-USER> (test-+) + pass ... (= (+ 1 2) 3) + pass ... (= (+ 1 2 3) 6) + FAIL ... (= (+ -1 -3) -5) + NIL + + ---------- Footnotes ---------- + + (1) If test-+ has been compiled-which may happen implicitly in +certain Lisp implementations-you may need to reevaluate the definition +of test-+ to get the changed definition of check to affect the behavior +of test-+. Interpreted code, on the other hand, typically expands +macros anew each time the code is interpreted, allowing the effects of +macro redefinitions to be seen immediately. + + (2) You have to change the test to make it fail since you can't +change the behavior of +. + + +File: pcl.info, Node: 9-4, Next: 9-5, Prev: 9-3, Up: Chapter 9 + +Better Result Reporting +======================= + +As long as you have only one test function, the current result reporting +is pretty clear. If a particular test case fails, all you have to do is +find the test case in the check form and figure out why it's failing. +But if you write a lot of tests, you'll probably want to organize them +somehow, rather than shoving them all into one function. For instance, +suppose you wanted to add some test cases for the * function. You might +write a new test function. + + (defun test-* () + (check + (= (* 2 2) 4) + (= (* 3 5) 15))) + + Now that you have two test functions, you'll probably want another +function that runs all the tests. That's easy enough. + + (defun test-arithmetic () + (combine-results + (test-+) + (test-*))) + + In this function you use combine-results instead of check since both +test-+ and test-* will take care of reporting their own results. When +you run test-arithmetic, you'll get the following results: + + CL-USER> (test-arithmetic) + pass ... (= (+ 1 2) 3) + pass ... (= (+ 1 2 3) 6) + pass ... (= (+ -1 -3) -4) + pass ... (= (* 2 2) 4) + pass ... (= (* 3 5) 15) + T + + Now imagine that one of the test cases failed and you need to track +down the problem. With only five test cases and two test functions, it +won't be too hard to find the code of the failing test case. 
But +suppose you had 500 test cases spread across 20 functions. It might be +nice if the results told you what function each test case came from. + + Since the code that prints the results is centralized in +report-result, you need a way to pass information about what test +function you're in to report-result. You could add a parameter to +report-result to pass this information, but check, which generates the +calls to report-result, doesn't know what function it's being called +from, which means you'd also have to change the way you call check, +passing it an argument that it simply passes onto report-result. + + This is exactly the kind of problem dynamic variables were designed +to solve. If you create a dynamic variable that each test function +binds to the name of the function before calling check, then +report-result can use it without check having to know anything about it. + + Step one is to declare the variable at the top level. + + (defvar *test-name* nil) + + Now you need to make another tiny change to report-result to include +*test-name* in the FORMAT output. + + (format t "~:[FAIL~;pass~] ... ~a: ~a~%" result *test-name* form) + + With those changes, the test functions will still work but will +produce the following output because *test-name* is never rebound: + + CL-USER> (test-arithmetic) + pass ... NIL: (= (+ 1 2) 3) + pass ... NIL: (= (+ 1 2 3) 6) + pass ... NIL: (= (+ -1 -3) -4) + pass ... NIL: (= (* 2 2) 4) + pass ... NIL: (= (* 3 5) 15) + T + + For the name to be reported properly, you need to change the two test +functions. + + (defun test-+ () + (let ((*test-name* 'test-+)) + (check + (= (+ 1 2) 3) + (= (+ 1 2 3) 6) + (= (+ -1 -3) -4)))) + + (defun test-* () + (let ((*test-name* 'test-*)) + (check + (= (* 2 2) 4) + (= (* 3 5) 15)))) + + Now the results are properly labeled. + + CL-USER> (test-arithmetic) + pass ... TEST-+: (= (+ 1 2) 3) + pass ... TEST-+: (= (+ 1 2 3) 6) + pass ... TEST-+: (= (+ -1 -3) -4) + pass ... TEST-*: (= (* 2 2) 4) + pass ... TEST-*: (= (* 3 5) 15) + T + + +File: pcl.info, Node: 9-5, Next: 9-6, Prev: 9-4, Up: Chapter 9 + +An Abstraction Emerges +====================== + +In fixing the test functions, you've introduced several new bits of +duplication. Not only does each function have to include the name of +the function twice-once as the name in the DEFUN and once in the binding +of *test-name*-but the same three-line code pattern is duplicated +between the two functions. You could remove the duplication simply on +the grounds that duplication is bad. But if you look more closely at +the root cause of the duplication, you can learn an important lesson +about how to use macros. + + The reason both these functions start the same way is because they're +both test functions. The duplication arises because, at the moment, +test function is only half an abstraction. The abstraction exists in +your mind, but in the code there's no way to express "this is a test +function" other than to write code that follows a particular pattern. + + Unfortunately, partial abstractions are a crummy tool for building +software. Because a half abstraction is expressed in code by a +manifestation of the pattern, you're guaranteed to have massive code +duplication with all the normal bad consequences that implies for +maintainability. 
More subtly, because the abstraction exists only in +the minds of programmers, there's no mechanism to make sure different +programmers (or even the same programmer working at different times) +actually understand the abstraction the same way. To make a complete +abstraction, you need a way to express "this is a test function" and +have all the code required by the pattern be generated for you. In +other words, you need a macro. + + Because the pattern you're trying to capture is a DEFUN plus some +boilerplate code, you need to write a macro that will expand into a +DEFUN. You'll then use this macro, instead of a plain DEFUN to define +test functions, so it makes sense to call it deftest. + + (defmacro deftest (name parameters &body body) + `(defun ,name ,parameters + (let ((*test-name* ',name)) + ,@body))) + + With this macro you can rewrite test-+ as follows: + + (deftest test-+ () + (check + (= (+ 1 2) 3) + (= (+ 1 2 3) 6) + (= (+ -1 -3) -4))) + + +File: pcl.info, Node: 9-6, Next: 9-7, Prev: 9-5, Up: Chapter 9 + +A Hierarchy of Tests +==================== + +Now that you've established test functions as first-class citizens, the +question might arise, should test-arithmetic be a test function? As +things stand, it doesn't really matter-if you did define it with +deftest, its binding of *test-name* would be shadowed by the bindings in +test-+ and test-* before any results are reported. + + But now imagine you've got thousands of test cases to organize. The +first level of organization is provided by test functions such as test-+ +and test-* that directly call check. But with thousands of test cases, +you'll likely need other levels of organization. Functions such as +test-arithmetic can group related test functions into test suites. Now +suppose some low-level test functions are called from multiple test +suites. It's not unheard of for a test case to pass in one context but +fail in another. If that happens, you'll probably want to know more +than just what low-level test function contains the test case. + + If you define the test suite functions such as test-arithmetic with +deftest and make a small change to the *test-name* bookkeeping, you can +have results reported with a "fully qualified" path to the test case, +something like this: + + pass ... (TEST-ARITHMETIC TEST-+): (= (+ 1 2) 3) + + Because you've already abstracted the process of defining a test +function, you can change the bookkeeping details without modifying the +code of the test functions. (1) To make *test-name* hold a list of test +function names instead of just the name of the most recently entered +test function, you just need to change this binding form: + + (let ((*test-name* ',name)) + + to the following: + + (let ((*test-name* (append *test-name* (list ',name)))) + + Since APPEND returns a new list made up of the elements of its +arguments, this version will bind *test-name* to a list containing the +old contents of *test-name* with the new name tacked onto the end. (2) +When each test function returns, the old value of *test-name* will be +restored. + + Now you can redefine test-arithmetic with deftest instead of DEFUN. + + (deftest test-arithmetic () + (combine-results + (test-+) + (test-*))) + + The results now show exactly how you got to each test expression. + + CL-USER> (test-arithmetic) + pass ... (TEST-ARITHMETIC TEST-+): (= (+ 1 2) 3) + pass ... (TEST-ARITHMETIC TEST-+): (= (+ 1 2 3) 6) + pass ... (TEST-ARITHMETIC TEST-+): (= (+ -1 -3) -4) + pass ... (TEST-ARITHMETIC TEST-*): (= (* 2 2) 4) + pass ... 
(TEST-ARITHMETIC TEST-*): (= (* 3 5) 15) + T + + As your test suite grows, you can add new layers of test functions; +as long as they're defined with deftest, the results will be reported +correctly. For instance, the following: + + (deftest test-math () + (test-arithmetic)) + + would generate these results: + + CL-USER> (test-math) + pass ... (TEST-MATH TEST-ARITHMETIC TEST-+): (= (+ 1 2) 3) + pass ... (TEST-MATH TEST-ARITHMETIC TEST-+): (= (+ 1 2 3) 6) + pass ... (TEST-MATH TEST-ARITHMETIC TEST-+): (= (+ -1 -3) -4) + pass ... (TEST-MATH TEST-ARITHMETIC TEST-*): (= (* 2 2) 4) + pass ... (TEST-MATH TEST-ARITHMETIC TEST-*): (= (* 3 5) 15) + T + + ---------- Footnotes ---------- + + (1) Though, again, if the test functions have been compiled, you'll +have to recompile them after changing the macro. + + (2) As you'll see in Chapter 12, APPENDing to the end of a list isn't +the most efficient way to build a list. But for now this is +sufficient-as long as the test hierarchies aren't too deep, it should be +fine. And if it becomes a problem, all you'll have to do is change the +definition of deftest. + + +File: pcl.info, Node: 9-7, Next: Chapter 10, Prev: 9-6, Up: Chapter 9 + +Wrapping Up +=========== + +You could keep going, adding more features to this test framework. But +as a framework for writing tests with a minimum of busywork and easily +running them from the REPL, this is a reasonable start. Here's the +complete code, all 26 lines of it: + + (defvar *test-name* nil) + + (defmacro deftest (name parameters &body body) + "Define a test function. Within a test function we can call + other test functions or use 'check' to run individual test + cases." + `(defun ,name ,parameters + (let ((*test-name* (append *test-name* (list ',name)))) + ,@body))) + + (defmacro check (&body forms) + "Run each expression in 'forms' as a test case." + `(combine-results + ,@(loop for f in forms collect `(report-result ,f ',f)))) + + (defmacro combine-results (&body forms) + "Combine the results (as booleans) of evaluating 'forms' in order." + (with-gensyms (result) + `(let ((,result t)) + ,@(loop for f in forms collect `(unless ,f (setf ,result nil))) + ,result))) + + (defun report-result (result form) + "Report the results of a single test case. Called by 'check'." + (format t "~:[FAIL~;pass~] ... ~a: ~a~%" result *test-name* form) + result) + + It's worth reviewing how you got here because it's illustrative of +how programming in Lisp often goes. + + You started by defining a simple version of your problem-how to +evaluate a bunch of boolean expressions and find out if they all +returned true. Just ANDing them together worked and was syntactically +clean but revealed the need for better result reporting. So you wrote +some really simpleminded code, chock-full of duplication and error-prone +idioms that reported the results the way you wanted. + + The next step was to see if you could refactor the second version +into something as clean as the former. You started with a standard +refactoring technique of extracting some code into a function, +report-result. Unfortunately, you could see that using report-result +was going to be tedious and error-prone since you had to pass the test +expression twice, once for the value and once as quoted data. So you +wrote the check macro to automate the details of calling report-result +correctly. 
+ + While writing check, you realized as long as you were generating +code, you could make a single call to check to generate multiple calls +to report-result, getting you back to a version of test-+ about as +concise as the original AND version. + + At that point you had the check API nailed down, which allowed you to +start mucking with how it worked on the inside. The next task was to +fix check so the code it generated would return a boolean indicating +whether all the test cases had passed. Rather than immediately hacking +away at check, you paused to indulge in a little language design by +fantasy. What if-you fantasized-there was already a +non-short-circuiting AND construct. Then fixing check would be trivial. +Returning from fantasyland you realized there was no such construct but +that you could write one in a few lines. After writing combine-results, +the fix to check was indeed trivial. + + At that point all that was left was to make a few more improvements +to the way you reported test results. Once you started making changes +to the test functions, you realized those functions represented a +special category of function that deserved its own abstraction. So you +wrote deftest to abstract the pattern of code that turns a regular +function into a test function. + + With deftest providing an abstraction barrier between the test +definitions and the underlying machinery, you were able to enhance the +result reporting without touching the test functions. + + Now, with the basics of functions, variables, and macros mastered, +and a little practical experience using them, you're ready to start +exploring Common Lisp's rich standard library of functions and data +types. + + +File: pcl.info, Node: Chapter 10, Next: Chapter 11, Prev: Chapter 9, Up: Top + +10. Numbers, Characters, and Strings +==================================== + +While functions, variables, macros, and 25 special operators provide the +basic building blocks of the language itself, the building blocks of +your programs will be the data structures you use. As Fred Brooks +observed in The Mythical Man-Month, "Representation is the essence of +programming." (1) + + Common Lisp provides built-in support for most of the data types +typically found in modern languages: numbers (integer, floating point, +and complex), characters, strings, arrays (including multidimensional +arrays), lists, hash tables, input and output streams, and an +abstraction for portably representing filenames. Functions are also a +first-class data type in Lisp-they can be stored in variables, passed as +arguments, returned as return values, and created at runtime. + + And these built-in types are just the beginning. They're defined in +the language standard so programmers can count on them being available +and because they tend to be easier to implement efficiently when tightly +integrated with the rest of the implementation. But, as you'll see in +later chapters, Common Lisp also provides several ways for you to define +new data types, define operations on them, and integrate them with the +built-in data types. + + For now, however, you can start with the built-in data types. +Because Lisp is a high-level language, the details of exactly how +different data types are implemented are largely hidden. From your +point of view as a user of the language, the built-in data types are +defined by the functions that operate on them. So to learn a data type, +you just have to learn about the functions you can use with it. 
+Additionally, most of the built-in data types have a special syntax that +the Lisp reader understands and that the Lisp printer uses. That's why, +for instance, you can write strings as "foo"; numbers as 123, 1/23, and +1.23; and lists as (a b c). I'll describe the syntax for different +kinds of objects when I describe the functions for manipulating them. + + In this chapter, I'll cover the built-in "scalar" data types: +numbers, characters, and strings. Technically, strings aren't true +scalars-a string is a sequence of characters, and you can access +individual characters and manipulate strings with a function that +operates on sequences. But I'll discuss strings here because most of +the string-specific functions manipulate them as single values and also +because of the close relation between several of the string functions +and their character counterparts. + +* Menu: + +* 10-1:: Numbers +* 10-2:: Numeric Literals +* 10-3:: Basic Math +* 10-4:: Numeric Comparisons +* 10-5:: Higher Math +* 10-6:: Characters +* 10-7:: Character Comparisons +* 10-8:: Strings +* 10-9:: String Comparisons + + ---------- Footnotes ---------- + + (1) Fred Brooks, The Mythical Man-Month, 20th Anniversary Edition +(Boston: Addison-Wesley, 1995), p. 103. Emphasis in original. + + +File: pcl.info, Node: 10-1, Next: 10-2, Prev: Chapter 10, Up: Chapter 10 + +Numbers +======= + +Math, as Barbie says, is hard. (1) Common Lisp can't make the math part +any easier, but it does tend to get in the way a lot less than other +programming languages. That's not surprising given its mathematical +heritage. Lisp was originally designed by a mathematician as a tool for +studying mathematical functions. And one of the main projects of the +MAC project at MIT was the Macsyma symbolic algebra system, written in +Maclisp, one of Common Lisp's immediate predecessors. Additionally, +Lisp has been used as a teaching language at places such as MIT where +even the computer science professors cringe at the thought of telling +their students that 10/4 = 2, leading to Lisp's support for exact +ratios. And at various times Lisp has been called upon to compete with +FORTRAN in the high-performance numeric computing arena. + + One of the reasons Lisp is a nice language for math is its numbers +behave more like true mathematical numbers than the approximations of +numbers that are easy to implement in finite computer hardware. For +instance, integers in Common Lisp can be almost arbitrarily large rather +than being limited by the size of a machine word. (2) And dividing two +integers results in an exact ratio, not a truncated value. And since +ratios are represented as pairs of arbitrarily sized integers, ratios +can represent arbitrarily precise fractions. (3) + + On the other hand, for high-performance numeric programming, you may +be willing to trade the exactitude of rationals for the speed offered by +using the hardware's underlying floating-point operations. So, Common +Lisp also offers several types of floating-point numbers, which are +mapped by the implementation to the appropriate hardware-supported +floating-point representations. (4) Floats are also used to represent +the results of a computation whose true mathematical value would be an +irrational number. + + Finally, Common Lisp supports complex numbers-the numbers that result +from doing things such as taking square roots and logarithms of negative +numbers. 
The Common Lisp standard even goes so far as to specify the +principal values and branch cuts for irrational and transcendental +functions on the complex domain. + + ---------- Footnotes ---------- + + (1) Mattel's Teen Talk Barbie + + (2) Obviously, the size of a number that can be represented on a +computer with finite memory is still limited in practice; furthermore, +the actual representation of bignums used in a particular Common Lisp +implementation may place other limits on the size of number that can be +represented. But these limits are going to be well beyond +"astronomically" large numbers. For instance, the number of atoms in +the universe is estimated to be less than 2^269; current Common Lisp +implementations can easily handle numbers up to and beyond 2^262144. + + (3) Folks interested in using Common Lisp for intensive numeric +computation should note that a naive comparison of the performance of +numeric code in Common Lisp and languages such as C or FORTRAN will +probably show Common Lisp to be much slower. This is because something +as simple as (+ a b) in Common Lisp is doing a lot more than the +seemingly equivalent a + b in one of those languages. Because of Lisp's +dynamic typing and support for things such as arbitrary precision +rationals and complex numbers, a seemingly simple addition is doing a +lot more than an addition of two numbers that are known to be +represented by machine words. However, you can use declarations to give +Common Lisp information about the types of numbers you're using that +will enable it to generate code that does only as much work as the code +that would be generated by a C or FORTRAN compiler. Tuning numeric code +for this kind of performance is beyond the scope of this book, but it's +certainly possible. + + (4) While the standard doesn't require it, many Common Lisp +implementations support the IEEE standard for floating-point arithmetic, +IEEE Standard for Binary Floating-Point Arithmetic, ANSI/ IEEE Std +754-1985 (Institute of Electrical and Electronics Engineers, 1985). + + +File: pcl.info, Node: 10-2, Next: 10-3, Prev: 10-1, Up: Chapter 10 + +Numeric Literals +================ + +You can write numeric literals in a variety of ways; you saw a few +examples in Chapter 4. However, it's important to keep in mind the +division of labor between the Lisp reader and the Lisp evaluator-the +reader is responsible for translating text into Lisp objects, and the +Lisp evaluator then deals only with those objects. For a given number +of a given type, there can be many different textual representations, +all of which will be translated to the same object representation by the +Lisp reader. For instance, you can write the integer 10 as 10, 20/2, +#xA, or any of a number of other ways, but the reader will translate all +these to the same object. When numbers are printed back out-say, at the +REPL-they're printed in a canonical textual syntax that may be different +from the syntax used to enter the number. For example: + + CL-USER> 10 + 10 + CL-USER> 20/2 + 10 + CL-USER> #xa + 10 + + The syntax for integer values is an optional sign (+ or -) followed +by one or more digits. Ratios are written as an optional sign and a +sequence of digits, representing the numerator, a slash (/), and another +sequence of digits representing the denominator. All rational numbers +are "canonicalized" as they're read-that's why 10 and 20/2 are both read +as the same number, as are 3/4 and 6/8. 
Rationals are printed in +"reduced" form-integer values are printed in integer syntax and ratios +with the numerator and denominator reduced to lowest terms. + + It's also possible to write rationals in bases other than 10. If +preceded by #B or #b, a rational literal is read as a binary number with +0 and 1 as the only legal digits. An #O or #o indicates an octal number +(legal digits 0-7), and #X or #x indicates hexadecimal (legal digits 0-F +or 0-f). You can write rationals in other bases from 2 to 36 with #nR +where n is the base (always written in decimal). Additional "digits" +beyond 9 are taken from the letters A-Z or a-z. Note that these radix +indicators apply to the whole rational-it's not possible to write a +ratio with the numerator in one base and denominator in another. Also, +you can write integer values, but not ratios, as decimal digits +terminated with a decimal point. (1) Some examples of rationals, with +their canonical, decimal representation are as follows: + + 123 ==> 123 + +123 ==> 123 + -123 ==> -123 + 123. ==> 123 + 2/3 ==> 2/3 + -2/3 ==> -2/3 + 4/6 ==> 2/3 + 6/3 ==> 2 + #b10101 ==> 21 + #b1010/1011 ==> 10/11 + #o777 ==> 511 + #xDADA ==> 56026 + #36rABCDEFGHIJKLMNOPQRSTUVWXYZ ==> 8337503854730415241050377135811259267835 + + You can also write floating-point numbers in a variety of ways. +Unlike rational numbers, the syntax used to notate a floating-point +number can affect the actual type of number read. Common Lisp defines +four subtypes of floating-point number: short, single, double, and long. +Each subtype can use a different number of bits in its representation, +which means each subtype can represent values spanning a different range +and with different precision. More bits gives a wider range and more +precision. (2) + + The basic format for floating-point numbers is an optional sign +followed by a nonempty sequence of decimal digits possibly with an +embedded decimal point. This sequence can be followed by an exponent +marker for "computerized scientific notation." (3) The exponent marker +consists of a single letter followed by an optional sign and a sequence +of digits, which are interpreted as the power of ten by which the number +before the exponent marker should be multiplied. The letter does double +duty: it marks the beginning of the exponent and indicates what +floating- point representation should be used for the number. The +exponent markers s, f, d, l (and their uppercase equivalents) indicate +short, single, double, and long floats, respectively. The letter e +indicates that the default representation (initially single-float) +should be used. + + Numbers with no exponent marker are read in the default +representation and must contain a decimal point followed by at least one +digit to distinguish them from integers. The digits in a floating-point +number are always treated as base 10 digits-the #B, #X, #O, and #R +syntaxes work only with rationals. The following are some example +floating-point numbers along with their canonical representation: + + 1.0 ==> 1.0 + 1e0 ==> 1.0 + 1d0 ==> 1.0d0 + 123.0 ==> 123.0 + 123e0 ==> 123.0 + 0.123 ==> 0.123 + .123 ==> 0.123 + 123e-3 ==> 0.123 + 123E-3 ==> 0.123 + 0.123e20 ==> 1.23e+19 + 123d23 ==> 1.23d+25 + + Finally, complex numbers are written in their own syntax, namely, #C +or #c followed by a list of two real numbers representing the real and +imaginary part of the complex number. 
There are actually five kinds of +complex numbers because the real and imaginary parts must either both be +rational or both be the same kind of floating-point number. + + But you can write them however you want-if a complex is written with +one rational and one floating-point part, the rational is converted to a +float of the appropriate representation. Similarly, if the real and +imaginary parts are both floats of different representations, the one in +the smaller representation will be upgraded. + + However, no complex numbers have a rational real component and a zero +imaginary part-since such values are, mathematically speaking, rational, +they're represented by the appropriate rational value. The same +mathematical argument could be made for complex numbers with +floating-point components, but for those complex types a number with a +zero imaginary part is always a different object than the floating-point +number representing the real component. Here are some examples of +numbers written the complex number syntax: + + #c(2 1) ==> #c(2 1) + #c(2/3 3/4) ==> #c(2/3 3/4) + #c(2 1.0) ==> #c(2.0 1.0) + #c(2.0 1.0d0) ==> #c(2.0d0 1.0d0) + #c(1/2 1.0) ==> #c(0.5 1.0) + #c(3 0) ==> 3 + #c(3.0 0.0) ==> #c(3.0 0.0) + #c(1/2 0) ==> 1/2 + #c(-6/3 0) ==> -2 + + ---------- Footnotes ---------- + + (1) It's also possible to change the default base the reader uses for +numbers without a specific radix marker by changing the value of the +global variable *READ-BASE*. However, it's not clear that's the path to +anything other than complete insanity. + + (2) Since the purpose of floating-point numbers is to make efficient +use of floating-point hardware, each Lisp implementation is allowed to +map these four subtypes onto the native floating-point types as +appropriate. If the hardware supports fewer than four distinct +representations, one or more of the types may be equivalent. + + (3) "Computerized scientific notation" is in scare quotes because, +while commonly used in computer languages since the days of FORTRAN, +it's actually quite different from real scientific notation. In +particular, something like 1.0e4 means 10000.0, but in true scientific +notation that would be written as 1.0 x 10^4. And to further confuse +matters, in true scientific notation the letter e stands for the base of +the natural logarithm, so something like 1.0 x e^4, while superficially +similar to 1.0e4, is a completely different value, approximately 54.6. + + +File: pcl.info, Node: 10-3, Next: 10-4, Prev: 10-2, Up: Chapter 10 + +Basic Math +========== + +The basic arithmetic operations-addition, subtraction, multiplication, +and division-are supported for all the different kinds of Lisp numbers +with the functions +, -, *, and /. Calling any of these functions with +more than two arguments is equivalent to calling the same function on +the first two arguments and then calling it again on the resulting value +and the rest of the arguments. For example, (+ 1 2 3) is equivalent to +(+ (+ 1 2) 3). With only one argument, + and * return the value; - +returns its negation and / its reciprocal. 
(1) + + (+ 1 2) ==> 3 + (+ 1 2 3) ==> 6 + (+ 10.0 3.0) ==> 13.0 + (+ #c(1 2) #c(3 4)) ==> #c(4 6) + (- 5 4) ==> 1 + (- 2) ==> -2 + (- 10 3 5) ==> 2 + (* 2 3) ==> 6 + (* 2 3 4) ==> 24 + (/ 10 5) ==> 2 + (/ 10 5 2) ==> 1 + (/ 2 3) ==> 2/3 + (/ 4) ==> 1/4 + + If all the arguments are the same type of number (rational, floating +point, or complex), the result will be the same type except in the case +where the result of an operation on complex numbers with rational +components yields a number with a zero imaginary part, in which case the +result will be a rational. However, floating-point and complex numbers +are contagious-if all the arguments are reals but one or more are +floating-point numbers, the other arguments are converted to the nearest +floating-point value in a "largest" floating-point representation of the +actual floating-point arguments. Floating-point numbers in a "smaller" +representation are also converted to the larger representation. +Similarly, if any of the arguments are complex, any real arguments are +converted to the complex equivalents. + + (+ 1 2.0) ==> 3.0 + (/ 2 3.0) ==> 0.6666667 + (+ #c(1 2) 3) ==> #c(4 2) + (+ #c(1 2) 3/2) ==> #c(5/2 2) + (+ #c(1 1) #c(2 -1)) ==> 3 + + Because / doesn't truncate, Common Lisp provides four flavors of +truncating and rounding for converting a real number (rational or +floating point) to an integer: FLOOR truncates toward negative infinity, +returning the largest integer less than or equal to the argument. +CEILING truncates toward positive infinity, returning the smallest +integer greater than or equal to the argument. TRUNCATE truncates +toward zero, making it equivalent to FLOOR for positive arguments and to +CEILING for negative arguments. And ROUND rounds to the nearest +integer. If the argument is exactly halfway between two integers, it +rounds to the nearest even integer. + + Two related functions are MOD and REM, which return the modulus and +remainder of a truncating division on real numbers. These two functions +are related to the FLOOR and TRUNCATE functions as follows: + + (+ (* (floor (/ x y)) y) (mod x y)) === x + (+ (* (truncate (/ x y)) y) (rem x y)) === x + + Thus, for positive quotients they're equivalent, but for negative +quotients they produce different results. (2) + + The functions 1+ and 1- provide a shorthand way to express adding and +subtracting one from a number. Note that these are different from the +macros INCF and DECF. 1+ and 1- are just functions that return a new +value, but INCF and DECF modify a place. The following equivalences +show the relation between INCF/DECF, 1+/1-, and +/-: + + (incf x) === (setf x (1+ x)) === (setf x (+ x 1)) + (decf x) === (setf x (1- x)) === (setf x (- x 1)) + (incf x 10) === (setf x (+ x 10)) + (decf x 10) === (setf x (- x 10)) + + ---------- Footnotes ---------- + + (1) For mathematical consistency, + and * can also be called with no +arguments, in which case they return the appropriate identity: 0 for + +and 1 for *. + + (2) Roughly speaking, MOD is equivalent to the % operator in Perl and +Python, and REM is equivalent to the % in C and Java. (Technically, the +exact behavior of % in C wasn't specified until the C99 standard.) + + +File: pcl.info, Node: 10-4, Next: 10-5, Prev: 10-3, Up: Chapter 10 + +Numeric Comparisons +=================== + +The function = is the numeric equality predicate. It compares numbers +by mathematical value, ignoring differences in type. 
Thus, = will +consider mathematically equivalent values of different types equivalent +while the generic equality predicate EQL would consider them +inequivalent because of the difference in type. (The generic equality +predicate EQUALP, however, uses = to compare numbers.) If it's called +with more than two arguments, it returns true only if they all have the +same value. Thus: + + (= 1 1) ==> T + (= 10 20/2) ==> T + (= 1 1.0 #c(1.0 0.0) #c(1 0)) ==> T + + The /= function, conversely, returns true only if all its arguments +are different values. + + (/= 1 1) ==> NIL + (/= 1 2) ==> T + (/= 1 2 3) ==> T + (/= 1 2 3 1) ==> NIL + (/= 1 2 3 1.0) ==> NIL + + The functions <, >, <=, and >= order rationals and floating-point +numbers (in other words, the real numbers.) Like = and /=, these +functions can be called with more than two arguments, in which case each +argument is compared to the argument to its right. + + (< 2 3) ==> T + (> 2 3) ==> NIL + (> 3 2) ==> T + (< 2 3 4) ==> T + (< 2 3 3) ==> NIL + (<= 2 3 3) ==> T + (<= 2 3 3 4) ==> T + (<= 2 3 4 3) ==> NIL + + To pick out the smallest or largest of several numbers, you can use +the function MIN or MAX, which takes any number of real number arguments +and returns the minimum or maximum value. + + (max 10 11) ==> 11 + (min -12 -10) ==> -12 + (max -1 2 -3) ==> 2 + + Some other handy functions are ZEROP, MINUSP, and PLUSP, which test +whether a single real number is equal to, less than, or greater than +zero. Two other predicates, EVENP and ODDP, test whether a single +integer argument is even or odd. The P suffix on the names of these +functions is a standard naming convention for predicate functions, +functions that test some condition and return a boolean. + + +File: pcl.info, Node: 10-5, Next: 10-6, Prev: 10-4, Up: Chapter 10 + +Higher Math +=========== + +The functions you've seen so far are the beginning of the built-in +mathematical functions. Lisp also supports logarithms: LOG; +exponentiation: EXP and EXPT; the basic trigonometric functions: SIN, +COS, and TAN; their inverses: ASIN, ACOS, and ATAN; hyperbolic +functions: SINH, COSH, and TANH; and their inverses: ASINH, ACOSH, and +ATANH. It also provides functions to get at the individual bits of an +integer and to extract the parts of a ratio or a complex number. For a +complete list, see any Common Lisp reference. + + +File: pcl.info, Node: 10-6, Next: 10-7, Prev: 10-5, Up: Chapter 10 + +Characters +========== + +Common Lisp characters are a distinct type of object from numbers. +That's as it should be-characters are not numbers, and languages that +treat them as if they are tend to run into problems when character +encodings change, say, from 8-bit ASCII to 21-bit Unicode. (1) Because +the Common Lisp standard didn't mandate a particular representation for +characters, today several Lisp implementations use Unicode as their +"native" character encoding despite Unicode being only a gleam in a +standards body's eye at the time Common Lisp's own standardization was +being wrapped up. + + The read syntax for characters objects is simple: #\ followed by the +desired character. Thus, #\x is the character x. Any character can be +used after the #\, including otherwise special characters such as ", (, +and whitespace. However, writing whitespace characters this way isn't +very (human) readable; an alternative syntax for certain characters is +#\ followed by the character's name. 
Exactly what names are supported +depends on the character set and on the Lisp implementation, but all +implementations support the names Space and Newline. Thus, you should +write #\Space instead of #\ , though the latter is technically legal. +Other semistandard names (that implementations must use if the character +set has the appropriate characters) are Tab, Page, Rubout, Linefeed, +Return, and Backspace. + + ---------- Footnotes ---------- + + (1) Even Java, which was designed from the beginning to use Unicode +characters on the theory that Unicode was going to be the character +encoding of the future, has run into trouble since Java characters are +defined to be a 16-bit quantity and the Unicode 3.1 standard extended +the range of the Unicode character set to require a 21-bit +representation. Ooops. + + +File: pcl.info, Node: 10-7, Next: 10-8, Prev: 10-6, Up: Chapter 10 + +Character Comparisons +===================== + +The main thing you can do with characters, other than putting them into +strings (which I'll get to later in this chapter), is to compare them +with other characters. Since characters aren't numbers, you can't use +the numeric comparison functions, such as < and >. Instead, two sets of +functions provide character-specific analogs to the numeric comparators; +one set is case-sensitive and the other case-insensitive. + + The case-sensitive analog to the numeric = is the function CHAR=. +Like =, CHAR= can take any number of arguments and returns true only if +they're all the same character. The case-insensitive version is +CHAR-EQUAL. + + The rest of the character comparators follow this same naming scheme: +the case-sensitive comparators are named by prepending the analogous +numeric comparator with CHAR; the case-insensitive versions spell out +the comparator name, separated from the CHAR with a hyphen. Note, +however, that <= and >= are "spelled out" with the logical equivalents +NOT-GREATERP and NOT-LESSP rather than the more verbose LESSP-OR-EQUALP +and GREATERP-OR-EQUALP. Like their numeric counterparts, all these +functions can take one or more arguments. Table 10-1 summarizes the +relation between the numeric and character comparison functions. + + Table 10-1. Character Comparison Functions + +[image src="tables/10-1.png"] + + Other functions that deal with characters provide functions for, +among other things, testing whether a given character is alphabetic or a +digit character, testing the case of a character, obtaining a +corresponding character in a different case, and translating between +numeric values representing character codes and actual character +objects. Again, for complete details, see your favorite Common Lisp +reference. + + +File: pcl.info, Node: 10-8, Next: 10-9, Prev: 10-7, Up: Chapter 10 + +Strings +======= + +As mentioned earlier, strings in Common Lisp are really a composite data +type, namely, a one-dimensional array of characters. Consequently, I'll +cover many of the things you can do with strings in the next chapter +when I discuss the many functions for manipulating sequences, of which +strings are just one type. But strings also have their own literal +syntax and a library of functions for performing string-specific +operations. I'll discuss these aspects of strings in this chapter and +leave the others for Chapter 11. + + As you've seen, literal strings are written enclosed in double +quotes. You can include any character supported by the character set in +a literal string except double quote (") and backslash (\).
And you can +include these two as well if you escape them with a backslash. In fact, +backslash always escapes the next character, whatever it is, though this +isn't necessary for any character except for " and itself. Table 10-2 +shows how various literal strings will be read by the Lisp reader. + + Table 10-2. Literal Strings + +[image src="tables/10-2.png"] + + Note that the REPL will ordinarily print strings in readable form, +adding the enclosing quotation marks and any necessary escaping +backslashes, so if you want to see the actual contents of a string, you +need to use function such as FORMAT designed to print human-readable +output. For example, here's what you see if you type a string +containing an embedded quotation mark at the REPL: + + CL-USER> "foo\"bar" + "foo\"bar" + + FORMAT, on the other hand, will show you the actual string contents: +(1) + + CL-USER> (format t "foo\"bar") + foo"bar + NIL + + ---------- Footnotes ---------- + + (1) Note, however, that not all literal strings can be printed by +passing them as the second argument to FORMAT since certain sequences of +characters have a special meaning to FORMAT. To safely print an +arbitrary string-say, the value of a variable s-with FORMAT you should +write (format t "~a" s). + + +File: pcl.info, Node: 10-9, Next: Chapter 11, Prev: 10-8, Up: Chapter 10 + +String Comparisons +================== + +You can compare strings using a set of functions that follow the same +naming convention as the character comparison functions except with +STRING as the prefix rather than CHAR (see Table 10-3). + + Table 10-3. String Comparison Functions + +[image src="tables/10-3.png"] + + However, unlike the character and number comparators, the string +comparators can compare only two strings. That's because they also take +keyword arguments that allow you to restrict the comparison to a +substring of either or both strings. The arguments-:start1, :end1, +:start2, and :end2-specify the starting (inclusive) and ending +(exclusive) indices of substrings in the first and second string +arguments. Thus, the following: + + (string= "foobarbaz" "quuxbarfoo" :start1 3 :end1 6 :start2 4 :end2 7) + + compares the substring "bar" in the two arguments and returns true. +The :end1 and :end2 arguments can be NIL (or the keyword argument +omitted altogether) to indicate that the corresponding substring extends +to the end of the string. + + The comparators that return true when their arguments differ-that is, +all of them except STRING= and STRING-EQUAL-return the index in the +first string where the mismatch was detected. + + (string/= "lisp" "lissome") ==> 3 + + If the first string is a prefix of the second, the return value will +be the length of the first string, that is, one greater than the largest +valid index into the string. + + (string< "lisp" "lisper") ==> 4 + + When comparing substrings, the resulting value is still an index into +the string as a whole. For instance, the following compares the +substrings "bar" and "baz" but returns 5 because that's the index of the +r in the first string: + + (string< "foobar" "abaz" :start1 3 :start2 1) ==> 5 ; N.B. not 2 + + Other string functions allow you to convert the case of strings and +trim characters from one or both ends of a string. And, as I mentioned +previously, since strings are really a kind of sequence, all the +sequence functions I'll discuss in the next chapter can be used with +strings. 
For instance, you can discover the length of a string with the +LENGTH function and can get and set individual characters of a string +with the generic sequence element accessor function, ELT, or the generic +array element accessor function, AREF. Or you can use the +string-specific accessor, CHAR. But those functions, and others, are the +topic of the next chapter, so let's move on. + + +File: pcl.info, Node: Chapter 11, Next: Chapter 12, Prev: Chapter 10, Up: Top + +11. Collections +=============== + +Like most programming languages, Common Lisp provides standard data +types that collect multiple values into a single object. Every language +slices up the collection problem a little bit differently, but the basic +collection types usually boil down to an integer-indexed array type and +a table type that can be used to map more or less arbitrary keys to +values. The former are variously called ‘arrays’, ‘lists’, or +‘tuples’; the latter go by the names ‘hash tables’, +‘associative arrays’, ‘maps’, and ‘dictionaries’. + +Lisp is, of course, famous for its list data structure, and most Lisp +books, following the ontogeny-recapitulates-phylogeny principle of +language instruction, start their discussion of Lisp's collections with +lists. However, that approach often leads readers to the mistaken +conclusion that lists are Lisp's ‘only’ collection type. To make +matters worse, because Lisp's lists are such a flexible data structure, +it ‘is’ possible to use them for many of the things arrays and hash +tables are used for in other languages. But it's a mistake to focus too +much on lists; while they're a crucial data structure for representing +Lisp code as Lisp data, in many situations other data structures are +more appropriate. + +To keep lists from stealing the show, in this chapter I'll focus on +Common Lisp's other collection types: vectors and hash tables. (1) +However, vectors and lists share enough characteristics that Common Lisp +treats them both as subtypes of a more general abstraction, the +sequence. Thus, you can use many of the functions I'll discuss in this +chapter with both vectors and lists. + +* Menu: + +* 11-1:: Vectors +* 11-2:: Subtypes of Vector +* 11-3:: Vectors As Sequences +* 11-4:: Sequence Iterating Functions +* 11-5:: Higher-Order Function Variants +* 11-6:: Whole Sequence Manipulations +* 11-7:: Sorting and Merging +* 11-8:: Subsequence Manipulations +* 11-9:: Sequence Predicates +* 11-10:: Sequence Mapping Functions +* 11-11:: Hash Tables +* 11-12:: Hash Table Iteration + + ---------- Footnotes ---------- + + (1) Once you're familiar with all the data types Common Lisp offers, +you'll also see that lists can be useful for prototyping data structures +that will later be replaced with something more efficient once it +becomes clear how exactly the data is to be used. + + +File: pcl.info, Node: 11-1, Next: 11-2, Prev: Chapter 11, Up: Chapter 11 + +Vectors +======= + +Vectors are Common Lisp's basic integer-indexed collection, and they +come in two flavors. Fixed-size vectors are a lot like arrays in a +language such as Java: a thin veneer over a chunk of contiguous memory +that holds the vector's elements. (1) Resizable vectors, on the other +hand, are more like arrays in Perl or Ruby, lists in Python, or the +ArrayList class in Java: they abstract the actual storage, allowing the +vector to grow and shrink as elements are added and removed. 
+ +You can make fixed-size vectors containing specific values with the +function ‘VECTOR’, which takes any number of arguments and returns a +freshly allocated fixed-size vector containing those arguments. + + (vector) ==> #() + (vector 1) ==> #(1) + (vector 1 2) ==> #(1 2) + +The #(...) syntax is the literal notation for vectors used by the Lisp +printer and reader. This syntax allows you to save and restore vectors +by ‘PRINT’ing them out and ‘READ’ing them back in. You can use +the #(...) syntax to include literal vectors in your code, but as the +effects of modifying literal objects aren't defined, you should always +use ‘VECTOR’ or the more general function ‘MAKE-ARRAY’ to create +vectors you plan to modify. + +‘MAKE-ARRAY’ is more general than ‘VECTOR’ since you can use it +to create arrays of any dimensionality as well as both fixed-size and +resizable vectors. The one required argument to ‘MAKE-ARRAY’ is a +list containing the dimensions of the array. Since a vector is a +one-dimensional array, this list will contain one number, the size of +the vector. As a convenience, ‘MAKE-ARRAY’ will also accept a plain +number in the place of a one-item list. With no other arguments, +‘MAKE-ARRAY’ will create a vector with uninitialized elements that +must be set before they can be accessed. (2) To create a vector with +the elements all set to a particular value, you can pass an +‘:initial-element’ argument. Thus, to make a five-element vector +with its elements initialized to ‘NIL’, you can write the following: + + (make-array 5 :initial-element nil) ==> #(NIL NIL NIL NIL NIL) + +‘MAKE-ARRAY’ is also the function to use to make a resizable vector. +A resizable vector is a slightly more complicated object than a +fixed-size vector; in addition to keeping track of the memory used to +hold the elements and the number of slots available, a resizable vector +also keeps track of the number of elements actually stored in the +vector. This number is stored in the vector's ‘fill pointer’, so +called because it's the index of the next position to be filled when you +add an element to the vector. + +To make a vector with a fill pointer, you pass ‘MAKE-ARRAY’ a +‘:fill-pointer’ argument. For instance, the following call to +‘MAKE-ARRAY’ makes a vector with room for five elements; but it +looks empty because the fill pointer is zero: + + (make-array 5 :fill-pointer 0) ==> #() + +To add an element to the end of a resizable vector, you can use the +function ‘VECTOR-PUSH’. It adds the element at the current value of +the fill pointer and then increments the fill pointer by one, returning +the index where the new element was added. The function +‘VECTOR-POP’ returns the most recently pushed item, decrementing the +fill pointer in the process. + + (defparameter *x* (make-array 5 :fill-pointer 0)) + + (vector-push 'a *x*) ==> 0 + *x* ==> #(A) + (vector-push 'b *x*) ==> 1 + *x* ==> #(A B) + (vector-push 'c *x*) ==> 2 + *x* ==> #(A B C) + (vector-pop *x*) ==> C + *x* ==> #(A B) + (vector-pop *x*) ==> B + *x* ==> #(A) + (vector-pop *x*) ==> A + *x* ==> #() + +However, even a vector with a fill pointer isn't completely resizable. +The vector ‘*x*’ can hold at most five elements. To make an +arbitrarily resizable vector, you need to pass ‘MAKE-ARRAY’ another +keyword argument: ‘:adjustable’. + + (make-array 5 :fill-pointer 0 :adjustable t) ==> #() + +This call makes an ‘adjustable’ vector whose underlying memory can +be resized as needed. 
To add elements to an adjustable vector, you use +‘VECTOR-PUSH-EXTEND’, which works just like ‘VECTOR-PUSH’ except +it will automatically expand the array if you try to push an element +onto a full vector-one whose fill pointer is equal to the size of the +underlying storage. (3) + + ---------- Footnotes ---------- + + (1) Vectors are called ‘vectors’, not ‘arrays’ as their +analogs in other languages are, because Common Lisp supports true +multidimensional arrays. It's equally correct, though more cumbersome, +to refer to them as ‘one-dimensional arrays’. + + (2) Array elements "must" be set before they're accessed in the sense +that the behavior is undefined; Lisp won't necessarily stop you. + + (3) While frequently used together, the ‘:fill-pointer’ and +‘:adjustable’ arguments are independent-you can make an adjustable +array without a fill pointer. However, you can use ‘VECTOR-PUSH’ +and ‘VECTOR-POP’ only with vectors that have a fill pointer and +‘VECTOR-PUSH-EXTEND’ only with vectors that have a fill pointer and +are adjustable. You can also use the function ‘ADJUST-ARRAY’ to +modify adjustable arrays in a variety of ways beyond just extending the +length of a vector. + + +File: pcl.info, Node: 11-2, Next: 11-3, Prev: 11-1, Up: Chapter 11 + +Subtypes of Vector +================== + +All the vectors you've dealt with so far have been ‘general’ vectors +that can hold any type of object. It's also possible to create +‘specialized’ vectors that are restricted to holding certain types +of elements. One reason to use specialized vectors is they may be +stored more compactly and can provide slightly faster access to their +elements than general vectors. However, for the moment let's focus on a +couple of kinds of specialized vectors that are important data types in +their own right. + +One of these you've seen already-strings are vectors specialized to hold +characters. Strings are important enough to get their own read/print +syntax (double quotes) and the set of string-specific functions I +discussed in the previous chapter. But because they're also vectors, +all the functions I'll discuss in the next few sections that take vector +arguments can also be used with strings. These functions will fill out +the string library with functions for things such as searching a string +for a substring, finding occurrences of a character within a string, and +more. + +Literal strings, such as "foo", are like literal vectors written with +the #() syntax-their size is fixed, and they must not be modified. +However, you can use ‘MAKE-ARRAY’ to make resizable strings by +adding another keyword argument, ‘:element-type’. This argument +takes a ‘type’ descriptor. I won't discuss all the possible type +descriptors you can use here; for now it's enough to know you can create +a string by passing the symbol ‘CHARACTER’ as the +‘:element-type’ argument. Note that you need to quote the symbol to +prevent it from being treated as a variable name. For example, to make +an initially empty but resizable string, you can write this: + + (make-array 5 :fill-pointer 0 :adjustable t :element-type 'character) ==> "" + +Bit vectors-vectors whose elements are all zeros or ones-also get some +special treatment. They have a special read/print syntax that looks +like #*00001111 and a fairly large library of functions, which I won't +discuss, for performing bit-twiddling operations such as "anding" +together two bit arrays. The type descriptor to pass as the +‘:element-type’ to create a bit vector is the symbol ‘BIT’.
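+
+   (The following examples are not from the original text; they are an
+illustrative sketch of creating and combining bit vectors. ‘BIT-AND’,
+used here, is one of the standard bit-twiddling functions alluded to
+above; it "ands" two bit arrays element by element.
+
+     (make-array 8 :element-type 'bit :initial-element 0) ==> #*00000000
+     (bit-and #*1100 #*1010)                              ==> #*1000
+
+As with strings, you can combine ‘:element-type’ with
+‘:fill-pointer’ and ‘:adjustable’ to make resizable bit vectors.)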
+ + +File: pcl.info, Node: 11-3, Next: 11-4, Prev: 11-2, Up: Chapter 11 + +Vectors As Sequences +==================== + +As mentioned earlier, vectors and lists are the two concrete subtypes of +the abstract type ‘sequence’. All the functions I'll discuss in the +next few sections are sequence functions; in addition to being +applicable to vectors-both general and specialized-they can also be used +with lists. + +The two most basic sequence functions are ‘LENGTH’, which returns +the length of a sequence, and ‘ELT’, which allows you to access +individual elements via an integer index. ‘LENGTH’ takes a sequence +as its only argument and returns the number of elements it contains. +For vectors with a fill pointer, this will be the value of the fill +pointer. ‘ELT’, short for ‘element’, takes a sequence and an +integer index between zero (inclusive) and the length of the sequence +(exclusive) and returns the corresponding element. ‘ELT’ will +signal an error if the index is out of bounds. Like ‘LENGTH’, +‘ELT’ treats a vector with a fill pointer as having the length +specified by the fill pointer. + + (defparameter *x* (vector 1 2 3)) + + (length *x*) ==> 3 + (elt *x* 0) ==> 1 + (elt *x* 1) ==> 2 + (elt *x* 2) ==> 3 + (elt *x* 3) ==> error + +‘ELT’ is also a ‘SETF’able place, so you can set the value of a +particular element like this: + + (setf (elt *x* 0) 10) + + *x* ==> #(10 2 3) + + +File: pcl.info, Node: 11-4, Next: 11-5, Prev: 11-3, Up: Chapter 11 + +Sequence Iterating Functions +============================ + +While in theory all operations on sequences boil down to some +combination of ‘LENGTH’, ‘ELT’, and ‘SETF’ of ‘ELT’ +operations, Common Lisp provides a large library of sequence functions. + +One group of sequence functions allows you to express certain operations +on sequences such as finding or filtering specific elements without +writing explicit loops. Table 11-1 summarizes them. + +‘Table 11-1. Basic Sequence Functions’ + +[image src="tables/11-1.png"] + +Here are some simple examples of how to use these functions: + + (count 1 #(1 2 1 2 3 1 2 3 4)) ==> 3 + (remove 1 #(1 2 1 2 3 1 2 3 4)) ==> #(2 2 3 2 3 4) + (remove 1 '(1 2 1 2 3 1 2 3 4)) ==> (2 2 3 2 3 4) + (remove #\a "foobarbaz") ==> "foobrbz" + (substitute 10 1 #(1 2 1 2 3 1 2 3 4)) ==> #(10 2 10 2 3 10 2 3 4) + (substitute 10 1 '(1 2 1 2 3 1 2 3 4)) ==> (10 2 10 2 3 10 2 3 4) + (substitute #\x #\b "foobarbaz") ==> "fooxarxaz" + (find 1 #(1 2 1 2 3 1 2 3 4)) ==> 1 + (find 10 #(1 2 1 2 3 1 2 3 4)) ==> NIL + (position 1 #(1 2 1 2 3 1 2 3 4)) ==> 0 + +Note how ‘REMOVE’ and ‘SUBSTITUTE’ always return a sequence of +the same type as their sequence argument. + +You can modify the behavior of these five functions in a variety of ways +using keyword arguments. For instance, these functions, by default, +look for elements in the sequence that are the same object as the item +argument. You can change this in two ways: First, you can use the +‘:test’ keyword to pass a function that accepts two arguments and +returns a boolean. If provided, it will be used to compare ‘item’ +to each element instead of the default object equality test, ‘EQL’. +(1) Second, with the ‘:key’ keyword you can pass a one-argument +function to be called on each element of the sequence to extract a +‘key’ value, which will then be compared to the item in the place of +the element itself. Note, however, that functions such as ‘FIND’ +that return elements of the sequence continue to return the actual +element, not just the extracted key. 

     (count "foo" #("foo" "bar" "baz") :test #'string=) ==> 1
     (find 'c #((a 10) (b 20) (c 30) (d 40)) :key #'first) ==> (C 30)

To limit the effects of these functions to a particular subsequence of the sequence argument, you can provide bounding indices with ‘:start’ and ‘:end’ arguments. Passing ‘NIL’ for ‘:end’ or omitting it is the same as specifying the length of the sequence. (2)

If a non-‘NIL’ ‘:from-end’ argument is provided, then the elements of the sequence will be examined in reverse order. By itself ‘:from-end’ can affect the results of only ‘FIND’ and ‘POSITION’. For instance:

     (find 'a #((a 10) (b 20) (a 30) (b 40)) :key #'first) ==> (A 10)
     (find 'a #((a 10) (b 20) (a 30) (b 40)) :key #'first :from-end t) ==> (A 30)

However, the ‘:from-end’ argument can affect ‘REMOVE’ and ‘SUBSTITUTE’ in conjunction with another keyword parameter, ‘:count’, that's used to specify how many elements to remove or substitute. If you specify a ‘:count’ lower than the number of matching elements, then it obviously matters which end you start from:

     (remove #\a "foobarbaz" :count 1) ==> "foobrbaz"
     (remove #\a "foobarbaz" :count 1 :from-end t) ==> "foobarbz"

And while ‘:from-end’ can't change the results of the ‘COUNT’ function, it does affect the order the elements are passed to any ‘:test’ and ‘:key’ functions, which could possibly have side effects. For example:

     CL-USER> (defparameter *v* #((a 10) (b 20) (a 30) (b 40)))
     *V*
     CL-USER> (defun verbose-first (x) (format t "Looking at ~s~%" x) (first x))
     VERBOSE-FIRST
     CL-USER> (count 'a *v* :key #'verbose-first)
     Looking at (A 10)
     Looking at (B 20)
     Looking at (A 30)
     Looking at (B 40)
     2
     CL-USER> (count 'a *v* :key #'verbose-first :from-end t)
     Looking at (B 40)
     Looking at (A 30)
     Looking at (B 20)
     Looking at (A 10)
     2

Table 11-2 summarizes these arguments.

‘Table 11-2. Standard Sequence Function Keyword Arguments’

[image src="tables/11-2.png"]

   ---------- Footnotes ----------

   (1) Another parameter, ‘:test-not’, specifies a two-argument predicate to be used like a ‘:test’ argument except with the boolean result logically reversed. This parameter is deprecated, however, in preference for using the ‘COMPLEMENT’ function. ‘COMPLEMENT’ takes a function argument and returns a function that takes the same number of arguments as the original and returns the logical complement of the original function. Thus, you can, and should, write this:

     (count x sequence :test (complement #'some-test))

   rather than the following:

     (count x sequence :test-not #'some-test)

   (2) Note, however, that the effect of ‘:start’ and ‘:end’ on ‘REMOVE’ and ‘SUBSTITUTE’ is only to limit the elements they consider for removal or substitution; elements before ‘:start’ and after ‘:end’ will be passed through untouched.


File: pcl.info, Node: 11-5, Next: 11-6, Prev: 11-4, Up: Chapter 11

Higher-Order Function Variants
==============================

For each of the functions just discussed, Common Lisp provides two ‘higher-order function’ variants that, in the place of the item argument, take a function to be called on each element of the sequence. One set of variants are named the same as the basic function with an -IF appended. These functions count, find, remove, and substitute elements of the sequence for which the function argument returns true.
The other +set of variants are named with an -IF-NOT suffix and count, find, +remove, and substitute elements for which the function argument does +‘not’ return true. + + (count-if #'evenp #(1 2 3 4 5)) ==> 2 + + (count-if-not #'evenp #(1 2 3 4 5)) ==> 3 + + (position-if #'digit-char-p "abcd0001") ==> 4 + + (remove-if-not #'(lambda (x) (char= (elt x 0) #\f)) + #("foo" "bar" "baz" "foom")) ==> #("foo" "foom") + +According to the language standard, the ‘-IF-NOT’ variants are +deprecated. However, that deprecation is generally considered to have +itself been ill-advised. If the standard is ever revised, it's more +likely the deprecation will be removed than the ‘-IF-NOT’ functions. +For one thing, the ‘REMOVE-IF-NOT’ variant is probably used more +often than ‘REMOVE-IF’. Despite its negative-sounding name, +‘REMOVE-IF-NOT’ is actually the positive variant-it returns the +elements that ‘do’ satisfy the predicate. (1) + +The -IF and -IF-NOT variants accept all the same keyword arguments as +their vanilla counterparts except for ‘:test’, which isn't needed +since the main argument is already a function. (2) With a ‘:key’ +argument, the value extracted by the ‘:key’ function is passed to +the function instead of the actual element. + + (count-if #'evenp #((1 a) (2 b) (3 c) (4 d) (5 e)) :key #'first) ==> 2 + + (count-if-not #'evenp #((1 a) (2 b) (3 c) (4 d) (5 e)) :key #'first) ==> 3 + + (remove-if-not #'alpha-char-p + #("foo" "bar" "1baz") :key #'(lambda (x) (elt x 0))) ==> #("foo" "bar") + +The ‘REMOVE’ family of functions also support a fourth variant, +‘REMOVE-DUPLICATES’, that has only one required argument, a +sequence, from which it removes all but one instance of each duplicated +element. It takes the same keyword arguments as ‘REMOVE’, except +for ‘:count’, since it always removes all duplicates. + + (remove-duplicates #(1 2 1 2 3 1 2 3 4)) ==> #(1 2 3 4) + + ---------- Footnotes ---------- + + (1) This same functionality goes by the name ‘grep’ in Perl and +‘filter’ in Python. + + (2) The difference between the predicates passed as ‘:test’ +arguments and as the function arguments to the -IF and -IF-NOT functions +is that the ‘:test’ predicates are two-argument predicates used to +compare the elements of the sequence to the specific item while the -IF +and -IF-NOT predicates are one-argument functions that simply test the +individual elements of the sequence. If the vanilla variants didn't +exist, you could implement them in terms of the -IF versions by +embedding a specific item in the test function. + + (count char string) === + (count-if #'(lambda (c) (eql char c)) string) + + (count char string :test #'CHAR-EQUAL) === + (count-if #'(lambda (c) (char-equal char c)) string) + + +File: pcl.info, Node: 11-6, Next: 11-7, Prev: 11-5, Up: Chapter 11 + +Whole Sequence Manipulations +============================ + +A handful of functions perform operations on a whole sequence (or +sequences) at a time. These tend to be simpler than the other functions +I've described so far. For instance, ‘COPY-SEQ’ and ‘REVERSE’ +each take a single argument, a sequence, and each returns a new sequence +of the same type. The sequence returned by ‘COPY-SEQ’ contains the +same elements as its argument while the sequence returned by +‘REVERSE’ contains the same elements but in reverse order. Note +that neither function copies the elements themselves-only the returned +sequence is a new object. + +The ‘CONCATENATE’ function creates a new sequence containing the +concatenation of any number of sequences. 
However, unlike ‘REVERSE’ +and ‘COPY-SEQ’, which simply return a sequence of the same type as +their single argument, ‘CONCATENATE’ must be told explicitly what +kind of sequence to produce in case the arguments are of different +types. Its first argument is a type descriptor, like the +‘:element-type’ argument to ‘MAKE-ARRAY’. In this case, the +type descriptors you'll most likely use are the symbols ‘VECTOR’, +‘LIST’, or ‘STRING’. (1) For example: + + (concatenate 'vector #(1 2 3) '(4 5 6)) ==> #(1 2 3 4 5 6) + (concatenate 'list #(1 2 3) '(4 5 6)) ==> (1 2 3 4 5 6) + (concatenate 'string "abc" '(#\d #\e #\f)) ==> "abcdef" + + ---------- Footnotes ---------- + + (1) If you tell ‘CONCATENATE’ to return a specialized vector, +such as a string, all the elements of the argument sequences must be +instances of the vector's element type. + + +File: pcl.info, Node: 11-7, Next: 11-8, Prev: 11-6, Up: Chapter 11 + +Sorting and Merging +=================== + +The functions ‘SORT’ and ‘STABLE-SORT’ provide two ways of +sorting a sequence. They both take a sequence and a two-argument +predicate and return a sorted version of the sequence. + + (sort (vector "foo" "bar" "baz") #'string<) ==> #("bar" "baz" "foo") + +The difference is that ‘STABLE-SORT’ is guaranteed to not reorder +any elements considered equivalent by the predicate while ‘SORT’ +guarantees only that the result is sorted and may reorder equivalent +elements. + +Both these functions are examples of what are called ‘destructive’ +functions. Destructive functions are allowed-typically for reasons of +efficiency-to modify their arguments in more or less arbitrary ways. +This has two implications: one, you should always do something with the +return value of these functions (such as assign it to a variable or pass +it to another function), and, two, unless you're done with the object +you're passing to the destructive function, you should pass a copy +instead. I'll say more about destructive functions in the next chapter. + +Typically you won't care about the unsorted version of a sequence after +you've sorted it, so it makes sense to allow ‘SORT’ and +‘STABLE-SORT’ to destroy the sequence in the course of sorting it. +But it does mean you need to remember to write the following: (1) + + (setf my-sequence (sort my-sequence #'string<)) + +rather than just this: + + (sort my-sequence #'string<) + +Both these functions also take a keyword argument, ‘:key’, which, +like the ‘:key’ argument in other sequence functions, should be a +function and will be used to extract the values to be passed to the +sorting predicate in the place of the actual elements. The extracted +keys are used only to determine the ordering of elements; the sequence +returned will contain the actual elements of the argument sequence. + +The ‘MERGE’ function takes two sequences and a predicate and returns +a sequence produced by merging the two sequences, according to the +predicate. It's related to the two sorting functions in that if each +sequence is already sorted by the same predicate, then the sequence +returned by ‘MERGE’ will also be sorted. Like the sorting +functions, ‘MERGE’ takes a ‘:key’ argument. Like +‘CONCATENATE’, and for the same reason, the first argument to +‘MERGE’ must be a type descriptor specifying the type of sequence to +produce. 
+ + (merge 'vector #(1 3 5) #(2 4 6) #'<) ==> #(1 2 3 4 5 6) + (merge 'list #(1 3 5) #(2 4 6) #'<) ==> (1 2 3 4 5 6) + + ---------- Footnotes ---------- + + (1) When the sequence passed to the sorting functions is a vector, +the "destruction" is actually guaranteed to entail permuting the +elements in place, so you could get away without saving the returned +value. However, it's good style to always do something with the return +value since the sorting functions can modify lists in much more +arbitrary ways. + + +File: pcl.info, Node: 11-8, Next: 11-9, Prev: 11-7, Up: Chapter 11 + +Subsequence Manipulations +========================= + +Another set of functions allows you to manipulate subsequences of +existing sequences. The most basic of these is ‘SUBSEQ’, which +extracts a subsequence starting at a particular index and continuing to +a particular ending index or the end of the sequence. For instance: + + (subseq "foobarbaz" 3) ==> "barbaz" + (subseq "foobarbaz" 3 6) ==> "bar" + +‘SUBSEQ’ is also ‘SETF’able, but it won't extend or shrink a +sequence; if the new value and the subsequence to be replaced are +different lengths, the shorter of the two determines how many characters +are actually changed. + + (defparameter *x* (copy-seq "foobarbaz")) + + (setf (subseq *x* 3 6) "xxx") ; subsequence and new value are same length + *x* ==> "fooxxxbaz" + + (setf (subseq *x* 3 6) "abcd") ; new value too long, extra character ignored. + *x* ==> "fooabcbaz" + + (setf (subseq *x* 3 6) "xx") ; new value too short, only two characters changed + *x* ==> "fooxxcbaz" + +You can use the ‘FILL’ function to set multiple elements of a +sequence to a single value. The required arguments are a sequence and +the value with which to fill it. By default every element of the +sequence is set to the value; ‘:start’ and ‘:end’ keyword +arguments can limit the effects to a given subsequence. + +If you need to find a subsequence within a sequence, the ‘SEARCH’ +function works like ‘POSITION’ except the first argument is a +sequence rather than a single item. + + (position #\b "foobarbaz") ==> 3 + (search "bar" "foobarbaz") ==> 3 + +On the other hand, to find where two sequences with a common prefix +first diverge, you can use the ‘MISMATCH’ function. It takes two +sequences and returns the index of the first pair of mismatched +elements. + + (mismatch "foobarbaz" "foom") ==> 3 + +It returns ‘NIL’ if the strings match. ‘MISMATCH’ also takes +many of the standard keyword arguments: a ‘:key’ argument for +specifying a function to use to extract the values to be compared; a +‘:test’ argument to specify the comparison function; and +‘:start1’, ‘:end1’, ‘:start2’, and ‘:end2’ arguments to +specify subsequences within the two sequences. And a ‘:from-end’ +argument of ‘T’ specifies the sequences should be searched in +reverse order, causing ‘MISMATCH’ to return the index, in the first +sequence, where whatever common suffix the two sequences share begins. + + (mismatch "foobar" "bar" :from-end t) ==> 3 + + +File: pcl.info, Node: 11-9, Next: 11-10, Prev: 11-8, Up: Chapter 11 + +Sequence Predicates +=================== + +Four other handy functions are ‘EVERY’, ‘SOME’, ‘NOTANY’, +and ‘NOTEVERY’, which iterate over sequences testing a boolean +predicate. The first argument to all these functions is the predicate, +and the remaining arguments are sequences. The predicate should take as +many arguments as the number of sequences passed. 
The elements of the +sequences are passed to the predicate-one element from each +sequence-until one of the sequences runs out of elements or the overall +termination test is met: ‘EVERY’ terminates, returning false, as +soon as the predicate fails. If the predicate is always satisfied, it +returns true. ‘SOME’ returns the first non-‘NIL’ value returned +by the predicate or returns false if the predicate is never satisfied. +‘NOTANY’ returns false as soon as the predicate is satisfied or true +if it never is. And ‘NOTEVERY’ returns true as soon as the +predicate fails or false if the predicate is always satisfied. Here are +some examples of testing just one sequence: + + (every #'evenp #(1 2 3 4 5)) ==> NIL + (some #'evenp #(1 2 3 4 5)) ==> T + (notany #'evenp #(1 2 3 4 5)) ==> NIL + (notevery #'evenp #(1 2 3 4 5)) ==> T + +These calls compare elements of two sequences pairwise: + + (every #'> #(1 2 3 4) #(5 4 3 2)) ==> NIL + (some #'> #(1 2 3 4) #(5 4 3 2)) ==> T + (notany #'> #(1 2 3 4) #(5 4 3 2)) ==> NIL + (notevery #'> #(1 2 3 4) #(5 4 3 2)) ==> T + + +File: pcl.info, Node: 11-10, Next: 11-11, Prev: 11-9, Up: Chapter 11 + +Sequence Mapping Functions +========================== + +Finally, the last of the sequence functions are the generic mapping +functions. ‘MAP’, like the sequence predicate functions, takes a +‘n’-argument function and ‘n’ sequences. But instead of a +boolean value, ‘MAP’ returns a new sequence containing the result of +applying the function to subsequent elements of the sequences. Like +‘CONCATENATE’ and ‘MERGE’, ‘MAP’ needs to be told what kind +of sequence to create. + + (map 'vector #'* #(1 2 3 4 5) #(10 9 8 7 6)) ==> #(10 18 24 28 30) + +‘MAP-INTO’ is like ‘MAP’ except instead of producing a new +sequence of a given type, it places the results into a sequence passed +as the first argument. This sequence can be the same as one of the +sequences providing values for the function. For instance, to sum +several vectors-‘a’, ‘b’, and ‘c’-into one, you could write +this: + + (map-into a #'+ a b c) + +If the sequences are different lengths, ‘MAP-INTO’ affects only as +many elements as are present in the shortest sequence, including the +sequence being mapped into. However, if the sequence being mapped into +is a vector with a fill pointer, the number of elements affected isn't +limited by the fill pointer but rather by the actual size of the vector. +After a call to ‘MAP-INTO’, the fill pointer will be set to the +number of elements mapped. ‘MAP-INTO’ won't, however, extend an +adjustable vector. + +The last sequence function is ‘REDUCE’, which does another kind of +mapping: it maps over a single sequence, applying a two-argument +function first to the first two elements of the sequence and then to the +value returned by the function and subsequent elements of the sequence. +Thus, the following expression sums the numbers from one to ten: + + (reduce #'+ #(1 2 3 4 5 6 7 8 9 10)) ==> 55 + +‘REDUCE’ is a surprisingly useful function-whenever you need to +distill a sequence down to a single value, chances are you can write it +with ‘REDUCE’, and it will often be quite a concise way to express +what you want. For instance, to find the maximum value in a sequence of +numbers, you can write ‘(reduce #'max numbers)’. ‘REDUCE’ also +takes a full complement of keyword arguments (‘:key’, +‘:from-end’, ‘:start’, and ‘:end’) and one unique to +‘REDUCE’ (‘:initial-value’). 
The latter specifies a value +that's logically placed before the first element of the sequence (or +after the last if you also specify a true ‘:from-end’ argument). + + +File: pcl.info, Node: 11-11, Next: 11-12, Prev: 11-10, Up: Chapter 11 + +Hash Tables +=========== + +The other general-purpose collection provided by Common Lisp is the hash +table. Where vectors provide an integer-indexed data structure, hash +tables allow you to use arbitrary objects as the indexes, or keys. When +you add a value to a hash table, you store it under a particular key. +Later you can use the same key to retrieve the value. Or you can +associate a new value with the same key-each key maps to a single value. + +With no arguments ‘MAKE-HASH-TABLE’ makes a hash table that +considers two keys equivalent if they're the same object according to +‘EQL’. This is a good default unless you want to use strings as +keys, since two strings with the same contents aren't necessarily +‘EQL’. In that case you'll want a so-called ‘EQUAL’ hash table, +which you can get by passing the symbol ‘EQUAL’ as the ‘:test’ +keyword argument to ‘MAKE-HASH-TABLE’. Two other possible values +for the ‘:test’ argument are the symbols ‘EQ’ and ‘EQUALP’. +These are, of course, the names of the standard object comparison +functions, which I discussed in Chapter 4. However, unlike the +‘:test’ argument passed to sequence functions, +‘MAKE-HASH-TABLE’'s ‘:test’ can't be used to specify an +arbitrary function-only the values ‘EQ’, ‘EQL’, ‘EQUAL’, and +‘EQUALP’. This is because hash tables actually need two functions, +an equivalence function and a ‘hash’ function that computes a +numerical hash code from the key in a way compatible with how the +equivalence function will ultimately compare two keys. However, +although the language standard provides only for hash tables that use +the standard equivalence functions, most implementations provide some +mechanism for defining custom hash tables. + +The ‘GETHASH’ function provides access to the elements of a hash +table. It takes two arguments-a key and the hash table-and returns the +value, if any, stored in the hash table under that key or ‘NIL’. +(1) For example: + + (defparameter *h* (make-hash-table)) + + (gethash 'foo *h*) ==> NIL + + (setf (gethash 'foo *h*) 'quux) + + (gethash 'foo *h*) ==> QUUX + +Since ‘GETHASH’ returns ‘NIL’ if the key isn't present in the +table, there's no way to tell from the return value the difference +between a key not being in a hash table at all and being in the table +with the value ‘NIL’. ‘GETHASH’ solves this problem with a +feature I haven't discussed yet-multiple return values. ‘GETHASH’ +actually returns two values; the primary value is the value stored under +the given key or ‘NIL’. The secondary value is a boolean indicating +whether the key is present in the hash table. Because of the way +multiple values work, the extra return value is silently discarded +unless the caller explicitly handles it with a form that can "see" +multiple values. + +I'll discuss multiple return values in greater detail in Chapter 20, but +for now I'll give you a sneak preview of how to use the +‘MULTIPLE-VALUE-BIND’ macro to take advantage of ‘GETHASH’'s +extra return value. ‘MULTIPLE-VALUE-BIND’ creates variable bindings +like ‘LET’ does, filling them with the multiple values returned by a +form. 
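
As a quick illustration (this example is an addition, not one of the book's own), ‘FLOOR’ is a function that returns two values, a quotient and a remainder, and ‘MULTIPLE-VALUE-BIND’ can capture both:

     (multiple-value-bind (quotient remainder) (floor 7 2)
       (list quotient remainder)) ==> (3 1)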
+ +The following function shows how you might use +‘MULTIPLE-VALUE-BIND’; the variables it binds are ‘value’ and +‘present’: + + (defun show-value (key hash-table) + (multiple-value-bind (value present) (gethash key hash-table) + (if present + (format nil "Value ~a actually present." value) + (format nil "Value ~a because key not found." value)))) + + (setf (gethash 'bar *h*) nil) ; provide an explicit value of NIL + + (show-value 'foo *h*) ==> "Value QUUX actually present." + (show-value 'bar *h*) ==> "Value NIL actually present." + (show-value 'baz *h*) ==> "Value NIL because key not found." + +Since setting the value under a key to ‘NIL’ leaves the key in the +table, you'll need another function to completely remove a key/value +pair. ‘REMHASH’ takes the same arguments as ‘GETHASH’ and +removes the specified entry. You can also completely clear a hash table +of all its key/value pairs with ‘CLRHASH’. + + ---------- Footnotes ---------- + + (1) By an accident of history, the order of arguments to +‘GETHASH’ is the opposite of ‘ELT’-‘ELT’ takes the +collection first and then the index while ‘GETHASH’ takes the key +first and then the collection. + + +File: pcl.info, Node: 11-12, Next: Chapter 12, Prev: 11-11, Up: Chapter 11 + +Hash Table Iteration +==================== + +Common Lisp provides a couple ways to iterate over the entries in a hash +table. The simplest of these is via the function ‘MAPHASH’. +Analogous to the ‘MAP’ function, ‘MAPHASH’ takes a two-argument +function and a hash table and invokes the function once for each +key/value pair in the hash table. For instance, to print all the +key/value pairs in a hash table, you could use ‘MAPHASH’ like this: + + (maphash #'(lambda (k v) (format t "~a => ~a~%" k v)) *h*) + +The consequences of adding or removing elements from a hash table while +iterating over it aren't specified (and are likely to be bad) with two +exceptions: you can use ‘SETF’ with ‘GETHASH’ to change the +value of the current entry, and you can use ‘REMHASH’ to remove the +current entry. For instance, to remove all the entries whose value is +less than ten, you could write this: + + (maphash #'(lambda (k v) (when (< v 10) (remhash k *h*))) *h*) + +The other way to iterate over a hash table is with the extended +‘LOOP’ macro, which I'll discuss in Chapter 22. (1) The ‘LOOP’ +equivalent of the first ‘MAPHASH’ expression would look like this: + + (loop for k being the hash-keys in *h* using (hash-value v) + do (format t "~a => ~a~%" k v)) + +I could say a lot more about the nonlist collections supported by Common +Lisp. For instance, I haven't discussed multidimensional arrays at all +or the library of functions for manipulating bit arrays. However, what +I've covered in this chapter should suffice for most of your +general-purpose programming needs. Now it's finally time to look at +Lisp's eponymous data structure: lists. + + ---------- Footnotes ---------- + + (1) ‘LOOP’'s hash table iteration is typically implemented on top +of a more primitive form, ‘WITH-HASH-TABLE-ITERATOR’, that you don't +need to worry about; it was added to the language specifically to +support implementing things such as ‘LOOP’ and is of little use +unless you need to write completely new control constructs for iterating +over hash tables. + + +File: pcl.info, Node: Chapter 12, Next: Chapter 13, Prev: Chapter 11, Up: Top + +12. 
They Called It LISP for a Reason: List Processing +===================================================== + +Lists play an important role in Lisp-for reasons both historical and +practical. Historically, lists were Lisp's original composite data +type, though it has been decades since they were its only such data +type. These days, a Common Lisp programmer is as likely to use a +vector, a hash table, or a user-defined class or structure as to use a +list. + + Practically speaking, lists remain in the language because they're an +excellent solution to certain problems. One such problem-how to +represent code as data in order to support code-transforming and +code-generating macros-is particular to Lisp, which may explain why +other languages don't feel the lack of Lisp-style lists. More +generally, lists are an excellent data structure for representing any +kind of heterogeneous and/or hierarchical data. They're also quite +lightweight and support a functional style of programming that's another +important part of Lisp's heritage. + + Thus, you need to understand lists on their own terms; as you gain a +better understanding of how lists work, you'll be in a better position +to appreciate when you should and shouldn't use them. + +* Menu: + +* 12-1:: "There Is No List" +* 12-2:: Functional Programming and Lists +* 12-3:: "Destructive" Operations +* 12-4:: Combining Recycling with Shared Structure +* 12-5:: List-Manipulation Functions +* 12-6:: Mapping +* 12-7:: Other Structures + + +File: pcl.info, Node: 12-1, Next: 12-2, Prev: Chapter 12, Up: Chapter 12 + +"There Is No List" +================== + + Spoon Boy: Do not try and bend the list. That's impossible. + Instead . . . only try to realize the truth. Neo: What truth? + Spoon Boy: There is no list. Neo: There is no list? Spoon Boy: + Then you'll see that it is not the list that bends; it is only + yourself. (1) + The key to understanding lists is to understand that they're largely +an illusion built on top of objects that are instances of a more +primitive data type. Those simpler objects are pairs of values called +cons cells, after the function CONS used to create them. + + CONS takes two arguments and returns a new cons cell containing the +two values. (2) These values can be references to any kind of object. +Unless the second value is NIL or another cons cell, a cons is printed +as the two values in parentheses separated by a dot, a so-called dotted +pair. + + (cons 1 2) ==> (1 . 2) + + The two values in a cons cell are called the CAR and the CDR after +the names of the functions used to access them. At the dawn of time, +these names were mnemonic, at least to the folks implementing the first +Lisp on an IBM 704. But even then they were just lifted from the +assembly mnemonics used to implement the operations. However, it's not +all bad that these names are somewhat meaningless-when considering +individual cons cells, it's best to think of them simply as an arbitrary +pair of values without any particular semantics. Thus: + + (car (cons 1 2)) ==> 1 + (cdr (cons 1 2)) ==> 2 + + Both CAR and CDR are also SETFable places-given an existing cons +cell, it's possible to assign a new value to either of its values. (3) + + (defparameter *cons* (cons 1 2)) + *cons* ==> (1 . 2) + (setf (car *cons*) 10) ==> 10 + *cons* ==> (10 . 2) + (setf (cdr *cons*) 20) ==> 20 + *cons* ==> (10 . 20) + + Because the values in a cons cell can be references to any kind of +object, you can build larger structures out of cons cells by linking +them together. 
Lists are built by linking together cons cells in a +chain. The elements of the list are held in the CARs of the cons cells +while the links to subsequent cons cells are held in the CDRs. The last +cell in the chain has a CDR of NIL, which-as I mentioned in Chapter +4-represents the empty list as well as the boolean value false. + + This arrangement is by no means unique to Lisp; it's called a singly +linked list. However, few languages outside the Lisp family provide +such extensive support for this humble data type. + + So when I say a particular value is a list, what I really mean is +it's either NIL or a reference to a cons cell. The CAR of the cons cell +is the first item of the list, and the CDR is a reference to another +list, that is, another cons cell or NIL, containing the remaining +elements. The Lisp printer understands this convention and prints such +chains of cons cells as parenthesized lists rather than as dotted pairs. + + (cons 1 nil) ==> (1) + (cons 1 (cons 2 nil)) ==> (1 2) + (cons 1 (cons 2 (cons 3 nil))) ==> (1 2 3) + + When talking about structures built out of cons cells, a few diagrams +can be a big help. Box-and-arrow diagrams represent cons cells as a +pair of boxes like this: + +[image src="imgs/one-cons-cell.png"] + + The box on the left represents the CAR, and the box on the right is +the CDR. The values stored in a particular cons cell are either drawn in +the appropriate box or represented by an arrow from the box to a +representation of the referenced value. (4) For instance, the list (1 2 +3), which consists of three cons cells linked together by their CDRs, +would be diagrammed like this: + +[image src="imgs/list-1-2-3.png"] + + However, most of the time you work with lists you won't have to deal +with individual cons cells-the functions that create and manipulate +lists take care of that for you. For example, the LIST function builds +a cons cells under the covers for you and links them together; the +following LIST expressions are equivalent to the previous CONS +expressions: + + (list 1) ==> (1) + (list 1 2) ==> (1 2) + (list 1 2 3) ==> (1 2 3) + + Similarly, when you're thinking in terms of lists, you don't have to +use the meaningless names CAR and CDR; FIRST and REST are synonyms for +CAR and CDR that you should use when you're dealing with cons cells as +lists. + + (defparameter *list* (list 1 2 3 4)) + (first *list*) ==> 1 + (rest *list*) ==> (2 3 4) + (first (rest *list*)) ==> 2 + + Because cons cells can hold any kind of values, so can lists. And a +single list can hold objects of different types. + + (list "foo" (list 1 2) 10) ==> ("foo" (1 2) 10) + + The structure of that list would look like this: + +[image src="imgs/mixed-list.png"] + + Because lists can have other lists as elements, you can also use them +to represent trees of arbitrary depth and complexity. As such, they +make excellent representations for any heterogeneous, hierarchical data. +Lisp-based XML processors, for instance, usually represent XML documents +internally as lists. Another obvious example of tree-structured data is +Lisp code itself. In Chapters 30 and 31 you'll write an HTML generation +library that uses lists of lists to represent the HTML to be generated. +I'll talk more next chapter about using cons cells to represent other +data structures. + + Common Lisp provides quite a large library of functions for +manipulating lists. In the sections "List-Manipulation Functions" and +"Mapping," you'll look at some of the more important of these functions. 
+However, they will be easier to understand in the context of a few ideas +borrowed from functional programming. + + ---------- Footnotes ---------- + + (1) Adapted from The Matrix (http://us.imdb.com/Quotes?0133093) + + (2) CONS was originally short for the verb construct. + + (3) When the place given to SETF is a CAR or CDR, it expands into a +call to the function RPLACA or RPLACD; some old-school Lispers-the same +ones who still use SETQ-will still use RPLACA and RPLACD directly, but +modern style is to use SETF of CAR or CDR. + + (4) Typically, simple objects such as numbers are drawn within the +appropriate box, and more complex objects will be drawn outside the box +with an arrow from the box indicating the reference. This actually +corresponds well with how many Common Lisp implementations work-although +all objects are conceptually stored by reference, certain simple +immutable objects can be stored directly in a cons cell. + + +File: pcl.info, Node: 12-2, Next: 12-3, Prev: 12-1, Up: Chapter 12 + +Functional Programming and Lists +================================ + +The essence of functional programming is that programs are built +entirely of functions with no side effects that compute their results +based solely on the values of their arguments. The advantage of the +functional style is that it makes programs easier to understand. +Eliminating side effects eliminates almost all possibilities for action +at a distance. And since the result of a function is determined only by +the values of its arguments, its behavior is easier to understand and +test. For instance, when you see an expression such as (+ 3 4), you +know the result is uniquely determined by the definition of the + +function and the values 3 and 4. You don't have to worry about what may +have happened earlier in the execution of the program since there's +nothing that can change the result of evaluating that expression. + + Functions that deal with numbers are naturally functional since +numbers are immutable. A list, on the other hand, can be mutated, as +you've just seen, by SETFing the CARs and CDRs of the cons cells that +make up its backbone. However, lists can be treated as a functional +data type if you consider their value to be determined by the elements +they contain. Thus, any list of the form (1 2 3 4) is functionally +equivalent to any other list containing those four values, regardless of +what cons cells are actually used to represent the list. And any +function that takes a list as an argument and returns a value based +solely on the contents of the list can likewise be considered +functional. For instance, the REVERSE sequence function, given the list +(1 2 3 4), always returns a list (4 3 2 1). Different calls to REVERSE +with functionally equivalent lists as the argument will return +functionally equivalent result lists. Another aspect of functional +programming, which I'll discuss in the section "Mapping," is the use of +higher-order functions: functions that treat other functions as data, +taking them as arguments or returning them as results. + + Most of Common Lisp's list-manipulation functions are written in a +functional style. I'll discuss later how to mix functional and other +coding styles, but first you should understand a few subtleties of the +functional style as applied to lists. + + The reason most list functions are written functionally is it allows +them to return results that share cons cells with their arguments. 
To +take a concrete example, the function APPEND takes any number of list +arguments and returns a new list containing the elements of all its +arguments. For instance: + + (append (list 1 2) (list 3 4)) ==> (1 2 3 4) + + From a functional point of view, APPEND's job is to return the list +(1 2 3 4) without modifying any of the cons cells in the lists (1 2) and +(3 4). One obvious way to achieve that goal is to create a completely +new list consisting of four new cons cells. However, that's more work +than is necessary. Instead, APPEND actually makes only two new cons +cells to hold the values 1 and 2, linking them together and pointing the +CDR of the second cons cell at the head of the last argument, the list +(3 4). It then returns the cons cell containing the 1. None of the +original cons cells has been modified, and the result is indeed the list +(1 2 3 4). The only wrinkle is that the list returned by APPEND shares +some cons cells with the list (3 4). The resulting structure looks like +this: + +[image src="imgs/after-append.png"] + + In general, APPEND must copy all but its last argument, but it can +always return a result that shares structure with the last argument. + + Other functions take similar advantage of lists' ability to share +structure. Some, like APPEND, are specified to always return results +that share structure in a particular way. Others are simply allowed to +return shared structure at the discretion of the implementation. + + +File: pcl.info, Node: 12-3, Next: 12-4, Prev: 12-2, Up: Chapter 12 + +"Destructive" Operations +======================== + +If Common Lisp were a purely functional language, that would be the end +of the story. However, because it's possible to modify a cons cell +after it has been created by SETFing its CAR or CDR, you need to think a +bit about how side effects and structure sharing mix. + + Because of Lisp's functional heritage, operations that modify +existing objects are called destructive-in functional programming, +changing an object's state "destroys" it since it no longer represents +the same value. However, using the same term to describe all +state-modifying operations leads to a certain amount of confusion since +there are two very different kinds of destructive operations, +for-side-effect operations and recycling operations. (1) + + For-side-effect operations are those used specifically for their side +effects. All uses of SETF are destructive in this sense, as are +functions that use SETF under the covers to change the state of an +existing object such as VECTOR-PUSH or VECTOR-POP. But it's a bit unfair +to describe these operations as destructive-they're not intended to be +used in code written in a functional style, so they shouldn't be +described using functional terminology. However, if you mix +nonfunctional, for-side-effect operations with functions that return +structure-sharing results, then you need to be careful not to +inadvertently modify the shared structure. For instance, consider these +three definitions: + + (defparameter *list-1* (list 1 2)) + (defparameter *list-2* (list 3 4)) + (defparameter *list-3* (append *list-1* *list-2*)) + + After evaluating these forms, you have three lists, but *list-3* and +*list-2* share structure just like the lists in the previous diagram. + + *list-1* ==> (1 2) + *list-2* ==> (3 4) + *list-3* ==> (1 2 3 4) + + Now consider what happens when you modify *list-2*. 
+ + (setf (first *list-2*) 0) ==> 0 + *list-2* ==> (0 4) ; as expected + *list-3* ==> (1 2 0 4) ; maybe not what you wanted + + The change to *list-2* also changes *list-3* because of the shared +structure: the first cons cell in *list-2* is also the third cons cell +in *list-3*. SETFing the FIRST of *list-2* changes the value in the CAR +of that cons cell, affecting both lists. + + On the other hand, the other kind of destructive operations, +recycling operations, are intended to be used in functional code. They +use side effects only as an optimization. In particular, they reuse +certain cons cells from their arguments when building their result. +However, unlike functions such as APPEND that reuse cons cells by +including them, unmodified, in the list they return, recycling functions +reuse cons cells as raw material, modifying the CAR and CDR as necessary +to build the desired result. Thus, recycling functions can be used +safely only when the original lists aren't going to be needed after the +call to the recycling function. + + To see how a recycling function works, let's compare REVERSE, the +nondestructive function that returns a reversed version of a sequence, +to NREVERSE, a recycling version of the same function. Because REVERSE +doesn't modify its argument, it must allocate a new cons cell for each +element in the list being reversed. But suppose you write something +like this: + + (setf *list* (reverse *list*)) + + By assigning the result of REVERSE back to *list*, you've removed the +reference to the original value of *list*. Assuming the cons cells in +the original list aren't referenced anywhere else, they're now eligible +to be garbage collected. However, in many Lisp implementations it'd be +more efficient to immediately reuse the existing cons cells rather than +allocating new ones and letting the old ones become garbage. + + NREVERSE allows you to do exactly that. The N stands for +non-consing, meaning it doesn't need to allocate any new cons cells. +The exact side effects of NREVERSE are intentionally not specified-it's +allowed to modify any CAR or CDR of any cons cell in the list-but a +typical implementation might walk down the list changing the CDR of each +cons cell to point to the previous cons cell, eventually returning the +cons cell that was previously the last cons cell in the old list and is +now the head of the reversed list. No new cons cells need to be +allocated, and no garbage is created. + + Most recycling functions, like NREVERSE, have nondestructive +counterparts that compute the same result. In general, the recycling +functions have names that are the same as their non-destructive +counterparts except with a leading N. However, not all do, including +several of the more commonly used recycling functions such as NCONC, the +recycling version of APPEND, and DELETE, DELETE-IF, DELETE-IF-NOT, and +DELETE-DUPLICATES, the recycling versions of the REMOVE family of +sequence functions. + + In general, you use recycling functions in the same way you use their +nondestructive counterparts except it's safe to use them only when you +know the arguments aren't going to be used after the function returns. +The side effects of most recycling functions aren't specified tightly +enough to be relied upon. + + However, the waters are further muddied by a handful of recycling +functions with specified side effects that can be relied upon. 
They are +NCONC, the recycling version of APPEND, and NSUBSTITUTE and its -IF and +-IF-NOT variants, the recycling versions of the sequence functions +SUBSTITUTE and friends. + + Like APPEND, NCONC returns a concatenation of its list arguments, but +it builds its result in the following way: for each nonempty list it's +passed, NCONC sets the CDR of the list's last cons cell to point to the +first cons cell of the next nonempty list. It then returns the first +list, which is now the head of the spliced-together result. Thus: + + (defparameter *x* (list 1 2 3)) + + (nconc *x* (list 4 5 6)) ==> (1 2 3 4 5 6) + + *x* ==> (1 2 3 4 5 6) + + NSUBSTITUTE and variants can be relied on to walk down the list +structure of the list argument and to SETF the CARs of any cons cells +holding the old value to the new value and to otherwise leave the list +intact. It then returns the original list, which now has the same value +as would've been computed by SUBSTITUTE. (2) + + The key thing to remember about NCONC and NSUBSTITUTE is that they're +the exceptions to the rule that you can't rely on the side effects of +recycling functions. It's perfectly acceptable-and arguably good +style-to ignore the reliability of their side effects and use them, like +any other recycling function, only for the value they return. + + ---------- Footnotes ---------- + + (1) The phrase for-side-effect is used in the language standard, but +recycling is my own invention; most Lisp literature simply uses the term +destructive for both kinds of operations, leading to the confusion I'm +trying to dispel. + + (2) The string functions NSTRING-CAPITALIZE, NSTRING-DOWNCASE, and +NSTRING-UPCASE are similar-they return the same results as their N-less +counterparts but are specified to modify their string argument in place. + + +File: pcl.info, Node: 12-4, Next: 12-5, Prev: 12-3, Up: Chapter 12 + +Combining Recycling with Shared Structure +========================================= + +Although you can use recycling functions whenever the arguments to the +recycling function won't be used after the function call, it's worth +noting that each recycling function is a loaded gun pointed footward: if +you accidentally use a recycling function on an argument that is used +later, you're liable to lose some toes. + + To make matters worse, shared structure and recycling functions tend +to work at cross-purposes. Nondestructive list functions return lists +that share structure under the assumption that cons cells are never +modified, but recycling functions work by violating that assumption. +Or, put another way, sharing structure is based on the premise that you +don't care exactly what cons cells make up a list while using recycling +functions requires that you know exactly what cons cells are referenced +from where. + + In practice, recycling functions tend to be used in a few idiomatic +ways. By far the most common recycling idiom is to build up a list to +be returned from a function by "consing" onto the front of a list, +usually by PUSHing elements onto a list stored in a local variable and +then returning the result of NREVERSEing it. (1) + + This is an efficient way to build a list because each PUSH has to +create only one cons cell and modify a local variable and the NREVERSE +just has to zip down the list reassigning the CDRs. Because the list is +created entirely within the function, there's no danger any code outside +the function has a reference to any of its cons cells. 
Here's a +function that uses this idiom to build a list of the first n numbers, +starting at zero: (2) + + (defun upto (max) + (let ((result nil)) + (dotimes (i max) + (push i result)) + (nreverse result))) + + (upto 10) ==> (0 1 2 3 4 5 6 7 8 9) + + The next most common recycling idiom (3) is to immediately reassign +the value returned by the recycling function back to the place +containing the potentially recycled value. For instance, you'll often +see expressions like the following, using DELETE, the recycling version +of REMOVE: + + (setf foo (delete nil foo)) + + This sets the value of foo to its old value except with all the NILs +removed. However, even this idiom must be used with some care-if foo +shares structure with lists referenced elsewhere, using DELETE instead +of REMOVE can destroy the structure of those other lists. For example, +consider the two lists *list-2* and *list-3* from earlier that share +their last two cons cells. + + *list-2* ==> (0 4) + *list-3* ==> (1 2 0 4) + + You can delete 4 from *list-3* like this: + + (setf *list-3* (delete 4 *list-3*)) ==> (1 2 0) + + However, DELETE will likely perform the necessary deletion by setting +the CDR of the third cons cell to NIL, disconnecting the fourth cons +cell, the one holding the 4, from the list. Because the third cons cell +of *list-3* is also the first cons cell in *list-2*, the following +modifies *list-2* as well: + + *list-2* ==> (0) + + If you had used REMOVE instead of DELETE, it would've built a list +containing the values 1, 2, and 0, creating new cons cells as necessary +rather than modifying any of the cons cells in *list-3*. In that case, +*list-2* wouldn't have been affected. + + The PUSH/NREVERSE and SETF/DELETE idioms probably account for 80 +percent of the uses of recycling functions. Other uses are possible but +require keeping careful track of which functions return shared structure +and which do not. + + In general, when manipulating lists, it's best to write your own code +in a functional style-your functions should depend only on the contents +of their list arguments and shouldn't modify them. Following that rule +will, of course, rule out using any destructive functions, recycling or +otherwise. Once you have your code working, if profiling shows you need +to optimize, you can replace nondestructive list operations with their +recycling counterparts but only if you're certain the argument lists +aren't referenced from anywhere else. + + One last gotcha to watch out for is that the sorting functions SORT, +STABLE-SORT, and MERGE mentioned in Chapter 11 are also recycling +functions when applied to lists. (4) However, these functions don't +have nondestructive counterparts, so if you need to sort a list without +destroying it, you need to pass the sorting function a copy made with +COPY-LIST. In either case you need to be sure to save the result of the +sorting function because the original argument is likely to be in +tatters. For instance: + + CL-USER> (defparameter *list* (list 4 3 2 1)) + *LIST* + CL-USER> (sort *list* #'<) + (1 2 3 4) ; looks good + CL-USER> *list* + (4) ; whoops! + + ---------- Footnotes ---------- + + (1) For example, in an examination of all uses of recycling functions +in the Common Lisp Open Code Collection (CLOCC), a diverse set of +libraries written by various authors, instances of the PUSH/NREVERSE +idiom accounted for nearly half of all uses of recycling functions. + + (2) There are, of course, other ways to do this same thing. 
The +extended LOOP macro, for instance, makes it particularly easy and likely +generates code that's even more efficient than the PUSH/ NREVERSE +version. + + (3) This idiom accounts for 30 percent of uses of recycling in the +CLOCC code base. + + (4) SORT and STABLE-SORT can be used as for-side-effect operations on +vectors, but since they still return the sorted vector, you should +ignore that fact and use them for return values for the sake of +consistency. + + +File: pcl.info, Node: 12-5, Next: 12-6, Prev: 12-4, Up: Chapter 12 + +List-Manipulation Functions +=========================== + +With that background out of the way, you're ready to look at the library +of functions Common Lisp provides for manipulating lists. + + You've already seen the basic functions for getting at the elements +of a list: FIRST and REST. Although you can get at any element of a list +by combining enough calls to REST (to move down the list) with a FIRST +(to extract the element), that can be a bit tedious. So Common Lisp +provides functions named for the other ordinals from SECOND to TENTH +that return the appropriate element. More generally, the function NTH +takes two arguments, an index and a list, and returns the nth +(zero-based) element of the list. Similarly, NTHCDR takes an index and +a list and returns the result of calling CDR n times. (Thus, (nthcdr 0 +...) simply returns the original list, and (nthcdr 1 ...) is +equivalent to REST.) Note, however, that none of these functions is any +more efficient, in terms of work done by the computer, than the +equivalent combinations of FIRSTs and RESTs-there's no way to get to the +nth element of a list without following n CDR references. (1) + + The 28 composite CAR/CDR functions are another family of functions +you may see used from time to time. Each function is named by placing a +sequence of up to four As and Ds between a C and R, with each A +representing a call to CAR and each D a call to CDR. Thus: + + (caar list) === (car (car list)) + (cadr list) === (car (cdr list)) + (cadadr list) === (car (cdr (car (cdr list)))) + + Note, however, that many of these functions make sense only when +applied to lists that contain other lists. For instance, CAAR extracts +the CAR of the CAR of the list it's given; thus, the list it's passed +must contain another list as its first element. In other words, these +are really functions on trees rather than lists: + + (caar (list 1 2 3)) ==> error + (caar (list (list 1 2) 3)) ==> 1 + (cadr (list (list 1 2) (list 3 4))) ==> (3 4) + (caadr (list (list 1 2) (list 3 4))) ==> 3 + + These functions aren't used as often now as in the old days. And +even the most die-hard old-school Lisp hackers tend to avoid the longer +combinations. However, they're used quite a bit in older Lisp code, so +it's worth at least understanding how they work. (2) + + The FIRST-TENTH and CAR, CADR, and so on, functions can also be used +as SETFable places if you're using lists nonfunctionally. + + Table 12-1 summarizes some other list functions that I won't cover in +detail. + + Table 12-1. Other List Functions + +[image src="tables/12-1.png"] + + ---------- Footnotes ---------- + + (1) NTH is roughly equivalent to the sequence function ELT but works +only with lists. Also, confusingly, NTH takes the index as the first +argument, the opposite of ELT. Another difference is that ELT will +signal an error if you try to access an element at an index greater than +or equal to the length of the list, but NTH will return NIL. 
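
   For instance (an illustrative addition to this footnote, not from the original text):

     (nth 1 '(a b c)) ==> B
     (elt '(a b c) 1) ==> B
     (nth 5 '(a b c)) ==> NIL
     (elt '(a b c) 5) ==> error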
+ + (2) In particular, they used to be used to extract the various parts +of expressions passed to macros before the invention of destructuring +parameter lists. For example, you could take apart the following +expression: + + (when (> x 10) (print x)) + + Like this: + + ;; the condition + (cadr '(when (> x 10) (print x))) ==> (> X 10) + ;; the body, as a list + (cddr '(when (> x 10) (print x))) ==> ((PRINT X)) + + +File: pcl.info, Node: 12-6, Next: 12-7, Prev: 12-5, Up: Chapter 12 + +Mapping +======= + +Another important aspect of the functional style is the use of +higher-order functions, functions that take other functions as arguments +or return functions as values. You saw several examples of higher-order +functions, such as MAP, in the previous chapter. Although MAP can be +used with both lists and vectors (that is, with any kind of sequence), +Common Lisp also provides six mapping functions specifically for lists. +The differences between the six functions have to do with how they build +up their result and whether they apply the function to the elements of +the list or to the cons cells of the list structure. + + MAPCAR is the function most like MAP. Because it always returns a +list, it doesn't require the result-type argument MAP does. Instead, +its first argument is the function to apply, and subsequent arguments +are the lists whose elements will provide the arguments to the function. +Otherwise, it behaves like MAP: the function is applied to successive +elements of the list arguments, taking one element from each list per +application of the function. The results of each function call are +collected into a new list. For example: + + (mapcar #'(lambda (x) (* 2 x)) (list 1 2 3)) ==> (2 4 6) + (mapcar #'+ (list 1 2 3) (list 10 20 30)) ==> (11 22 33) + + MAPLIST is just like MAPCAR except instead of passing the elements of +the list to the function, it passes the actual cons cells. (1) Thus, +the function has access not only to the value of each element of the +list (via the CAR of the cons cell) but also to the rest of the list +(via the CDR). + + MAPCAN and MAPCON work like MAPCAR and MAPLIST except for the way +they build up their result. While MAPCAR and MAPLIST build a completely +new list to hold the results of the function calls, MAPCAN and MAPCON +build their result by splicing together the results-which must be +lists-as if by NCONC. Thus, each function invocation can provide any +number of elements to be included in the result. (2) MAPCAN, like +MAPCAR, passes the elements of the list to the mapped function while +MAPCON, like MAPLIST, passes the cons cells. + + Finally, the functions MAPC and MAPL are control constructs disguised +as functions-they simply return their first list argument, so they're +useful only when the side effects of the mapped function do something +interesting. MAPC is the cousin of MAPCAR and MAPCAN while MAPL is in +the MAPLIST/MAPCON family. + + ---------- Footnotes ---------- + + (1) Thus, MAPLIST is the more primitive of the two functions-if you +had only MAPLIST, you could build MAPCAR on top of it, but you couldn't +build MAPLIST on top of MAPCAR. + + (2) In Lisp dialects that didn't have filtering functions like +REMOVE, the idiomatic way to filter a list was with MAPCAN. 
+ + (mapcan #'(lambda (x) (if (= x 10) nil (list x))) list) === (remove 10 list) + + +File: pcl.info, Node: 12-7, Next: Chapter 13, Prev: 12-6, Up: Chapter 12 + +Other Structures +================ + +While cons cells and lists are typically considered to be synonymous, +that's not quite right-as I mentioned earlier, you can use lists of +lists to represent trees. Just as the functions discussed in this +chapter allow you to treat structures built out of cons cells as lists, +other functions allow you to use cons cells to represent trees, sets, +and two kinds of key/value maps. I'll discuss some of those functions +in the next chapter. + + +File: pcl.info, Node: Chapter 13, Next: Chapter 14, Prev: Chapter 12, Up: Top + +13. Beyond Lists: Other Uses for Cons Cells +=========================================== + +As you saw in the previous chapter, the list data type is an illusion +created by a set of functions that manipulate cons cells. Common Lisp +also provides functions that let you treat data structures built out of +cons cells as trees, sets, and lookup tables. In this chapter I'll give +you a quick tour of some of these other data structures and the +functions for manipulating them. As with the list-manipulation +functions, many of these functions will be useful when you start writing +more complicated macros and need to manipulate Lisp code as data. + +* Menu: + +* 13-1:: Trees +* 13-2:: Sets +* 13-3:: Lookup Tables: Alists and Plists +* 13-4:: DESTRUCTURING-BIND + + +File: pcl.info, Node: 13-1, Next: 13-2, Prev: Chapter 13, Up: Chapter 13 + +Trees +===== + +Treating structures built from cons cells as trees is just about as +natural as treating them as lists. What is a list of lists, after all, +but another way of thinking of a tree? The difference between a +function that treats a bunch of cons cells as a list and a function that +treats the same bunch of cons cells as a tree has to do with which cons +cells the functions traverse to find the values of the list or tree. +The cons cells traversed by a list function, called the list structure, +are found by starting at the first cons cell and following CDR +references until reaching a NIL. The elements of the list are the +objects referenced by the CARs of the cons cells in the list structure. +If a cons cell in the list structure has a CAR that references another +cons cell, the referenced cons cell is considered to be the head of a +list that's an element of the outer list. (1) Tree structure, on the +other hand, is traversed by following both CAR and CDR references for as +long as they point to other cons cells. The values in a tree are thus +the atomic-non-cons-cell-values referenced by either the CARs or the +CDRs of the cons cells in the tree structure. + + For instance, the following box-and-arrow diagram shows the cons +cells that make up the list of lists: ((1 2) (3 4) (5 6)). The list +structure includes only the three cons cells inside the dashed box while +the tree structure includes all the cons cells. + +[image src="imgs/list-or-tree.png"] + + To see the difference between a list function and a tree function, +you can consider how the functions COPY-LIST and COPY-TREE will copy +this bunch of cons cells. COPY-LIST, as a list function, copies the +cons cells that make up the list structure. That is, it makes a new +cons cell corresponding to each of the cons cells inside the dashed box. +The CARs of each of these new cons cells reference the same object as +the CARs of the original cons cells in the list structure. 
Thus, +COPY-LIST doesn't copy the sublists (1 2), (3 4), or (5 6), as shown in +this diagram: + +[image src="imgs/copy-list-list-or-tree.png"] + + COPY-TREE, on the other hand, makes a new cons cell for each of the +cons cells in the diagram and links them together in the same structure, +as shown in this diagram: + +[image src="imgs/copy-tree-list-or-tree.png"] + + Where a cons cell in the original referenced an atomic value, the +corresponding cons cell in the copy will reference the same value. +Thus, the only objects referenced in common by the original tree and the +copy produced by COPY-TREE are the numbers 1-6, and the symbol NIL. + + Another function that walks both the CARs and the CDRs of a tree of +cons cells is TREE-EQUAL, which compares two trees, considering them +equal if the tree structure is the same shape and if the leaves are EQL +(or if they satisfy the test supplied with the :test keyword argument). + + Some other tree-centric functions are the tree analogs to the +SUBSTITUTE and NSUBSTITUTE sequence functions and their -IF and -IF-NOT +variants. The function SUBST, like SUBSTITUTE, takes a new item, an old +item, and a tree (as opposed to a sequence), along with :key and :test +keyword arguments, and it returns a new tree with the same shape as the +original tree but with all instances of the old item replaced with the +new item. For example: + + CL-USER> (subst 10 1 '(1 2 (3 2 1) ((1 1) (2 2)))) + (10 2 (3 2 10) ((10 10) (2 2))) + + SUBST-IF is analogous to SUBSTITUTE-IF. Instead of an old item, it +takes a one-argument function-the function is called with each atomic +value in the tree, and whenever it returns true, the position in the new +tree is filled with the new value. SUBST-IF-NOT is the same except the +values where the test returns NIL are replaced. NSUBST, NSUBST-IF, and +NSUBST-IF-NOT are the recycling versions of the SUBST functions. As +with most other recycling functions, you should use these functions only +as drop-in replacements for their nondestructive counterparts in +situations where you know there's no danger of modifying a shared +structure. In particular, you must continue to save the return value of +these functions since you have no guarantee that the result will be EQ +to the original tree. (2) + + ---------- Footnotes ---------- + + (1) It's possible to build a chain of cons cells where the CDR of the +last cons cell isn't NIL but some other atom. This is called a dotted +list because the last cons is a dotted pair. + + (2) It may seem that the NSUBST family of functions can and in fact +does modify the tree in place. However, there's one edge case: when the +"tree" passed is, in fact, an atom, it can't be modified in place, so +the result of NSUBST will be a different object than the argument: +(nsubst 'x 'y 'y) X. + + +File: pcl.info, Node: 13-2, Next: 13-3, Prev: 13-1, Up: Chapter 13 + +Sets +==== + +Sets can also be implemented in terms of cons cells. In fact, you can +treat any list as a set-Common Lisp provides several functions for +performing set-theoretic operations on lists. However, you should bear +in mind that because of the way lists are structured, these operations +get less and less efficient the bigger the sets get. + + That said, using the built-in set functions makes it easy to write +set-manipulation code. And for small sets they may well be more +efficient than the alternatives. 
If profiling shows you that these +functions are a performance bottleneck in your code, you can always +replace the lists with sets built on top of hash tables or bit vectors. + + To build up a set, you can use the function ADJOIN. ADJOIN takes an +item and a list representing a set and returns a list representing the +set containing the item and all the items in the original set. To +determine whether the item is present, it must scan the list; if the +item isn't found, ADJOIN creates a new cons cell holding the item and +pointing to the original list and returns it. Otherwise, it returns the +original list. + + ADJOIN also takes :key and :test keyword arguments, which are used +when determining whether the item is present in the original list. Like +CONS, ADJOIN has no effect on the original list-if you want to modify a +particular list, you need to assign the value returned by ADJOIN to the +place where the list came from. The modify macro PUSHNEW does this for +you automatically. + + CL-USER> (defparameter *set* ()) + *SET* + CL-USER> (adjoin 1 *set*) + (1) + CL-USER> *set* + NIL + CL-USER> (setf *set* (adjoin 1 *set*)) + (1) + CL-USER> (pushnew 2 *set*) + (2 1) + CL-USER> *set* + (2 1) + CL-USER> (pushnew 2 *set*) + (2 1) + + You can test whether a given item is in a set with MEMBER and the +related functions MEMBER-IF and MEMBER-IF-NOT. These functions are +similar to the sequence functions FIND, FIND-IF, and FIND-IF-NOT except +they can be used only with lists. And instead of returning the item +when it's present, they return the cons cell containing the item-in +other words, the sublist starting with the desired item. When the +desired item isn't present in the list, all three functions return NIL. + + The remaining set-theoretic functions provide bulk operations: +INTERSECTION, UNION, SET-DIFFERENCE, and SET-EXCLUSIVE-OR. Each of these +functions takes two lists and :key and :test keyword arguments and +returns a new list representing the set resulting from performing the +appropriate set-theoretic operation on the two lists: INTERSECTION +returns a list containing all the elements found in both arguments. +UNION returns a list containing one instance of each unique element from +the two arguments. (1) SET-DIFFERENCE returns a list containing all the +elements from the first argument that don't appear in the second +argument. And SET-EXCLUSIVE-OR returns a list containing those elements +appearing in only one or the other of the two argument lists but not in +both. Each of these functions also has a recycling counterpart whose +name is the same except with an N prefix. + + Finally, the function SUBSETP takes two lists and the usual :key and +:test keyword arguments and returns true if the first list is a subset +of the second-if every element in the first list is also present in the +second list. The order of the elements in the lists doesn't matter. + + CL-USER> (subsetp '(3 2 1) '(1 2 3 4)) + T + CL-USER> (subsetp '(1 2 3 4) '(3 2 1)) + NIL + + ---------- Footnotes ---------- + + (1) UNION takes only one element from each list, but if either list +contains duplicate elements, the result may also contain duplicates. + + +File: pcl.info, Node: 13-3, Next: 13-4, Prev: 13-2, Up: Chapter 13 + +Lookup Tables: Alists and Plists +================================ + +In addition to trees and sets, you can build tables that map keys to +values out of cons cells. Two flavors of cons-based lookup tables are +commonly used, both of which I've mentioned in passing in previous +chapters. 
They're association lists, also called alists, and property +lists, also known as plists. While you wouldn't use either alists or +plists for large tables-for that you'd use a hash table-it's worth +knowing how to work with them both because for small tables they can be +more efficient than hash tables and because they have some useful +properties of their own. + + An alist is a data structure that maps keys to values and also +supports reverse lookups, finding the key when given a value. Alists +also support adding key/value mappings that shadow existing mappings in +such a way that the shadowing mapping can later be removed and the +original mappings exposed again. + + Under the covers, an alist is essentially a list whose elements are +themselves cons cells. Each element can be thought of as a key/value +pair with the key in the cons cell's CAR and the value in the CDR. For +instance, the following is a box-and-arrow diagram of an alist mapping +the symbol A to the number 1, B to 2, and C to 3: + +[image src="imgs/alist-abc-123.png"] + + Unless the value in the CDR is a list, cons cells representing the +key/value pairs will be dotted pairs in s-expression notation. The +alist diagramed in the previous figure, for instance, is printed like +this: + + ((A . 1) (B . 2) (C . 3)) + + The main lookup function for alists is ASSOC, which takes a key and +an alist and returns the first cons cell whose CAR matches the key or +NIL if no match is found. + + CL-USER> (assoc 'a '((a . 1) (b . 2) (c . 3))) + (A . 1) + CL-USER> (assoc 'c '((a . 1) (b . 2) (c . 3))) + (C . 3) + CL-USER> (assoc 'd '((a . 1) (b . 2) (c . 3))) + NIL + + To get the value corresponding to a given key, you simply pass the +result of ASSOC to CDR. + + CL-USER> (cdr (assoc 'a '((a . 1) (b . 2) (c . 3)))) + 1 + + By default the key given is compared to the keys in the alist using +EQL, but you can change that with the standard combination of :key and +:test keyword arguments. For instance, if you wanted to use string +keys, you might write this: + + CL-USER> (assoc "a" '(("a" . 1) ("b" . 2) ("c" . 3)) :test #'string=) + ("a" . 1) + + Without specifying :test to be STRING=, that ASSOC would probably +return NIL because two strings with the same contents aren't necessarily +EQL. + + CL-USER> (assoc "a" '(("a" . 1) ("b" . 2) ("c" . 3))) + NIL + + Because ASSOC searches the list by scanning from the front of the +list, one key/value pair in an alist can shadow other pairs with the +same key later in the list. + + CL-USER> (assoc 'a '((a . 10) (a . 1) (b . 2) (c . 3))) + (A . 10) + + You can add a pair to the front of an alist with CONS like this: + + (cons (cons 'new-key 'new-value) alist) + + However, as a convenience, Common Lisp provides the function ACONS, +which lets you write this: + + (acons 'new-key 'new-value alist) + + Like CONS, ACONS is a function and thus can't modify the place +holding the alist it's passed. If you want to modify an alist, you need +to write either this: + + (setf alist (acons 'new-key 'new-value alist)) + + or this: + + (push (cons 'new-key 'new-value) alist) + + Obviously, the time it takes to search an alist with ASSOC is a +function of how deep in the list the matching pair is found. In the +worst case, determining that no pair matches requires ASSOC to scan +every element of the alist. However, since the basic mechanism for +alists is so lightweight, for small tables an alist can outperform a +hash table. Also, alists give you more flexibility in how you do the +lookup. 
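+
+   For instance, the same machinery handles case-insensitive string keys,
+or structured keys of which only part matters for the lookup; a couple of
+illustrative calls might look like this:
+
+     CL-USER> (assoc "b" '(("a" . 1) ("B" . 2) ("c" . 3)) :test #'string-equal)
+     ("B" . 2)
+     CL-USER> (assoc 3 '(((1 2) . a) ((3 4) . b)) :key #'first)
+     ((3 4) . B)
+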
I already mentioned that ASSOC takes :key and :test keyword +arguments. When those don't suit your needs, you may be able to use the +ASSOC-IF and ASSOC-IF-NOT functions, which return the first key/value +pair whose CAR satisfies (or not, in the case of ASSOC-IF-NOT) the test +function passed in the place of a specific item. And three +functions-RASSOC, RASSOC-IF, and RASSOC-IF-NOT-work just like the +corresponding ASSOC functions except they use the value in the CDR of +each element as the key, performing a reverse lookup. + + The function COPY-ALIST is similar to COPY-TREE except, instead of +copying the whole tree structure, it copies only the cons cells that +make up the list structure, plus the cons cells directly referenced from +the CARs of those cells. In other words, the original alist and the +copy will both contain the same objects as the keys and values, even if +those keys or values happen to be made up of cons cells. + + Finally, you can build an alist from two separate lists of keys and +values with the function PAIRLIS. The resulting alist may contain the +pairs either in the same order as the original lists or in reverse +order. For example, you may get this result: + + CL-USER> (pairlis '(a b c) '(1 2 3)) + ((C . 3) (B . 2) (A . 1)) + + Or you could just as well get this: + + CL-USER> (pairlis '(a b c) '(1 2 3)) + ((A . 1) (B . 2) (C . 3)) + + The other kind of lookup table is the property list, or plist, which +you used to represent the rows in the database in Chapter 3. +Structurally a plist is just a regular list with the keys and values as +alternating values. For instance, a plist mapping A, B, and C, to 1, 2, +and 3 is simply the list (A 1 B 2 C 3). In boxes-and-arrows form, it +looks like this: + +[image src="imgs/plist-abc-123.png"] + + However, plists are less flexible than alists. In fact, plists +support only one fundamental lookup operation, the function GETF, which +takes a plist and a key and returns the associated value or NIL if the +key isn't found. GETF also takes an optional third argument, which will +be returned in place of NIL if the key isn't found. + + Unlike ASSOC, which uses EQL as its default test and allows a +different test function to be supplied with a :test argument, GETF +always uses EQ to test whether the provided key matches the keys in the +plist. Consequently, you should never use numbers or characters as keys +in a plist; as you saw in Chapter 4, the behavior of EQ for those types +is essentially undefined. Practically speaking, the keys in a plist are +almost always symbols, which makes sense since plists were first +invented to implement symbolic "properties," arbitrary mappings between +names and values. + + You can use SETF with GETF to set the value associated with a given +key. SETF also treats GETF a bit specially in that the first argument +to GETF is treated as the place to modify. Thus, you can use SETF of +GETF to add a new key/value pair to an existing plist. + + CL-USER> (defparameter *plist* ()) + *PLIST* + CL-USER> *plist* + NIL + CL-USER> (setf (getf *plist* :a) 1) + 1 + CL-USER> *plist* + (:A 1) + CL-USER> (setf (getf *plist* :a) 2) + 2 + CL-USER> *plist* + (:A 2) + + To remove a key/value pair from a plist, you use the macro REMF, +which sets the place given as its first argument to a plist containing +all the key/value pairs except the one specified. It returns true if +the given key was actually found. 
+ + CL-USER> (remf *plist* :a) + T + CL-USER> *plist* + NIL + + Like GETF, REMF always uses EQ to compare the given key to the keys +in the plist. + + Since plists are often used in situations where you want to extract +several properties from the same plist, Common Lisp provides a function, +GET-PROPERTIES, that makes it more efficient to extract multiple values +from a single plist. It takes a plist and a list of keys to search for +and returns, as multiple values, the first key found, the corresponding +value, and the head of the list starting with the found key. This +allows you to process a property list, extracting the desired +properties, without continually rescanning from the front of the list. +For instance, the following function efficiently processes-using the +hypothetical function process-property-all the key/value pairs in a +plist for a given list of keys: + + (defun process-properties (plist keys) + (loop while plist do + (multiple-value-bind (key value tail) (get-properties plist keys) + (when key (process-property key value)) + (setf plist (cddr tail))))) + + The last special thing about plists is the relationship they have +with symbols: every symbol object has an associated plist that can be +used to store information about the symbol. The plist can be obtained +via the function SYMBOL-PLIST. However, you rarely care about the whole +plist; more often you'll use the function GET, which takes a symbol and +a key and is shorthand for a GETF of the same key in the symbol's +SYMBOL-PLIST. + + (get 'symbol 'key) === (getf (symbol-plist 'symbol) 'key) + + Like GETF, GET is SETFable, so you can attach arbitrary information +to a symbol like this: + + (setf (get 'some-symbol 'my-key) "information") + + To remove a property from a symbol's plist, you can use either REMF +of SYMBOL-PLIST or the convenience function REMPROP. (1) + + (remprop 'symbol 'key) === (remf (symbol-plist 'symbol) 'key) + + Being able to attach arbitrary information to names is quite handy +when doing any kind of symbolic programming. For instance, one of the +macros you'll write in Chapter 24 will attach information to names that +other instances of the same macros will extract and use when generating +their expansions. + + ---------- Footnotes ---------- + + (1) It's also possible to directly SETF SYMBOL-PLIST. However, that's +a bad idea, as different code may have added different properties to the +symbol's plist for different reasons. If one piece of code clobbers the +symbol's whole plist, it may break other code that added its own +properties to the plist. + + +File: pcl.info, Node: 13-4, Next: Chapter 14, Prev: 13-3, Up: Chapter 13 + +DESTRUCTURING-BIND +================== + +One last tool for slicing and dicing lists that I need to cover since +you'll need it in later chapters is the DESTRUCTURING-BIND macro. This +macro provides a way to destructure arbitrary lists, similar to the way +macro parameter lists can take apart their argument list. The basic +skeleton of a DESTRUCTURING-BIND is as follows: + + (destructuring-bind (parameter*) list + body-form*) + + The parameter list can include any of the types of parameters +supported in macro parameter lists such as &optional, &rest, and &key +parameters. (1) And, as in macro parameter lists, any parameter can be +replaced with a nested destructuring parameter list, which takes apart +the list that would otherwise have been bound to the replaced parameter.
+The list form is evaluated once and should return a list, which is then +destructured and the appropriate values are bound to the variables in +the parameter list. Then the body-forms are evaluated in order with +those bindings in effect. Some simple examples follow: + + (destructuring-bind (x y z) (list 1 2 3) + (list :x x :y y :z z)) ==> (:X 1 :Y 2 :Z 3) + + (destructuring-bind (x y z) (list 1 (list 2 20) 3) + (list :x x :y y :z z)) ==> (:X 1 :Y (2 20) :Z 3) + + (destructuring-bind (x (y1 y2) z) (list 1 (list 2 20) 3) + (list :x x :y1 y1 :y2 y2 :z z)) ==> (:X 1 :Y1 2 :Y2 20 :Z 3) + + (destructuring-bind (x (y1 &optional y2) z) (list 1 (list 2 20) 3) + (list :x x :y1 y1 :y2 y2 :z z)) ==> (:X 1 :Y1 2 :Y2 20 :Z 3) + + (destructuring-bind (x (y1 &optional y2) z) (list 1 (list 2) 3) + (list :x x :y1 y1 :y2 y2 :z z)) ==> (:X 1 :Y1 2 :Y2 NIL :Z 3) + + (destructuring-bind (&key x y z) (list :x 1 :y 2 :z 3) + (list :x x :y y :z z)) ==> (:X 1 :Y 2 :Z 3) + + (destructuring-bind (&key x y z) (list :z 1 :y 2 :x 3) + (list :x x :y y :z z)) ==> (:X 3 :Y 2 :Z 1) + + One kind of parameter you can use with DESTRUCTURING-BIND and also in +macro parameter lists, though I didn't mention it in Chapter 8, is a +&whole parameter. If specified, it must be the first parameter in a +parameter list, and it's bound to the whole list form. (2) After a +&whole parameter, other parameters can appear as usual and will extract +specific parts of the list just as they would if the &whole parameter +weren't there. An example of using &whole with DESTRUCTURING-BIND looks +like this: + + (destructuring-bind (&whole whole &key x y z) (list :z 1 :y 2 :x 3) + (list :x x :y y :z z :whole whole)) + ==> (:X 3 :Y 2 :Z 1 :WHOLE (:Z 1 :Y 2 :X 3)) + + You'll use a &whole parameter in one of the macros that's part of the +HTML generation library you'll develop in Chapter 31. However, I have a +few more topics to cover before you can get to that. After two chapters +on the rather Lispy topic of cons cells, you can now turn to the more +prosaic matter of how to deal with files and filenames. + + ---------- Footnotes ---------- + + (1) Macro parameter lists do support one parameter type, &environment +parameters, which DESTRUCTURING-BIND doesn't. However, I didn't discuss +that parameter type in Chapter 8, and you don't need to worry about it +now either. + + (2) When a &whole parameter is used in a macro parameter list, the +form it's bound to is the whole macro form, including the name of the +macro. + + +File: pcl.info, Node: Chapter 14, Next: Chapter 15, Prev: Chapter 13, Up: Top + +14. Files and File I/O +====================== + +Common Lisp provides a rich library of functionality for dealing with +files. In this chapter I'll focus on a few basic file-related tasks: +reading and writing files and listing files in the file system. For +these basic tasks, Common Lisp's I/O facilities are similar to those in +other languages. Common Lisp provides a stream abstraction for reading +and writing data and an abstraction, called pathnames, for manipulating +filenames in an operating system-independent way. Additionally, Common +Lisp provides other bits of functionality unique to Lisp such as the +ability to read and write s-expressions. 
+ +* Menu: + +* 14-1:: Reading File Data +* 14-2:: Reading Binary Data +* 14-3:: Bulk Reads +* 14-4:: File Output +* 14-5:: Closing Files +* 14-6:: Filenames +* 14-7:: How Pathnames Represent Filenames +* 14-8:: Constructing New Pathnames +* 14-9:: Two Representations of Directory Names +* 14-10:: Interacting with the File System +* 14-11:: Other Kinds of I/O + + +File: pcl.info, Node: 14-1, Next: 14-2, Prev: Chapter 14, Up: Chapter 14 + +Reading File Data +================= + +The most basic file I/O task is to read the contents of a file. You +obtain a stream from which you can read a file's contents with the OPEN +function. By default OPEN returns a character-based input stream you +can pass to a variety of functions that read one or more characters of +text: READ-CHAR reads a single character; READ-LINE reads a line of +text, returning it as a string with the end-of-line character(s) +removed; and READ reads a single s-expression, returning a Lisp object. +When you're done with the stream, you can close it with the CLOSE +function. + + The only required argument to OPEN is the name of the file to read. +As you'll see in the section "Filenames," Common Lisp provides a couple +of ways to represent a filename, but the simplest is to use a string +containing the name in the local file-naming syntax. So assuming that +/some/file/name.txt is a file, you can open it like this: + + (open "/some/file/name.txt") + + You can use the object returned as the first argument to any of the +read functions. For instance, to print the first line of the file, you +can combine OPEN, READ-LINE, and CLOSE as follows: + + (let ((in (open "/some/file/name.txt"))) + (format t "~a~%" (read-line in)) + (close in)) + + Of course, a number of things can go wrong while trying to open and +read from a file. The file may not exist. Or you may unexpectedly hit +the end of the file while reading. By default OPEN and the READ-* +functions will signal an error in these situations. In Chapter 19, I'll +discuss how to recover from such errors. For now, however, there's a +lighter-weight solution: each of these functions accepts arguments that +modify its behavior in these exceptional situations. + + If you want to open a possibly nonexistent file without OPEN +signaling an error, you can use the keyword argument :if-does-not-exist +to specify a different behavior. The three possible values are :error, +the default; :create, which tells it to go ahead and create the file and +then proceed as if it had already existed; and NIL, which tells it to +return NIL instead of a stream. Thus, you can change the previous +example to deal with the possibility that the file may not exist. + + (let ((in (open "/some/file/name.txt" :if-does-not-exist nil))) + (when in + (format t "~a~%" (read-line in)) + (close in))) + + The reading functions-READ-CHAR, READ-LINE, and READ-all take an +optional argument, which defaults to true, that specifies whether they +should signal an error if they're called at the end of the file. If +that argument is NIL, they instead return the value of their third +argument, which defaults to NIL. Thus, you could print all the lines in +a file like this: + + (let ((in (open "/some/file/name.txt" :if-does-not-exist nil))) + (when in + (loop for line = (read-line in nil) + while line do (format t "~a~%" line)) + (close in))) + + Of the three text-reading functions, READ is unique to Lisp. This is +the same function that provides the R in the REPL and that's used to +read Lisp source code. 
Each time it's called, it reads a single +s-expression, skipping whitespace and comments, and returns the Lisp +object denoted by the s-expression. For instance, suppose +/some/file/name.txt has the following contents: + + (1 2 3) + 456 + "a string" ; this is a comment + ((a b) + (c d)) + + In other words, it contains four s-expressions: a list of numbers, a +number, a string, and a list of lists. You can read those expressions +like this: + + CL-USER> (defparameter *s* (open "/some/file/name.txt")) + *S* + CL-USER> (read *s*) + (1 2 3) + CL-USER> (read *s*) + 456 + CL-USER> (read *s*) + "a string" + CL-USER> (read *s*) + ((A B) (C D)) + CL-USER> (close *s*) + T + + As you saw in Chapter 3, you can use PRINT to print Lisp objects in +"readable" form. Thus, whenever you need to store a bit of data in a +file, PRINT and READ provide an easy way to do it without having to +design a data format or write a parser. They even-as the previous +example demonstrated-give you comments for free. And because +s-expressions were designed to be human editable, it's also a fine +format for things like configuration files. (1) + + ---------- Footnotes ---------- + + (1) Note, however, that while the Lisp reader knows how to skip +comments, it completely skips them. Thus, if you use READ to read in a +configuration file containing comments and then use PRINT to save +changes to the data, you'll lose the comments. + + +File: pcl.info, Node: 14-2, Next: 14-3, Prev: 14-1, Up: Chapter 14 + +Reading Binary Data +=================== + +By default OPEN returns character streams, which translate the +underlying bytes to characters according to a particular +character-encoding scheme. (1) To read the raw bytes, you need to pass +OPEN an :element-type argument of '(unsigned-byte 8). (2) You can pass +the resulting stream to the function READ-BYTE, which will return an +integer between 0 and 255 each time it's called. READ-BYTE, like the +character-reading functions, also accepts optional arguments to specify +whether it should signal an error if called at the end of the file and +what value to return if not. In Chapter 24 you'll build a library that +allows you to conveniently read structured binary data using READ-BYTE. +(3) + + ---------- Footnotes ---------- + + (1) By default OPEN uses the default character encoding for the +operating system, but it also accepts a keyword parameter, +:external-format, that can pass implementation-defined values that +specify a different encoding. Character streams also translate the +platform-specific end-of-line sequence to the single character +#\Newline. + + (2) The type (unsigned-byte 8) indicates an 8-bit byte; Common Lisp +"byte" types aren't a fixed size since Lisp has run at various times on +architectures with byte sizes from 6 to 9 bits, to say nothing of the +PDP-10, which had individually addressable variable-length bit fields of +1 to 36 bits. + + (3) In general, a stream is either a character stream or a binary +stream, so you can't mix calls to READ-BYTE and READ-CHAR or other +character-based read functions. However, some implementations, such as +Allegro, support so-called bivalent streams, which support both +character and binary I/O. + + +File: pcl.info, Node: 14-3, Next: 14-4, Prev: 14-2, Up: Chapter 14 + +Bulk Reads +========== + +One last reading function, READ-SEQUENCE, works with both character and +binary streams. You pass it a sequence (typically a vector) and a +stream, and it attempts to fill the sequence with data from the stream. 
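+
+   For instance, assuming filename is bound to the name of some file you
+can read, a sketch like the following fills a fixed-size buffer with the
+first chunk of the file's bytes:
+
+     (let* ((in (open filename :element-type '(unsigned-byte 8)))
+            (buffer (make-array 4096 :element-type '(unsigned-byte 8)))
+            (count (read-sequence buffer in)))   ; count is how much was read
+       (close in)
+       (subseq buffer 0 count))
+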
+It returns the index of the first element of the sequence that wasn't +filled or the length of the sequence if it was able to completely fill +it. You can also pass :start and :end keyword arguments to specify a +subsequence that should be filled instead. The sequence argument must +be a type that can hold elements of the stream's element type. Since +most operating systems support some form of block I/O, READ-SEQUENCE is +likely to be quite a bit more efficient than filling a sequence by +repeatedly calling READ-BYTE or READ-CHAR. + + +File: pcl.info, Node: 14-4, Next: 14-5, Prev: 14-3, Up: Chapter 14 + +File Output +=========== + +To write data to a file, you need an output stream, which you obtain by +calling OPEN with a :direction keyword argument of :output. When +opening a file for output, OPEN assumes the file shouldn't already exist +and will signal an error if it does. However, you can change that +behavior with the :if-exists keyword argument. Passing the value +:supersede tells OPEN to replace the existing file. Passing :append +causes OPEN to open the existing file such that new data will be written +at the end of the file, while :overwrite returns a stream that will +overwrite existing data starting from the beginning of the file. And +passing NIL will cause OPEN to return NIL instead of a stream if the +file already exists. A typical use of OPEN for output looks like this: + + (open "/some/file/name.txt" :direction :output :if-exists :supersede) + + Common Lisp also provides several functions for writing data: +WRITE-CHAR writes a single character to the stream. WRITE-LINE writes a +string followed by a newline, which will be output as the appropriate +end-of-line character or characters for the platform. Another function, +WRITE-STRING, writes a string without adding any end-of-line characters. +Two different functions can print just a newline: TERPRI-short for +"terminate print"-unconditionally prints a newline character, and +FRESH-LINE prints a newline character unless the stream is at the +beginning of a line. FRESH-LINE is handy when you want to avoid +spurious blank lines in textual output generated by different functions +called in sequence. For example, suppose you have one function that +generates output that should always be followed by a line break and +another that should start on a new line. But assume that if the +functions are called one after the other, you don't want a blank line +between the two bits of output. If you use FRESH-LINE at the beginning +of the second function, its output will always start on a new line, but +if it's called right after the first, it won't emit an extra line break. + + Several functions output Lisp data as s-expressions: PRINT prints an +s-expression preceded by an end-of-line and followed by a space. PRIN1 +prints just the s-expression. And the function PPRINT prints +s-expressions like PRINT and PRIN1 but using the "pretty printer," which +tries to print its output in an aesthetically pleasing way. + + However, not all objects can be printed in a form that READ will +understand. The variable *PRINT-READABLY* controls what happens if you +try to print such an object with PRINT, PRIN1, or PPRINT. When it's NIL, +these functions will print the object in a special syntax that's +guaranteed to cause READ to signal an error if it tries to read it; +otherwise they will signal an error rather than print the object. + + Another function, PRINC, also prints Lisp objects, but in a way +designed for human consumption. 
For instance, PRINC prints strings +without quotation marks. You can generate more elaborate text output +with the incredibly flexible if somewhat arcane FORMAT function. I'll +discuss some of the more important details of FORMAT, which essentially +defines a mini-language for emitting formatted output, in Chapter 18. + + To write binary data to a file, you have to OPEN the file with the +same :element-type argument as you did to read it: '(unsigned-byte 8). +You can then write individual bytes to the stream with WRITE-BYTE. + + The bulk output function WRITE-SEQUENCE accepts both binary and +character streams as long as all the elements of the sequence are of an +appropriate type for the stream, either characters or bytes. As with +READ-SEQUENCE, this function is likely to be quite a bit more efficient +than writing the elements of the sequence one at a time. + + +File: pcl.info, Node: 14-5, Next: 14-6, Prev: 14-4, Up: Chapter 14 + +Closing Files +============= + +As anyone who has written code that deals with lots of files knows, it's +important to close files when you're done with them, because file +handles tend to be a scarce resource. If you open files and don't close +them, you'll soon discover you can't open any more files. (1) It might +seem straightforward enough to just be sure every OPEN has a matching +CLOSE. For instance, you could always structure your file using code +like this: + + (let ((stream (open "/some/file/name.txt"))) + ;; do stuff with stream + (close stream)) + + However, this approach suffers from two problems. One is simply that +it's error prone-if you forget the CLOSE, the code will leak a file +handle every time it runs. The other-and more significant-problem is +that there's no guarantee you'll get to the CLOSE. For instance, if the +code prior to the CLOSE contains a RETURN or RETURN-FROM, you could +leave the LET without closing the stream. Or, as you'll see in Chapter +19, if any of the code before the CLOSE signals an error, control may +jump out of the LET to an error handler and never come back to close the +stream. + + Common Lisp provides a general solution to the problem of how to +ensure that certain code always runs: the special operator +UNWIND-PROTECT, which I'll discuss in Chapter 20. However, because the +pattern of opening a file, doing something with the resulting stream, +and then closing the stream is so common, Common Lisp provides a macro, +WITH-OPEN-FILE, built on top of UNWIND-PROTECT, to encapsulate this +pattern. This is the basic form: + + (with-open-file (stream-var open-argument*) + body-form*) + + The forms in body-forms are evaluated with stream-var bound to a file +stream opened by a call to OPEN with open-arguments as its arguments. +WITH-OPEN-FILE then ensures the stream in stream-var is closed before +the WITH-OPEN-FILE form returns. Thus, you can write this to read a +line from a file: + + (with-open-file (stream "/some/file/name.txt") + (format t "~a~%" (read-line stream))) + + To create a new file, you can write something like this: + + (with-open-file (stream "/some/file/name.txt" :direction :output) + (format stream "Some text.")) + + You'll probably use WITH-OPEN-FILE for 90-99 percent of the file I/O +you do-the only time you need to use raw OPEN and CLOSE calls is if you +need to open a file in a function and keep the stream around after the +function returns. 
In that case, you must take care to eventually close +the stream yourself, or you'll leak file descriptors and may eventually +end up unable to open any more files. + + ---------- Footnotes ---------- + + (1) Some folks expect this wouldn't be a problem in a +garbage-collected language such as Lisp. It is the case in most Lisp +implementations that a stream that becomes garbage will automatically be +closed. However, this isn't something to rely on-the problem is that +garbage collectors usually run only when memory is low; they don't know +about other scarce resources such as file handles. If there's plenty of +memory available, it's easy to run out of file handles long before the +garbage collector runs. + + +File: pcl.info, Node: 14-6, Next: 14-7, Prev: 14-5, Up: Chapter 14 + +Filenames +========= + +So far you've used strings to represent filenames. However, using +strings as filenames ties your code to a particular operating system and +file system. Likewise, if you programmatically construct names +according to the rules of a particular naming scheme (separating +directories with /, say), you also tie your code to a particular file +system. + + To avoid this kind of nonportability, Common Lisp provides another +representation of filenames: pathname objects. Pathnames represent +filenames in a structured way that makes them easy to manipulate without +tying them to a particular filename syntax. And the burden of +translating back and forth between strings in the local syntax-called +namestrings-and pathnames is placed on the Lisp implementation. + + Unfortunately, as with many abstractions designed to hide the details +of fundamentally different underlying systems, the pathname abstraction +introduces its own complications. When pathnames were designed, the set +of file systems in general use was quite a bit more variegated than +those in common use today. Consequently, some nooks and crannies of the +pathname abstraction make little sense if all you're concerned about is +representing Unix or Windows filenames. However, once you understand +which parts of the pathname abstraction you can ignore as artifacts of +pathnames' evolutionary history, they do provide a convenient way to +manipulate filenames. (1) + + Most places a filename is called for, you can use either a namestring +or a pathname. Which to use depends mostly on where the name +originated. Filenames provided by the user-for example, as arguments or +as values in configuration files-will typically be namestrings, since +the user knows what operating system they're running on and shouldn't be +expected to care about the details of how Lisp represents filenames. +But programmatically generated filenames will be pathnames because you +can create them portably. A stream returned by OPEN also represents a +filename, namely, the filename that was originally used to open the +stream. Together these three types are collectively referred to as +pathname designators. All the built-in functions that expect a filename +argument accept all three types of pathname designator. For instance, +all the places in the previous section where you used a string to +represent a filename, you could also have passed a pathname object or a +stream. + +* Menu: + +* 14-6-1:: How We Got Here + + ---------- Footnotes ---------- + + (1) Another reason the pathname system is considered somewhat baroque +is because of the inclusion of logical pathnames. 
However, you can use +the rest of the pathname system perfectly well without knowing anything +more about logical pathnames than that you can safely ignore them. +Briefly, logical pathnames allow Common Lisp programs to contain +references to pathnames without naming specific files. Logical +pathnames could then be mapped to specific locations in an actual file +system when the program was installed by defining a "logical pathname +translation" that translates logical pathnames matching certain +wildcards to pathnames representing files in the file system, so-called +physical pathnames. They have their uses in certain situations, but you +can get pretty far without worrying about them. + + +File: pcl.info, Node: 14-6-1, Next: 14-6, Prev: 14-6, Up: 14-6 + +How We Got Here +=============== + +The historical diversity of file systems in existence during the 70s and +80s can be easy to forget. Kent Pitman, one of the principal technical +editors of the Common Lisp standard, described the situation once in +comp.lang.lisp (Message-ID: sfwzo74np6w.fsf@world.std.com) thusly: + + The dominant file systems at the time the design [of Common Lisp] + was done were TOPS-10, TENEX, TOPS-20, VAX VMS, AT&T Unix, MIT + Multics, MIT ITS, not to mention a bunch of mainframe [OSs]. Some + were uppercase only, some mixed, some were case-sensitive but case- + translating (like CL). Some had dirs as files, some not. Some had + quote chars for funny file chars, some not. Some had wildcards, + some didn't. Some had :up in relative pathnames, some didn't. + Some had namable root dirs, some didn't. There were file systems + with no directories, file systems with non-hierarchical + directories, file systems with no file types, file systems with no + versions, file systems with no devices, and so on. + + If you look at the pathname abstraction from the point of view of any +single file system, it seems baroque. However, if you take even two +such similar file systems as Windows and Unix, you can already begin to +see differences the pathname system can help abstract away-Windows +filenames contain a drive letter, for instance, while Unix filenames +don't. The other advantage of having the pathname abstraction designed +to handle the wide variety of file systems that existed in the past is +that it's more likely to be able to handle file systems that may exist +in the future. If, say, versioning file systems come back into vogue, +Common Lisp will be ready. + + +File: pcl.info, Node: 14-7, Next: 14-8, Prev: 14-6, Up: Chapter 14 + +How Pathnames Represent Filenames +================================= + +A pathname is a structured object that represents a filename using six +components: host, device, directory, name, type, and version. Most of +these components take on atomic values, usually strings; only the +directory component is further structured, containing a list of +directory names (as strings) prefaced with the keyword :absolute or +:relative. However, not all pathname components are needed on all +platforms-this is one of the reasons pathnames strike many new Lispers +as gratuitously complex. On the other hand, you don't really need to +worry about which components may or may not be used to represent names +on a particular file system unless you need to create a new pathname +object from scratch, which you'll almost never need to do. 
Instead, +you'll usually get hold of pathname objects either by letting the +implementation parse a file system-specific namestring into a pathname +object or by creating a new pathname that takes most of its components +from an existing pathname. + + For instance, to translate a namestring to a pathname, you use the +PATHNAME function. It takes a pathname designator and returns an +equivalent pathname object. When the designator is already a pathname, +it's simply returned. When it's a stream, the original filename is +extracted and returned. When the designator is a namestring, however, +it's parsed according to the local filename syntax. The language +standard, as a platform-neutral document, doesn't specify any particular +mapping from namestring to pathname, but most implementations follow the +same conventions on a given operating system. + + On Unix file systems, only the directory, name, and type components +are typically used. On Windows, one more component-usually the device +or host-holds the drive letter. On these platforms, a namestring is +parsed by first splitting it into elements on the path separator-a slash +on Unix and a slash or backslash on Windows. The drive letter on +Windows will be placed into either the device or the host component. +All but the last of the other name elements are placed in a list +starting with :absolute or :relative depending on whether the name +(ignoring the drive letter, if any) began with a path separator. This +list becomes the directory component of the pathname. The last element +is then split on the rightmost dot, if any, and the two parts put into +the name and type components of the pathname. (1) + + You can examine these individual components of a pathname with the +functions PATHNAME-DIRECTORY, PATHNAME-NAME, and PATHNAME-TYPE. + + (pathname-directory (pathname "/foo/bar/baz.txt")) ==> (:ABSOLUTE "foo" "bar") + (pathname-name (pathname "/foo/bar/baz.txt")) ==> "baz" + (pathname-type (pathname "/foo/bar/baz.txt")) ==> "txt" + + Three other functions-PATHNAME-HOST, PATHNAME-DEVICE, and +PATHNAME-VERSION-allow you to get at the other three pathname +components, though they're unlikely to have interesting values on Unix. +On Windows either PATHNAME-HOST or PATHNAME-DEVICE will return the drive +letter. + + Like many other built-in objects, pathnames have their own read +syntax, #p followed by a double-quoted string. This allows you to print +and read back s-expressions containing pathname objects, but because the +syntax depends on the namestring parsing algorithm, such data isn't +necessarily portable between operating systems. + + (pathname "/foo/bar/baz.txt") ==> #p"/foo/bar/baz.txt" + + To translate a pathname back to a namestring-for instance, to present +to the user-you can use the function NAMESTRING, which takes a pathname +designator and returns a namestring. Two other functions, +DIRECTORY-NAMESTRING and FILE-NAMESTRING, return a partial namestring. +DIRECTORY-NAMESTRING combines the elements of the directory component +into a local directory name, and FILE-NAMESTRING combines the name and +type components. 
(2) + + (namestring #p"/foo/bar/baz.txt") ==> "/foo/bar/baz.txt" + (directory-namestring #p"/foo/bar/baz.txt") ==> "/foo/bar/" + (file-namestring #p"/foo/bar/baz.txt") ==> "baz.txt" + + ---------- Footnotes ---------- + + (1) Many Unix-based implementations treat filenames whose last +element starts with a dot and don't contain any other dots specially, +putting the whole element, with the dot, in the name component and +leaving the type component NIL. + + (pathname-name (pathname "/foo/.emacs")) ==> ".emacs" + (pathname-type (pathname "/foo/.emacs")) ==> NIL + + However, not all implementations follow this convention; some will +create a pathname with "" as the name and emacs as the type. + + (2) The name returned by FILE-NAMESTRING also includes the version +component on file systems that use it. + + +File: pcl.info, Node: 14-8, Next: 14-9, Prev: 14-7, Up: Chapter 14 + +Constructing New Pathnames +========================== + +You can construct arbitrary pathnames using the MAKE-PATHNAME function. +It takes one keyword argument for each pathname component and returns a +pathname with any supplied components filled in and the rest NIL. (1) + + (make-pathname + :directory '(:absolute "foo" "bar") + :name "baz" + :type "txt") ==> #p"/foo/bar/baz.txt" + + However, if you want your programs to be portable, you probably don't +want to make pathnames completely from scratch: even though the pathname +abstraction protects you from unportable filename syntax, filenames can +be unportable in other ways. For instance, the filename +/home/peter/foo.txt is no good on an OS X box where /home/ is called +/Users/. + + Another reason not to make pathnames completely from scratch is that +different implementations use the pathname components slightly +differently. For instance, as mentioned previously, some Windows-based +Lisp implementations store the drive letter in the device component +while others store it in the host component. If you write code like +this: + + (make-pathname :device "c" :directory '(:absolute "foo" "bar") :name "baz") + + it will be correct on some implementations but not on others. + + Rather than making names from scratch, you can build a new pathname +based on an existing pathname with MAKE-PATHNAME's keyword parameter +:defaults. With this parameter you can provide a pathname designator, +which will supply the values for any components not specified by other +arguments. For example, the following expression creates a pathname +with an .html extension and all other components the same as the +pathname in the variable input-file: + + (make-pathname :type "html" :defaults input-file) + + Assuming the value in input-file was a user-provided name, this code +will be robust in the face of operating system and implementation +differences such as whether filenames have drive letters in them and +where they're stored in a pathname if they do. (2) + + You can use the same technique to create a pathname with a different +directory component. + + (make-pathname :directory '(:relative "backups") :defaults input-file) + + However, this will create a pathname whose whole directory component +is the relative directory backups/, regardless of any directory +component input-file may have had. For example: + + (make-pathname :directory '(:relative "backups") + :defaults #p"/foo/bar/baz.txt") ==> #p"backups/baz.txt" + + Sometimes, though, you want to combine two pathnames, at least one of +which has a relative directory component, by combining their directory +components. 
For instance, suppose you have a relative pathname such as +#p"foo/bar.html" that you want to combine with an absolute pathname such +as #p"/www/html/" to get #p"/www/html/foo/bar.html". In that case, +MAKE-PATHNAME won't do; instead, you want MERGE-PATHNAMES. + + MERGE-PATHNAMES takes two pathnames and merges them, filling in any +NIL components in the first pathname with the corresponding value from +the second pathname, much like MAKE-PATHNAME fills in any unspecified +components with components from the :defaults argument. However, +MERGE-PATHNAMES treats the directory component specially: if the first +pathname's directory is relative, the directory component of the +resulting pathname will be the first pathname's directory relative to +the second pathname's directory. Thus: + + (merge-pathnames #p"foo/bar.html" #p"/www/html/") ==> #p"/www/html/foo/bar.html" + + The second pathname can also be relative, in which case the resulting +pathname will also be relative. + + (merge-pathnames #p"foo/bar.html" #p"html/") ==> #p"html/foo/bar.html" + + To reverse this process and obtain a filename relative to a +particular root directory, you can use the handy function +ENOUGH-NAMESTRING. + + (enough-namestring #p"/www/html/foo/bar.html" #p"/www/") ==> "html/foo/bar.html" + + You can then combine ENOUGH-NAMESTRING with MERGE-PATHNAMES to create +a pathname representing the same name but in a different root. + + (merge-pathnames + (enough-namestring #p"/www/html/foo/bar/baz.html" #p"/www/") + #p"/www-backups/") ==> #p"/www-backups/html/foo/bar/baz.html" + + MERGE-PATHNAMES is also used internally by the standard functions +that actually access files in the file system to fill in incomplete +pathnames. For instance, suppose you make a pathname with just a name +and a type. + + (make-pathname :name "foo" :type "txt") ==> #p"foo.txt" + + If you try to use this pathname as an argument to OPEN, the missing +components, such as the directory, must be filled in before Lisp will be +able to translate the pathname to an actual filename. Common Lisp will +obtain values for the missing components by merging the given pathname +with the value of the variable *DEFAULT-PATHNAME-DEFAULTS*. The initial +value of this variable is determined by the implementation but is +usually a pathname with a directory component representing the directory +where Lisp was started and appropriate values for the host and device +components, if needed. If invoked with just one argument, +MERGE-PATHNAMES will merge the argument with the value of +*DEFAULT-PATHNAME-DEFAULTS*. For instance, if +*DEFAULT-PATHNAME-DEFAULTS* is #p"/home/peter/", then you'd get the +following: + + (merge-pathnames #p"foo.txt") ==> #p"/home/peter/foo.txt" + + ---------- Footnotes ---------- + + (1) The host component may not default to NIL, but if not, it will be +an opaque implementation-defined value. + + (2) For absolutely maximum portability, you should really write this: + + (make-pathname :type "html" :version :newest :defaults input-file) + + Without the :version argument, on a file system with built-in +versioning, the output pathname would inherit its version number from +the input file which isn't likely to be right-if the input file has been +saved many times it will have a much higher version number than the +generated HTML file. On implementations without file versioning, the +:version argument should be ignored. It's up to you if you care that +much about portability. 
+ + +File: pcl.info, Node: 14-9, Next: 14-10, Prev: 14-8, Up: Chapter 14 + +Two Representations of Directory Names +====================================== + +When dealing with pathnames that name directories, you need to be aware +of one wrinkle. Pathnames separate the directory and name components, +but Unix and Windows consider directories just another kind of file. +Thus, on those systems, every directory has two different pathname +representations. + + One representation, which I'll call file form, treats a directory +like any other file and puts the last element of the namestring into the +name and type components. The other representation, directory form, +places all the elements of the name in the directory component, leaving +the name and type components NIL. If /foo/bar/ is a directory, then both +of the following pathnames name it. + + (make-pathname :directory '(:absolute "foo") :name "bar") ; file form + (make-pathname :directory '(:absolute "foo" "bar")) ; directory form + + When you create pathnames with MAKE-PATHNAME, you can control which +form you get, but you need to be careful when dealing with namestrings. +All current implementations create file form pathnames unless the +namestring ends with a path separator. But you can't rely on +user-supplied namestrings necessarily being in one form or another. For +instance, suppose you've prompted the user for a directory to save a +file in and they entered "/home/peter". If you pass that value as the +:defaults argument of MAKE-PATHNAME like this: + + (make-pathname :name "foo" :type "txt" :defaults user-supplied-name) + + you'll end up saving the file in /home/foo.txt rather than the +intended /home/peter/foo.txt because the "peter" in the namestring will +be placed in the name component when user-supplied-name is converted to +a pathname. In the pathname portability library I'll discuss in the +next chapter, you'll write a function called pathname-as-directory that +converts a pathname to directory form. With that function you can +reliably save the file in the directory indicated by the user. + + (make-pathname + :name "foo" :type "txt" :defaults (pathname-as-directory user-supplied-name)) + + +File: pcl.info, Node: 14-10, Next: 14-11, Prev: 14-9, Up: Chapter 14 + +Interacting with the File System +================================ + +While the most common interaction with the file system is probably +OPENing files for reading and writing, you'll also occasionally want to +test whether a file exists, list the contents of a directory, delete and +rename files, create directories, and get information about a file such +as who owns it, when it was last modified, and its length. This is +where the generality of the pathname abstraction begins to cause a bit +of pain: because the language standard doesn't specify how functions +that interact with the file system map to any specific file system, +implementers are left with a fair bit of leeway. + + That said, most of the functions that interact with the file system +are still pretty straightforward. I'll discuss the standard functions +here and point out the ones that suffer from nonportability between +implementations. In the next chapter you'll develop a pathname +portability library to smooth over some of those nonportability issues. + + To test whether a file exists in the file system corresponding to a +pathname designator-a pathname, namestring, or file stream-you can use +the function PROBE-FILE. 
If the file named by the pathname designator +exists, PROBE-FILE returns the file's truename, a pathname with any file +system-level translations such as resolving symbolic links performed. +Otherwise, it returns NIL. However, not all implementations support +using this function to test whether a directory exists. Also, Common +Lisp doesn't provide a portable way to test whether a given file that +exists is a regular file or a directory. In the next chapter you'll +wrap PROBE-FILE with a new function, file-exists-p, that can both test +whether a directory exists and tell you whether a given name is the name +of a file or directory. + + Similarly, the standard function for listing files in the file +system, DIRECTORY, works fine for simple cases, but the differences +between implementations make it tricky to use portably. In the next +chapter you'll define a list-directory function that smoothes over some +of these differences. + + DELETE-FILE and RENAME-FILE do what their names suggest. DELETE-FILE +takes a pathname designator and deletes the named file, returning true +if it succeeds. Otherwise it signals a FILE-ERROR. (1) + + RENAME-FILE takes two pathname designators and renames the file named +by the first name to the second name. + + You can create directories with the function +ENSURE-DIRECTORIES-EXIST. It takes a pathname designator and ensures +that all the elements of the directory component exist and are +directories, creating them as necessary. It returns the pathname it was +passed, which makes it convenient to use inline. + + (with-open-file (out (ensure-directories-exist name) :direction :output) + ... + ) + + Note that if you pass ENSURE-DIRECTORIES-EXIST a directory name, it +should be in directory form, or the leaf directory won't be created. + + The functions FILE-WRITE-DATE and FILE-AUTHOR both take a pathname +designator. FILE-WRITE-DATE returns the time in number of seconds since +midnight January 1, 1900, Greenwich mean time (GMT), that the file was +last written, and FILE-AUTHOR returns, on Unix and Windows, the file +owner. (2) + + To find the length of a file, you can use the function FILE-LENGTH. +For historical reasons FILE-LENGTH takes a stream as an argument rather +than a pathname. In theory this allows FILE-LENGTH to return the length +in terms of the element type of the stream. However, since on most +present-day operating systems, the only information available about the +length of a file, short of actually reading the whole file to measure +it, is its length in bytes, that's what most implementations return, +even when FILE-LENGTH is passed a character stream. However, the +standard doesn't require this behavior, so for predictable results, the +best way to get the length of a file is to use a binary stream. (3) + + (with-open-file (in filename :element-type '(unsigned-byte 8)) + (file-length in)) + + A related function that also takes an open file stream as its +argument is FILE-POSITION. When called with just a stream, this function +returns the current position in the file-the number of elements that +have been read from or written to the stream. When called with two +arguments, the stream and a position designator, it sets the position of +the stream to the designated position. The position designator must be +the keyword :start, the keyword :end, or a non-negative integer. The +two keywords set the position of the stream to the start or end of the +file while an integer moves to the indicated position in the file. 
With +a binary stream the position is simply a byte offset into the file. +However, for character streams things are a bit more complicated because +of character-encoding issues. Your best bet, if you need to jump around +within a file of textual data, is to only ever pass, as a second +argument to the two-argument version of FILE-POSITION, a value +previously returned by the one-argument version of FILE-POSITION with +the same stream argument. + + ---------- Footnotes ---------- + + (1) See Chapter 19 for more on handling errors. + + (2) For applications that need access to other file attributes on a +particular operating system or file system, libraries provide bindings +to underlying C system calls. The Osicat library at +http://common-lisp.net/project/osicat/ provides a simple API built using +the Universal Foreign Function Interface (UFFI), which should run on +most Common Lisps that run on a POSIX operating system. + + (3) The number of bytes and characters in a file can differ even if +you're not using a multibyte character encoding. Because character +streams also translate platform-specific line endings to a single +#\Newline character, on Windows (which uses CRLF as its line ending) the +number of characters will typically be smaller than the number of bytes. +If you really have to know the number of characters in a file, you have +to bite the bullet and write something like this: + + (with-open-file (in filename) + (loop while (read-char in nil) count t)) + + or maybe something more efficient like this: + + (with-open-file (in filename) + (let ((scratch (make-string 4096))) + (loop for read = (read-sequence scratch in) + while (plusp read) sum read))) + + +File: pcl.info, Node: 14-11, Next: Chapter 15, Prev: 14-10, Up: Chapter 14 + +Other Kinds of I/O +================== + +In addition to file streams, Common Lisp supports other kinds of +streams, which can also be used with the various reading, writing, and +printing I/O functions. For instance, you can read data from, or write +data to, a string using STRING-STREAMs, which you can create with the +functions MAKE-STRING-INPUT-STREAM and MAKE-STRING-OUTPUT-STREAM. + + MAKE-STRING-INPUT-STREAM takes a string and optional start and end +indices to bound the area of the string from which data should be read +and returns a character stream that you can pass to any of the +character-based input functions such as READ-CHAR, READ-LINE, or READ. +For example, if you have a string containing a floating-point literal in +Common Lisp's syntax, you can convert it to a float like this: + + (let ((s (make-string-input-stream "1.23"))) + (unwind-protect (read s) + (close s))) + + Similarly, MAKE-STRING-OUTPUT-STREAM creates a stream you can use +with FORMAT, PRINT, WRITE-CHAR, WRITE-LINE, and so on. It takes no +arguments. Whatever you write, a string output stream will be +accumulated into a string that can then be obtained with the function +GET-OUTPUT-STREAM-STRING. Each time you call GET-OUTPUT-STREAM-STRING, +the stream's internal string is cleared so you can reuse an existing +string output stream. + + However, you'll rarely use these functions directly, because the +macros WITH-INPUT-FROM-STRING and WITH-OUTPUT-TO-STRING provide a more +convenient interface. WITH-INPUT-FROM-STRING is similar to +WITH-OPEN-FILE-it creates a string input stream from a given string and +then executes the forms in its body with the stream bound to the +variable you provide. 
For instance, instead of the LET form with the +explicit UNWIND-PROTECT, you'd probably write this: + + (with-input-from-string (s "1.23") + (read s)) + + The WITH-OUTPUT-TO-STRING macro is similar: it binds a newly created +string output stream to a variable you name and then executes its body. +After all the body forms have been executed, WITH-OUTPUT-TO-STRING +returns the value that would be returned by GET-OUTPUT-STREAM-STRING. + + CL-USER> (with-output-to-string (out) + (format out "hello, world ") + (format out "~s" (list 1 2 3))) + "hello, world (1 2 3)" + + The other kinds of streams defined in the language standard provide +various kinds of stream "plumbing," allowing you to plug together +streams in almost any configuration. A BROADCAST-STREAM is an output +stream that sends any data written to it to a set of output streams +provided as arguments to its constructor function, +MAKE-BROADCAST-STREAM. (1) Conversely, a CONCATENATED-STREAM is an input +stream that takes its input from a set of input streams, moving from +stream to stream as it hits the end of each stream. +CONCATENATED-STREAMs are constructed with the function +MAKE-CONCATENATED-STREAM, which takes any number of input streams as +arguments. + + Two kinds of bidirectional streams that can plug together streams in +a couple ways are TWO-WAY-STREAM and ECHO-STREAM. Their constructor +functions, MAKE-TWO-WAY-STREAM and MAKE-ECHO-STREAM, both take two +arguments, an input stream and an output stream, and return a stream of +the appropriate type, which you can use with both input and output +functions. + + In a TWO-WAY-STREAM every read you perform will return data read from +the underlying input stream, and every write will send data to the +underlying output stream. An ECHO-STREAM works essentially the same way +except that all the data read from the underlying input stream is also +echoed to the output stream. Thus, the output stream of an ECHO-STREAM +stream will contain a transcript of both sides of the conversation. + + Using these five kinds of streams, you can build almost any topology +of stream plumbing you want. + + Finally, although the Common Lisp standard doesn't say anything about +networking APIs, most implementations support socket programming and +typically implement sockets as another kind of stream, so you can use +all the regular I/O functions with them. (2) + + Now you're ready to move on to building a library that smoothes over +some of the differences between how the basic pathname functions behave +in different Common Lisp implementations. + + ---------- Footnotes ---------- + + (1) MAKE-BROADCAST-STREAM can make a data black hole by calling it +with no arguments. + + (2) The biggest missing piece in Common Lisp's standard I/O +facilities is a way for users to define new stream classes. There are, +however, two de facto standards for user-defined streams. During the +Common Lisp standardization, David Gray of Texas Instruments wrote a +draft proposal for an API to allow users to define new stream classes. +Unfortunately, there wasn't time to work out all the issues raised by +his draft to include it in the language standard. However, many +implementations support some form of so-called Gray Streams, basing +their API on Gray's draft proposal. Another, newer API, called Simple +Streams, has been developed by Franz and included in Allegro Common +Lisp. 
It was designed to improve the performance of user-defined +streams relative to Gray Streams and has been adopted by some of the +open-source Common Lisp implementations. + + +File: pcl.info, Node: Chapter 15, Next: Chapter 16, Prev: Chapter 14, Up: Top + +15. Practical: A Portable Pathname Library +========================================== + +As I discussed in the previous chapter, Common Lisp provides an +abstraction, the pathname, that's supposed to insulate you from the +details of how different operating systems and file systems name files. +Pathnames provide a useful API for manipulating names as names, but when +it comes to the functions that actually interact with the file system, +things get a bit hairy. + + The root of the problem, as I mentioned, is that the pathname +abstraction was designed to represent filenames on a much wider variety +of file systems than are commonly used now. Unfortunately, by making +pathnames abstract enough to account for a wide variety of file systems, +Common Lisp's designers left implementers with a fair number of choices +to make about how exactly to map the pathname abstraction onto any +particular file system. Consequently, different implementers, each +implementing the pathname abstraction for the same file system, just by +making different choices at a few key junctions, could end up with +conforming implementations that nonetheless provide different behavior +for several of the main pathname-related functions. + + However, one way or another, all implementations provide the same +basic functionality, so it's not too hard to write a library that +provides a consistent interface for the most common operations across +different implementations. That's your task for this chapter. In +addition to giving you several useful functions that you'll use in +future chapters, writing this library will give you a chance to learn +how to write code that deals with differences between implementations. + +* Menu: + +* 15-1:: The API +* 15-2:: *FEATURES* and Read-Time Conditionalization +* 15-3:: Listing a Directory +* 15-4:: Testing a File's Existence +* 15-5:: Walking a Directory Tree + + +File: pcl.info, Node: 15-1, Next: 15-2, Prev: Chapter 15, Up: Chapter 15 + +The API +======= + +The basic operations the library will support will be getting a list of +files in a directory and determining whether a file or directory with a +given name exists. You'll also write a function for recursively walking +a directory hierarchy, calling a given function for each pathname in the +tree. + + In theory, these directory listing and file existence operations are +already provided by the standard functions DIRECTORY and PROBE-FILE. +However, as you'll see, there are enough different ways to implement +these functions-all within the bounds of valid interpretations of the +language standard-that you'll want to write new functions that provide a +consistent behavior across implementations. + + +File: pcl.info, Node: 15-2, Next: 15-3, Prev: 15-1, Up: Chapter 15 + +*FEATURES* and Read-Time Conditionalization +=========================================== + +Before you can implement this API in a library that will run correctly +on multiple Common Lisp implementations, I need to show you the +mechanism for writing implementation-specific code. 
+ + While most of the code you write can be "portable" in the sense that +it will run the same on any conforming Common Lisp implementation, you +may occasionally need to rely on implementation-specific functionality +or to write slightly different bits of code for different +implementations. To allow you to do so without totally destroying the +portability of your code, Common Lisp provides a mechanism, called +read-time conditionalization, that allows you to conditionally include +code based on various features such as what implementation it's being +run in. + + The mechanism consists of a variable *FEATURES* and two extra bits of +syntax understood by the Lisp reader. *FEATURES* is a list of symbols; +each symbol represents a "feature" that's present in the implementation +or on the underlying platform. These symbols are then used in feature +expressions that evaluate to true or false depending on whether the +symbols in the expression are present in *FEATURES*. The simplest +feature expression is a single symbol; the expression is true if the +symbol is in *FEATURES* and false if it isn't. Other feature +expressions are boolean expressions built out of NOT, AND, and OR +operators. For instance, if you wanted to conditionalize some code to +be included only if the features foo and bar were present, you could +write the feature expression (and foo bar). + + The reader uses feature expressions in conjunction with two bits of +syntax, #+ and #-. When the reader sees either of these bits of syntax, +it first reads a feature expression and then evaluates it as I just +described. When a feature expression following a #+ is true, the reader +reads the next expression normally. Otherwise it skips the next +expression, treating it as whitespace. #- works the same way except it +reads the form if the feature expression is false and skips it if it's +true. + + The initial value of *FEATURES* is implementation dependent, and what +functionality is implied by the presence of any given symbol is likewise +defined by the implementation. However, all implementations include at +least one symbol that indicates what implementation it is. For +instance, Allegro Common Lisp includes the symbol :allegro, CLISP +includes :clisp, SBCL includes :sbcl, and CMUCL includes :cmu. To avoid +dependencies on packages that may or may not exist in different +implementations, the symbols in *FEATURES* are usually keywords, and the +reader binds *PACKAGE* to the KEYWORD package while reading feature +expressions. Thus, a name with no package qualification will be read as +a keyword symbol. So, you could write a function that behaves slightly +differently in each of the implementations just mentioned like this: + + (defun foo () + #+allegro (do-one-thing) + #+sbcl (do-another-thing) + #+clisp (something-else) + #+cmu (yet-another-version) + #-(or allegro sbcl clisp cmu) (error "Not implemented")) + + In Allegro that code will be read as if it had been written like +this: + + (defun foo () + (do-one-thing)) + + while in SBCL the reader will read this: + + (defun foo () + (do-another-thing)) + + while in an implementation other than one of the ones specifically +conditionalized, it will read this: + + (defun foo () + (error "Not implemented")) + + Because the conditionalization happens in the reader, the compiler +doesn't even see expressions that are skipped. (1) This means you pay +no runtime cost for having different versions for different +implementations. 
Also, when the reader skips conditionalized +expressions, it doesn't bother interning symbols, so the skipped +expressions can safely contain symbols from packages that may not exist +in other implementations. + +* Menu: + +* 15-2-1:: Packaging the Library + + ---------- Footnotes ---------- + + (1) One slightly annoying consequence of the way read-time +conditionalization works is that there's no easy way to write a +fall-through case. For example, if you add support for another +implementation to foo by adding another expression guarded with #+, you +need to remember to also add the same feature to the or feature +expression after the #- or the ERROR form will be evaluated after your +new code runs. + + +File: pcl.info, Node: 15-2-1, Next: 15-2, Prev: 15-2, Up: 15-2 + +Packaging the Library +===================== + +Speaking of packages, if you download the complete code for this +library, you'll see that it's defined in a new package, +com.gigamonkeys.pathnames. I'll discuss the details of defining and +using packages in Chapter 21. For now you should note that some +implementations provide their own packages that contain functions with +some of the same names as the ones you'll define in this chapter and +make those names available in the CL-USER package. Thus, if you try to +define the functions from this library while in the CL-USER package, you +may get errors or warnings about clobbering existing definitions. To +avoid this possibility, you can create a file called packages.lisp with +the following contents: + + (in-package :cl-user) + + (defpackage :com.gigamonkeys.pathnames + (:use :common-lisp) + (:export + :list-directory + :file-exists-p + :directory-pathname-p + :file-pathname-p + :pathname-as-directory + :pathname-as-file + :walk-directory + :directory-p + :file-p)) + + and LOAD it. Then at the REPL or at the top of the file where you +type the definitions from this chapter, type the following expression: + + (in-package :com.gigamonkeys.pathnames) + + In addition to avoiding name conflicts with symbols already available +in CL-USER, packaging the library this way also makes it easier to use +in other code, as you'll see in several future chapters. + + +File: pcl.info, Node: 15-3, Next: 15-4, Prev: 15-2, Up: Chapter 15 + +Listing a Directory +=================== + +You can implement the function for listing a single directory, +list-directory, as a thin wrapper around the standard function +DIRECTORY. DIRECTORY takes a special kind of pathname, called a wild +pathname, that has one or more components containing the special value +:wild and returns a list of pathnames representing files in the file +system that match the wild pathname. (1) The matching algorithm-like +most things having to do with the interaction between Lisp and a +particular file system-isn't defined by the language standard, but most +implementations on Unix and Windows follow the same basic scheme. + + The DIRECTORY function has two problems that you need to address with +list-directory. The main one is that certain aspects of its behavior +differ fairly significantly between different Common Lisp +implementations, even on the same operating system. The other is that +while DIRECTORY provides a powerful interface for listing files, to use +it properly requires understanding some rather subtle points about the +pathname abstraction. 
Between these subtleties and the idiosyncrasies +of different implementations, actually writing portable code that uses +DIRECTORY to do something as simple as listing all the files and +subdirectories in a single directory can be a frustrating experience. +You can deal with those subtleties and idiosyncrasies once and for all, +by writing list-directory, and forget them thereafter. + + One subtlety I discussed in Chapter 14 is the two ways to represent +the name of a directory as a pathname: directory form and file form. + + To get DIRECTORY to return a list of files in /home/peter/, you need +to pass it a wild pathname whose directory component is the directory +you want to list and whose name and type components are :wild. Thus, to +get a listing of the files in /home/peter/, it might seem you could +write this: + + (directory (make-pathname :name :wild :type :wild :defaults home-dir)) + + where home-dir is a pathname representing /home/peter/. This would +work if home-dir were in directory form. But if it were in file +form-for example, if it had been created by parsing the namestring +"/home/peter"-then that same expression would list all the files in +/home since the name component "peter" would be replaced with :wild. + + To avoid having to worry about explicitly converting between +representations, you can define list-directory to accept a nonwild +pathname in either form, which it will then convert to the appropriate +wild pathname. + + To help with this, you should define a few helper functions. One, +component-present-p, will test whether a given component of a pathname +is "present," meaning neither NIL nor the special value :unspecific. +(2) Another, directory-pathname-p, tests whether a pathname is already +in directory form, and the third, pathname-as-directory, converts any +pathname to a directory form pathname. + + (defun component-present-p (value) + (and value (not (eql value :unspecific)))) + + (defun directory-pathname-p (p) + (and + (not (component-present-p (pathname-name p))) + (not (component-present-p (pathname-type p))) + p)) + + (defun pathname-as-directory (name) + (let ((pathname (pathname name))) + (when (wild-pathname-p pathname) + (error "Can't reliably convert wild pathnames.")) + (if (not (directory-pathname-p name)) + (make-pathname + :directory (append (or (pathname-directory pathname) (list :relative)) + (list (file-namestring pathname))) + :name nil + :type nil + :defaults pathname) + pathname))) + + Now it seems you could generate a wild pathname to pass to DIRECTORY +by calling MAKE-PATHNAME with a directory form name returned by +pathname-as-directory. Unfortunately, it's not quite that simple, +thanks to a quirk in CLISP's implementation of DIRECTORY. In CLISP, +DIRECTORY won't return files with no extension unless the type component +of the wildcard is NIL rather than :wild. So you can define a function, +directory-wildcard, that takes a pathname in either directory or file +form and returns a proper wildcard for the given implementation using +read-time conditionalization to make a pathname with a :wild type +component in all implementations except for CLISP and NIL in CLISP. + + (defun directory-wildcard (dirname) + (make-pathname + :name :wild + :type #-clisp :wild #+clisp nil + :defaults (pathname-as-directory dirname))) + + Note how each read-time conditional operates at the level of a single +expression After #-clisp, the expression :wild is either read or +skipped; likewise, after #+clisp, the NIL is read or skipped. 
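+   To get a feel for what directory-wildcard builds, you can look at
+the namestrings of the pathnames it returns. Here's a sketch of what
+you'd expect from a non-CLISP implementation on a Unix-like system; in
+CLISP the type component would be NIL, and the exact printed form is,
+as always, implementation dependent:
+
+     CL-USER> (namestring (directory-wildcard "/home/peter/"))
+     "/home/peter/*.*"
+     CL-USER> (namestring (directory-wildcard "/home/peter"))
+     "/home/peter/*.*"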
+ + Now you can take a first crack at the list-directory function. + + (defun list-directory (dirname) + (when (wild-pathname-p dirname) + (error "Can only list concrete directory names.")) + (directory (directory-wildcard dirname))) + + As it stands, this function would work in SBCL, CMUCL, and LispWorks. +Unfortunately, a couple more implementation differences remain to be +smoothed over. One is that not all implementations will return +subdirectories of the given directory. Allegro, SBCL, CMUCL, and +LispWorks do. OpenMCL doesn't by default but will if you pass DIRECTORY +a true value via the implementation-specific keyword argument +:directories. CLISP's DIRECTORY returns subdirectories only when it's +passed a wildcard pathname with :wild as the last element of the +directory component and NIL name and type components. In this case, it +returns only subdirectories, so you'll need to call DIRECTORY twice with +different wildcards and combine the results. + + Once you get all the implementations returning directories, you'll +discover they can also differ in whether they return the names of +directories in directory or file form. You want list-directory to +always return directory names in directory form so you can differentiate +subdirectories from regular files based on just the name. Except for +Allegro, all the implementations this library will support do that. +Allegro, on the other hand, requires you to pass DIRECTORY the +implementation-specific keyword argument :directories-are-files NIL to +get it to return directories in file form. + + Once you know how to make each implementation do what you want, +actually writing list-directory is simply a matter of combining the +different versions using read-time conditionals. + + (defun list-directory (dirname) + (when (wild-pathname-p dirname) + (error "Can only list concrete directory names.")) + (let ((wildcard (directory-wildcard dirname))) + + #+(or sbcl cmu lispworks) + (directory wildcard) + + #+openmcl + (directory wildcard :directories t) + + #+allegro + (directory wildcard :directories-are-files nil) + + #+clisp + (nconc + (directory wildcard) + (directory (clisp-subdirectories-wildcard wildcard))) + + #-(or sbcl cmu lispworks openmcl allegro clisp) + (error "list-directory not implemented"))) + + The function clisp-subdirectories-wildcard isn't actually specific to +CLISP, but since it isn't needed by any other implementation, you can +guard its definition with a read-time conditional. In this case, since +the expression following the #+ is the whole DEFUN, the whole function +definition will be included or not, depending on whether clisp is +present in *FEATURES*. + + #+clisp + (defun clisp-subdirectories-wildcard (wildcard) + (make-pathname + :directory (append (pathname-directory wildcard) (list :wild)) + :name nil + :type nil + :defaults wildcard)) + + ---------- Footnotes ---------- + + (1) Another special value, :wild-inferiors, can appear as part of the +directory component of a wild pathname, but you won't need it in this +chapter. + + (2) Implementations are allowed to return :unspecific instead of NIL +as the value of pathname components in certain situations such as when +the component isn't used by that implementation. + + +File: pcl.info, Node: 15-4, Next: 15-5, Prev: 15-3, Up: Chapter 15 + +Testing a File's Existence +========================== + +To replace PROBE-FILE, you can define a function called file-exists-p. 
+It should accept a pathname and return an equivalent pathname if the +file exists and NIL if it doesn't. It should be able to accept the name +of a directory in either directory or file form but should always return +a directory form pathname if the file exists and is a directory. This +will allow you to use file-exists-p, along with directory-pathname-p, to +test whether an arbitrary name is the name of a file or directory. + + In theory, file-exists-p is quite similar to the standard function +PROBE-FILE; indeed, in several implementations-SBCL, LispWorks, and +OpenMCL-PROBE-FILE already gives you the behavior you want for +file-exists-p. But not all implementations of PROBE-FILE behave quite +the same. + + Allegro and CMUCL's PROBE-FILE functions are close to what you +need-they will accept the name of a directory in either form but, +instead of returning a directory form name, simply return the name in +the same form as the argument it was passed. Luckily, if passed the +name of a nondirectory in directory form, they return NIL. So with those +implementations you can get the behavior you want by first passing the +name to PROBE-FILE in directory form-if the file exists and is a +directory, it will return the directory form name. If that call returns +NIL, then you try again with a file form name. + + CLISP, on the other hand, once again has its own way of doing things. +Its PROBE-FILE immediately signals an error if passed a name in +directory form, regardless of whether a file or directory exists with +that name. It also signals an error if passed a name in file form +that's actually the name of a directory. For testing whether a +directory exists, CLISP provides its own function: probe-directory (in +the ext package). This is almost the mirror image of PROBE-FILE: it +signals an error if passed a name in file form or if passed a name in +directory form that happens to name a file. The only difference is it +returns T rather than a pathname when the named directory exists. + + But even in CLISP you can implement the desired semantics by wrapping +the calls to PROBE-FILE and probe-directory in IGNORE-ERRORS. (1) + + (defun file-exists-p (pathname) + #+(or sbcl lispworks openmcl) + (probe-file pathname) + + #+(or allegro cmu) + (or (probe-file (pathname-as-directory pathname)) + (probe-file pathname)) + + #+clisp + (or (ignore-errors + (probe-file (pathname-as-file pathname))) + (ignore-errors + (let ((directory-form (pathname-as-directory pathname))) + (when (ext:probe-directory directory-form) + directory-form)))) + + #-(or sbcl cmu lispworks openmcl allegro clisp) + (error "file-exists-p not implemented")) + + The function pathname-as-file that you need for the CLISP +implementation of file-exists-p is the inverse of the previously defined +pathname-as-directory, returning a pathname that's the file form +equivalent of its argument. This function, despite being needed here +only by CLISP, is generally useful, so define it for all implementations +and make it part of the library. 
+ + (defun pathname-as-file (name) + (let ((pathname (pathname name))) + (when (wild-pathname-p pathname) + (error "Can't reliably convert wild pathnames.")) + (if (directory-pathname-p name) + (let* ((directory (pathname-directory pathname)) + (name-and-type (pathname (first (last directory))))) + (make-pathname + :directory (butlast directory) + :name (pathname-name name-and-type) + :type (pathname-type name-and-type) + :defaults pathname)) + pathname))) + + ---------- Footnotes ---------- + + (1) This is slightly broken in the sense that if PROBE-FILE signals +an error for some other reason, this code will interpret it incorrectly. +Unfortunately, the CLISP documentation doesn't specify what errors might +be signaled by PROBE-FILE and probe-directory, and experimentation seems +to show that they signal simple-file-errors in most erroneous +situations. + + +File: pcl.info, Node: 15-5, Next: Chapter 16, Prev: 15-4, Up: Chapter 15 + +Walking a Directory Tree +======================== + +Finally, to round out this library, you can implement a function called +walk-directory. Unlike the functions defined previously, this function +doesn't need to do much of anything to smooth over implementation +differences; it just needs to use the functions you've already defined. +However, it's quite handy, and you'll use it several times in subsequent +chapters. It will take the name of a directory and a function and call +the function on the pathnames of all the files under the directory, +recursively. It will also take two keyword arguments: :directories and +:test. When :directories is true, it will call the function on the +pathnames of directories as well as regular files. The :test argument, +if provided, specifies another function that's invoked on each pathname +before the main function is; the main function will be called only if +the test function returns true. + + (defun walk-directory (dirname fn &key directories (test (constantly t))) + (labels + ((walk (name) + (cond + ((directory-pathname-p name) + (when (and directories (funcall test name)) + (funcall fn name)) + (dolist (x (list-directory name)) (walk x))) + ((funcall test name) (funcall fn name))))) + (walk (pathname-as-directory dirname)))) + + Now you have a useful library of functions for dealing with +pathnames. As I mentioned, these functions will come in handy in later +chapters, particularly Chapters 23 and 27, where you'll use +walk-directory to crawl through directory trees containing spam messages +and MP3 files. But before we get to that, though, I need to talk about +object orientation, the topic of the next two chapters. + + +File: pcl.info, Node: Chapter 16, Next: Chapter 17, Prev: Chapter 15, Up: Top + +16. Object Reorientation: Generic Functions +=========================================== + +Because the invention of Lisp predated the rise of object-oriented +programming by a couple decades, (1) new Lispers are sometimes surprised +to discover what a thoroughly object-oriented language Common Lisp is. +Common Lisp's immediate predecessors were developed at a time when +object orientation was an exciting new idea and there were many +experiments with ways to incorporate the ideas of object orientation, +especially as manifested in Smalltalk, into Lisp. As part of the Common +Lisp standardization, a synthesis of several of these experiments +emerged under the name Common Lisp Object System, or CLOS. 
The ANSI +standard incorporated CLOS into the language, so it no longer really +makes sense to speak of CLOS as a separate entity. + + The features CLOS contributed to Common Lisp range from those that +can hardly be avoided to relatively esoteric manifestations of Lisp's +language-as-language-building-tool philosophy. Complete coverage of all +these features is beyond the scope of this book, but in this chapter and +the next I'll describe the bread-and-butter features and give an +overview of Common Lisp's approach to objects. + + You should note at the outset that Common Lisp's object system offers +a fairly different embodiment of the principles of object orientation +than many other languages. If you have a deep understanding of the +fundamental ideas behind object orientation, you'll likely appreciate +the particularly powerful and general way Common Lisp manifests those +ideas. On the other hand, if your experience with object orientation +has been largely with a single language, you may find Common Lisp's +approach somewhat foreign; you should try to avoid assuming that there's +only one way for a language to support object orientation. (2) If you +have little object-oriented programming experience, you should have no +trouble understanding the explanations here, though it may help to +ignore the occasional comparisons to the way other languages do things. + +* Menu: + +* 16-1:: Generic Functions and Classes +* 16-2:: Generic Functions and Methods +* 16-3:: DEFGENERIC +* 16-4:: DEFMETHOD +* 16-5:: Method Combination +* 16-6:: The Standard Method Combination +* 16-7:: Other Method Combinations +* 16-8:: Multimethods +* 16-9:: To Be Continued . . . + + ---------- Footnotes ---------- + + (1) 1 + + (2) 2 + + +File: pcl.info, Node: 16-1, Next: 16-2, Prev: Chapter 16, Up: Chapter 16 + +Generic Functions and Classes +============================= + +The fundamental idea of object orientation is that a powerful way to +organize a program is to define data types and then associate operations +with those data types. In particular, you want to be able to invoke an +operation and have the exact behavior determined by the type of the +object or objects on which the operation was invoked. The classic +example used, seemingly by all introductions to object orientation, is +an operation draw that can be applied to objects representing various +geometric shapes. Different implementations of the draw operation can +be provided for drawing circles, triangles, and squares, and a call to +draw will actually result in drawing a circle, triangle, or square, +depending on the type of the object to which the draw operation is +applied. The different implementations of draw are defined separately, +and new versions can be defined that draw other shapes without having to +change the code of either the caller or any of the other draw +implementations. This feature of object orientation goes by the fancy +Greek name polymorphism, meaning "many forms," because a single +conceptual operation, such as drawing an object, can take many different +concrete forms. + + Common Lisp, like most object-oriented languages today, is +class-based; all objects are instances of a particular class. (1) The +class of an object determines its representation-built-in classes such +as NUMBER and STRING have opaque representations accessible only via the +standard functions for manipulating those types, while instances of +user-defined classes, as you'll see in the next chapter, consist of +named parts called slots. 
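+   A quick sketch of what this looks like at the REPL: the function
+TYPEP tests whether an object is an instance of a given class (class
+names work as type specifiers), while CLASS-OF returns the object's
+class, whose printed form varies between implementations:
+
+     CL-USER> (typep "hello" 'string)
+     T
+     CL-USER> (typep 10 'string)
+     NIL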
+ + Classes are arranged in a hierarchy, a taxonomy for all objects. A +class can be defined as a subclass of other classes, called its +superclasses. A class inherits part of its definition from its +superclasses and instances of a class are also considered instances of +the superclasses. In Common Lisp, the hierarchy of classes has a single +root, the class T, which is a direct or indirect superclass of every +other class. Thus, every datum in Common Lisp is an instance of T. (2) +Common Lisp also supports multiple inheritance-a single class can have +multiple direct superclasses. + + Outside the Lisp family, almost all object-oriented languages follow +the basic pattern established by Simula of having behavior associated +with classes through methods or member functions that belong to a +particular class. In these languages, a method is invoked on a +particular object, and the class of that object determines what code +runs. This model of method invocation is called-after the Smalltalk +terminology-message passing. Conceptually, method invocation in a +message-passing system starts by sending a message containing the name +of the method to run and any arguments to the object on which the method +is being invoked. The object then uses its class to look up the method +associated with the name in the message and runs it. Because each class +can have its own method for a given name, the same message, sent to +different objects, can invoke different methods. + + Early Lisp object systems worked in a similar way, providing a +special function SEND that could be used to send a message to a +particular object. However, this wasn't entirely satisfactory, as it +made method invocations different from normal function calls. +Syntactically method invocations were written like this: + + (send object 'foo) + + rather than like this: + + (foo object) + + More significantly, because methods weren't functions, they couldn't +be passed as arguments to higher-order functions such as MAPCAR; if one +wanted to call a method on all the elements of a list with MAPCAR, one +had to write this: + + (mapcar #'(lambda (object) (send object 'foo)) objects) + + rather than this: + + (mapcar #'foo objects) + + Eventually the folks working on Lisp object systems unified methods +with functions by creating a new kind of function called a generic +function. In addition to solving the problems just described, generic +functions opened up new possibilities for the object system, including +many features that simply don't make sense in a message-passing object +system. + + Generic functions are the heart of Common Lisp's object system and +the topic of the rest of this chapter. While I can't talk about generic +functions without some mention of classes, for now I'll focus on how to +define and use generic functions. In the next chapter I'll show you how +to define your own classes. + + ---------- Footnotes ---------- + + (1) 3 + + (2) 4 + + +File: pcl.info, Node: 16-2, Next: 16-3, Prev: 16-1, Up: Chapter 16 + +Generic Functions and Methods +============================= + +A generic function defines an abstract operation, specifying its name +and a parameter list but no implementation. 
Here, for example, is how +you might define a generic function, draw, that will be used to draw +different kinds of shapes on the screen: + + (defgeneric draw (shape) + (:documentation "Draw the given shape on the screen.")) + + I'll discuss the syntax of DEFGENERIC in the next section; for now +just note that this definition doesn't contain any actual code. + + A generic function is generic in the sense that it can-at least in +theory-accept any objects as arguments. (1) However, by itself a +generic function can't actually do anything; if you just define a +generic function, no matter what arguments you call it with, it will +signal an error. The actual implementation of a generic function is +provided by methods. Each method provides an implementation of the +generic function for particular classes of arguments. Perhaps the +biggest difference between a generic function-based system and a +message-passing system is that methods don't belong to classes; they +belong to the generic function, which is responsible for determining +what method or methods to run in response to a particular invocation. + + Methods indicate what kinds of arguments they can handle by +specializing the required parameters defined by the generic function. +For instance, on the generic function draw, you might define one method +that specializes the shape parameter for objects that are instances of +the class circle while another method specializes shape for objects that +are instances of the class triangle. They would look like this, eliding +the actual drawing code: + + (defmethod draw ((shape circle)) + ...) + + (defmethod draw ((shape triangle)) + ...) + + When a generic function is invoked, it compares the actual arguments +it was passed with the specializers of each of its methods to find the +applicable methods-those methods whose specializers are compatible with +the actual arguments. If you invoke draw, passing an instance of +circle, the method that specialized shape on the class circle is +applicable, while if you pass it a triangle, then the method that +specializes shape on the class triangle applies. In simple cases, only +one method will be applicable, and it will handle the invocation. In +more complex cases, there may be multiple methods that apply; they're +then combined, as I'll discuss in the section "Method Combination," into +a single effective method that handles the invocation. + + You can specialize a parameter in two ways-usually you'll specify a +class that the argument must be an instance of. Because instances of a +class are also considered instances of that class's superclasses, a +method with a parameter specialized on a particular class can be +applicable whenever the corresponding argument is a direct instance of +the specializing class or of any of its subclasses. The other kind of +specializer is a so-called EQL specializer, which specifies a particular +object to which the method applies. + + When a generic function has only methods specialized on a single +parameter and all the specializers are class specializers, the result of +invoking a generic function is quite similar to the result of invoking a +method in a message-passing system-the combination of the name of the +operation and the class of the object on which it's invoked determines +what method to run. + + However, reversing the order of lookup opens up possibilities not +found in message-passing systems. 
Generic functions support methods +that specialize on multiple parameters, provide a framework that makes +multiple inheritance much more manageable, and let you use declarative +constructs to control how methods are combined into an effective method, +supporting several common usage patterns without a lot of boilerplate +code. I'll discuss those topics in a moment. But first you need to +look at the basics of the two macros used to define the generic +functions DEFGENERIC and DEFMETHOD. + + ---------- Footnotes ---------- + + (1) 5 + + +File: pcl.info, Node: 16-3, Next: 16-4, Prev: 16-2, Up: Chapter 16 + +DEFGENERIC +========== + +To give you a feel for these macros and the various facilities they +support, I'll show you some code you might write as part of a banking +application-or, rather, a toy banking application; the point is to look +at a few language features, not to learn how to really write banking +software. For instance, this code doesn't even pretend to deal with +such issues as multiple currencies let alone audit trails and +transactional integrity. + + Because I'm not going to discuss how to define new classes until the +next chapter, for now you can just assume that certain classes already +exist: for starters, assume there's a class bank-account and that it has +two subclasses, checking-account and savings-account. The class +hierarchy looks like this: + +[image src="imgs/account-hierarchy.png"] + + The first generic function will be withdraw, which decreases the +account balance by a specified amount. If the balance is less than the +amount, it should signal an error and leave the balance unchanged. You +can start by defining the generic function with DEFGENERIC. + + The basic form of DEFGENERIC is similar to DEFUN except with no body. +The parameter list of DEFGENERIC specifies the parameters that must be +accepted by all the methods that will be defined on the generic +function. In the place of the body, a DEFGENERIC can contain various +options. One option you should always include is :documentation, which +you use to provide a string describing the purpose of the generic +function. Because a generic function is purely abstract, it's important +to be clear to both users and implementers what it's for. Thus, you +might define withdraw like this: + + (defgeneric withdraw (account amount) + (:documentation "Withdraw the specified amount from the account. + + Signal an error if the current balance is less than amount.")) + + +File: pcl.info, Node: 16-4, Next: 16-5, Prev: 16-3, Up: Chapter 16 + +DEFMETHOD +========= + +Now you're ready to use DEFMETHOD to define methods that implement +withdraw. (1) + + A method's parameter list must be congruent with its generic +function's. In this case, that means all methods defined on withdraw +must have exactly two required parameters. More generally, methods must +have the same number of required and optional parameters and must be +capable of accepting any arguments corresponding to any &rest or &key +parameters specified by the generic function. (2) + + Since the basics of withdrawing are the same for all accounts, you +can define a method that specializes the account parameter on the +bank-account class. You can assume the function balance returns the +current balance of the account and can be used with SETF-and thus with +DECF-to set the balance. The function ERROR is a standard function used +to signal an error, which I'll discuss in greater detail in Chapter 19. 
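+   If you want something concrete to experiment with in the meantime,
+one hypothetical way to provide balance is as a slot accessor on a
+class defined roughly like this (I'll show how DEFCLASS really works in
+the next chapter; treat this purely as a placeholder sketch):
+
+     (defclass bank-account ()
+       ((balance :initarg :balance :initform 0 :accessor balance)))
+
+     (make-instance 'bank-account :balance 100)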
+Using those two functions, you can define a basic withdraw method that +looks like this: + + (defmethod withdraw ((account bank-account) amount) + (when (< (balance account) amount) + (error "Account overdrawn.")) + (decf (balance account) amount)) + + As this code suggests, the form of DEFMETHOD is even more like that +of DEFUN than DEFGENERIC's is. The only difference is that the required +parameters can be specialized by replacing the parameter name with a +two-element list. The first element is the name of the parameter, and +the second element is the specializer, either the name of a class or an +EQL specializer, the form of which I'll discuss in a moment. The +parameter name can be anything-it doesn't have to match the name used in +the generic function, though it often will. + + This method will apply whenever the first argument to withdraw is an +instance of bank-account. The second parameter, amount, is implicitly +specialized on T, and since all objects are instances of T, it doesn't +affect the applicability of the method. + + Now suppose all checking accounts have overdraft protection. That +is, each checking account is linked to another bank account that's drawn +upon when the balance of the checking account itself can't cover a +withdrawal. You can assume that the function overdraft-account takes a +checking-account object and returns a bank-account object representing +the linked account. + + Thus, withdrawing from a checking-account object requires a few extra +steps compared to withdrawing from a standard bank-account object. You +must first check whether the amount being withdrawn is greater than the +account's current balance and, if it is, transfer the difference from +the overdraft account. Then you can proceed as with a standard +bank-account object. + + So what you'd like to do is define a method on withdraw that +specializes on checking-account to handle the transfer and then lets the +method specialized on bank-account take control. Such a method might +look like this: + + (defmethod withdraw ((account checking-account) amount) + (let ((overdraft (- amount (balance account)))) + (when (plusp overdraft) + (withdraw (overdraft-account account) overdraft) + (incf (balance account) overdraft))) + (call-next-method)) + + The function CALL-NEXT-METHOD is part of the generic function +machinery used to combine applicable methods. It indicates that control +should be passed from this method to the method specialized on +bank-account. (3) When it's called with no arguments, as it is here, +the next method is invoked with whatever arguments were originally +passed to the generic function. It can also be called with arguments, +which will then be passed onto the next method. + + You aren't required to invoke CALL-NEXT-METHOD in every method. +However, if you don't, the new method is then responsible for completely +implementing the desired behavior of the generic function. For example, +if you had a subclass of bank-account, proxy-account, that didn't +actually keep track of its own balance but instead delegated withdrawals +to another account, you might write a method like this (assuming a +function, proxied-account, that returns the proxied account): + + (defmethod withdraw ((proxy proxy-account) amount) + (withdraw (proxied-account proxy) amount)) + + Finally, DEFMETHOD also allows you to create methods specialized on a +particular object with an EQL specializer. For example, suppose the +banking app is going to be deployed in a particularly corrupt bank. 
+Suppose the variable *account-of-bank-president* holds a reference to a +particular bank account that belongs-as the name suggests-to the bank's +president. Further suppose the variable *bank* represents the bank as a +whole, and the function embezzle steals money from the bank. The bank +president might ask you to "fix" withdraw to handle his account +specially. + + (defmethod withdraw ((account (eql *account-of-bank-president*)) amount) + (let ((overdraft (- amount (balance account)))) + (when (plusp overdraft) + (incf (balance account) (embezzle *bank* overdraft))) + (call-next-method))) + + Note, however, that the form in the EQL specializer that provides the +object to specialize on-*account-of-bank-president* in this case-is +evaluated once, when the DEFMETHOD is evaluated. This method will be +specialized on the value of *account-of-bank-president* at the time the +method is defined; changing the variable later won't change the method. + + ---------- Footnotes ---------- + + (1) 6 + + (2) 7 + + (3) 8 + + +File: pcl.info, Node: 16-5, Next: 16-6, Prev: 16-4, Up: Chapter 16 + +Method Combination +================== + +Outside the body of a method, CALL-NEXT-METHOD has no meaning. Within a +method, it's given a meaning by the generic function machinery that +builds an effective method each time the generic function is invoked +using all the methods applicable to that particular invocation. This +notion of building an effective method by combining applicable methods +is the heart of the generic function concept and is the thing that +allows generic functions to support facilities not found in +message-passing systems. So it's worth taking a closer look at what's +really happening. Folks with the message-passing model deeply ingrained +in their consciousness should pay particular attention because generic +functions turn method dispatching inside out compared to message +passing, making the generic function, rather than the class, the prime +mover. + + Conceptually, the effective method is built in three steps: First, +the generic function builds a list of applicable methods based on the +actual arguments it was passed. Second, the list of applicable methods +is sorted according to the specificity of their parameter specializers. +Finally, methods are taken in order from the sorted list and their code +combined to produce the effective method. (1) + + To find applicable methods, the generic function compares the actual +arguments with the corresponding parameter specializers in each of its +methods. A method is applicable if, and only if, all the specializers +are compatible with the corresponding arguments. + + When the specializer is the name of a class, it's compatible if it +names the actual class of the argument or one of its superclasses. +(Recall that parameters without explicit specializers are implicitly +specialized on the class T so will be compatible with any argument.) An +EQL specializer is compatible only when the argument is the same object +as was specified in the specializer. + + Because all the arguments are checked against the corresponding +specializers, they all affect whether a method is applicable. Methods +that explicitly specialize more than one parameter are called +multimethods; I'll discuss them in the section "Multimethods." + + After the applicable methods have been found, the generic function +machinery needs to sort them before it can combine them into an +effective method. 
To order two applicable methods, the generic function +compares their parameter specializers from left to right, (2) and the +first specializer that's different between the two methods determines +their ordering, with the method with the more specific specializer +coming first. + + Because only applicable methods are being sorted, you know all class +specializers will name classes that the corresponding argument is +actually an instance of. In the typical case, if two class specializers +differ, one will be a subclass of the other. In that case, the +specializer naming the subclass is considered more specific. This is +why the method that specialized account on checking-account was +considered more specific than the method that specialized it on +bank-account. + + Multiple inheritance slightly complicates the notion of specificity +since the actual argument may be an instance of two classes, neither of +which is a subclass of the other. If such classes are used as parameter +specializers, the generic function can't order them using only the rule +that subclasses are more specific than their superclasses. In the next +chapter I'll discuss how the notion of specificity is extended to deal +with multiple inheritance. For now, suffice it to say that there's a +deterministic algorithm for ordering class specializers. + + Finally, an EQL specializer is always more specific than any class +specializer, and because only applicable methods are being considered, +if more than one method has an EQL specializer for a particular +parameter, they must all have the same EQL specializer. The comparison +of those methods will thus be decided based on other parameters. + + ---------- Footnotes ---------- + + (1) 9 + + (2) 10 + + +File: pcl.info, Node: 16-6, Next: 16-7, Prev: 16-5, Up: Chapter 16 + +The Standard Method Combination +=============================== + +Now that you understand how the applicable methods are found and sorted, +you're ready to take a closer look at the last step-how the sorted list +of methods is combined into a single effective method. By default, +generic functions use what's called the standard method combination. +The standard method combination combines methods so that +CALL-NEXT-METHOD works as you've already seen-the most specific method +runs first, and each method can pass control to the next most specific +method via CALL-NEXT-METHOD. + + However, there's a bit more to it than that. The methods I've been +discussing so far are called primary methods. Primary methods, as their +name suggests, are responsible for providing the primary implementation +of a generic function. The standard method combination also supports +three kinds of auxiliary methods: :before, :after, and :around methods. +An auxiliary method definition is written with DEFMETHOD like a primary +method but with a method qualifier, which names the type of method, +between the name of the method and the parameter list. For instance, a +:before method on withdraw that specializes the account parameter on the +class bank-account would start like this: + + (defmethod withdraw :before ((account bank-account) amount) ...) + + Each kind of auxiliary method is combined into the effective method +in a different way. All the applicable :before methods-not just the +most specific-are run as part of the effective method. They run, as +their name suggests, before the most specific primary method and are run +in most-specific-first order. 
Thus, :before methods can be used to do +any preparation needed to ensure that the primary method can run. For +instance, you could've used a :before method specialized on +checking-account to implement the overdraft protection on checking +accounts like this: + + (defmethod withdraw :before ((account checking-account) amount) + (let ((overdraft (- amount (balance account)))) + (when (plusp overdraft) + (withdraw (overdraft-account account) overdraft) + (incf (balance account) overdraft)))) + + This :before method has three advantages over a primary method. One +is that it makes it immediately obvious how the method changes the +overall behavior of the withdraw function-it's not going to interfere +with the main behavior or change the result returned. + + The next advantage is that a primary method specialized on a class +more specific than checking-account won't interfere with this :before +method, making it easier for an author of a subclass of checking-account +to extend the behavior of withdraw while keeping part of the old +behavior. + + Lastly, since a :before method doesn't have to call CALL-NEXT-METHOD +to pass control to the remaining methods, it's impossible to introduce a +bug by forgetting to. + + The other auxiliary methods also fit into the effective method in +ways suggested by their names. All the :after methods run after the +primary methods in most-specific-last order, that is, the reverse of the +:before methods. Thus, the :before and :after methods combine to create +a sort of nested wrapping around the core functionality provided by the +primary methods-each more-specific :before method will get a chance to +set things up so the less-specific :before methods and primary methods +can run successfully, and each more-specific :after method will get a +chance to clean up after all the primary methods and less-specific +:after methods. + + Finally, :around methods are combined much like primary methods +except they're run "around" all the other methods. That is, the code +from the most specific :around method is run before anything else. +Within the body of an :around method, CALL-NEXT-METHOD will lead to the +code of the next most specific :around method or, in the least specific +:around method, to the complex of :before, primary, and :after methods. +Almost all :around methods will contain such a call to CALL-NEXT-METHOD +because an :around method that doesn't will completely hijack the +implementation of the generic function from all the methods except for +more-specific :around methods. + + Occasionally that kind of hijacking is called for, but typically +:around methods are used to establish some dynamic context in which the +rest of the methods will run-to bind a dynamic variable, for example, or +to establish an error handler (as I'll discuss in Chapter 19). About +the only time it's appropriate for an :around method to not call +CALL-NEXT-METHOD is when it returns a result cached from a previous call +to CALL-NEXT-METHOD. At any rate, an :around method that doesn't call +CALL-NEXT-METHOD is responsible for correctly implementing the semantics +of the generic function for all classes of arguments to which the method +may apply, including future subclasses. + + Auxiliary methods are just a convenient way to express certain common +patterns more concisely and concretely. They don't actually allow you +to do anything you couldn't do by combining primary methods with +diligent adherence to a few coding conventions and some extra typing. 
+Perhaps their biggest benefit is that they provide a uniform framework +for extending generic functions. Often a library will define a generic +function and provide a default primary method, allowing users of the +library to customize its behavior by defining appropriate auxiliary +methods. + + +File: pcl.info, Node: 16-7, Next: 16-8, Prev: 16-6, Up: Chapter 16 + +Other Method Combinations +========================= + +In addition to the standard method combination, the language specifies +nine other built-in method combinations known as the simple built-in +method combinations. You can also define custom method combinations, +though that's a fairly esoteric feature and beyond the scope of this +book. I'll briefly cover how to use the simple built-in combinations to +give you a sense of the possibilities. + + All the simple combinations follow the same pattern: instead of +invoking the most specific primary method and letting it invoke +less-specific primary methods via CALL-NEXT-METHOD, the simple method +combinations produce an effective method that contains the code of all +the primary methods, one after another, all wrapped in a call to the +function, macro, or special operator that gives the method combination +its name. The nine combinations are named for the operators: +, AND, +OR, LIST, APPEND, NCONC, MIN, MAX, and PROGN. The simple combinations +also support only two kinds of methods, primary methods, which are +combined as just described, and :around methods, which work like :around +methods in the standard method combination. + + For example, a generic function that uses the + method combination +will return the sum of all the results returned by its primary methods. +Note that the AND and OR method combinations won't necessarily run all +the primary methods because of those macros' short-circuiting behavior-a +generic function using the AND combination will return NIL as soon as +one of the methods does and will return the value of the last method +otherwise. Similarly, the OR combination will return the first non-NIL +value returned by any of the methods. + + To define a generic function that uses a particular method +combination, you include a :method-combination option in the DEFGENERIC +form. The value supplied with this option is the name of the method +combination you want to use. For example, to define a generic function, +priority, that returns the sum of values returned by individual methods +using the + method combination, you might write this: + + (defgeneric priority (job) + (:documentation "Return the priority at which the job should be run.") + (:method-combination +)) + + By default all these method combinations combine the primary methods +in most-specific-first order. However, you can reverse the order by +including the keyword :most-specific-last after the name of the method +combination in the DEFGENERIC form. The order probably doesn't matter +if you're using the + combination unless the methods have side effects, +but for demonstration purposes you can change priority to use +most-specific-last order like this: + + (defgeneric priority (job) + (:documentation "Return the priority at which the job should be run.") + (:method-combination + :most-specific-last)) + + The primary methods on a generic function that uses one of these +combinations must be qualified with the name of the method combination. 
+Thus, a primary method defined on priority might look like this: + + (defmethod priority + ((job express-job)) 10) + + This makes it obvious when you see a method definition that it's part +of a particular kind of generic function. + + All the simple built-in method combinations also support :around +methods that work like :around methods in the standard method +combination: the most specific :around method runs before any other +methods, and CALL-NEXT-METHOD is used to pass control to +less-and-less-specific :around methods until it reaches the combined +primary methods. The :most-specific-last option doesn't affect the +order of :around methods. And, as I mentioned before, the built-in +method combinations don't support :before or :after methods. + + Like the standard method combination, these method combinations don't +allow you to do anything you couldn't do "by hand." Rather, they allow +you to express what you want and let the language take care of wiring +everything together for you, making your code both more concise and more +expressive. + + That said, probably 99 percent of the time, the standard method +combination will be exactly what you want. Of the remaining 1 percent, +probably 99 percent of them will be handled by one of the simple +built-in method combinations. If you run into one of the 1 percent of 1 +percent of cases where none of the built-in combinations suffices, you +can look up DEFINE-METHOD-COMBINATION in your favorite Common Lisp +reference. + + +File: pcl.info, Node: 16-8, Next: 16-9, Prev: 16-7, Up: Chapter 16 + +Multimethods +============ + +Methods that explicitly specialize more than one of the generic +function's required parameters are called multimethods. Multimethods +are where generic functions and message passing really part ways. +Multimethods don't fit into message-passing languages because they don't +belong to a particular class; instead, each multimethod defines a part +of the implementations of a given generic function that applies when the +generic function is invoked with arguments that match all the method's +specialized parameters. + +* Menu: + +* 16-8-1:: Multimethods vs. Method Overloading + + Multimethods are perfect for all those situations where, in a +message-passing language, you struggle to decide to which class a +certain behavior ought to belong. Is the sound a drum makes when it's +hit with a drumstick a function of what kind of drum it is or what kind +of stick you use to hit it? Both, of course. To model this situation +in Common Lisp, you simply define a generic function beat that takes two +arguments. + + (defgeneric beat (drum stick) + (:documentation + "Produce a sound by hitting the given drum with the given stick.")) + + Then you can define various multimethods to implement beat for the +combinations you care about. For example: + + (defmethod beat ((drum snare-drum) (stick wooden-drumstick)) ...) + (defmethod beat ((drum snare-drum) (stick brush)) ...) + (defmethod beat ((drum snare-drum) (stick soft-mallet)) ...) + (defmethod beat ((drum tom-tom) (stick wooden-drumstick)) ...) + (defmethod beat ((drum tom-tom) (stick brush)) ...) + (defmethod beat ((drum tom-tom) (stick soft-mallet)) ...) + + Multimethods don't help with the combinatorial explosion-if you need +to model five kinds of drums and six kinds of sticks, and every +combination makes a different sound, there's no way around it; you need +thirty different methods to implement all the combinations, with or +without multimethods. 
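+
+   To get a feel for the dispatching code you'd otherwise have to
+write, here's a rough sketch (it assumes the drum and stick classes
+just shown; the "sounds" are invented for the example) of a beat method
+specialized only on the drum that dispatches on the stick by hand with
+TYPECASE:
+
+     (defmethod beat ((drum snare-drum) stick)
+       (typecase stick
+         (wooden-drumstick (format t "crack!~%"))
+         (brush            (format t "swish~%"))
+         (soft-mallet      (format t "thud~%"))))
+
+   And you'd need a similar TYPECASE in the method for each kind of
+drum.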
What multimethods do save you from is having to +write a bunch of dispatching code by letting you use the same built-in +polymorphic dispatching that's so useful when dealing with methods +specialized on a single parameter. (1) + + Multimethods also save you from having to tightly couple one set of +classes with the other. In the drum/stick example, nothing requires the +implementation of the drum classes to know about the various classes of +drumstick, and nothing requires the drumstick classes to know anything +about the various classes of drum. The multimethods connect the +otherwise independent classes to describe their joint behavior without +requiring any cooperation from the classes themselves. + + ---------- Footnotes ---------- + + (1) 11 + + +File: pcl.info, Node: 16-8-1, Next: 16-8, Prev: 16-8, Up: 16-8 + +Multimethods vs. Method Overloading +=================================== + +Programmers used to statically typed message-passing languages such as +Java and C++ may think multimethods sound similar to a feature of those +languages called method overloading. However, these two language +features are actually quite different since overloaded methods are +chosen at compile time, based on the compile-time type of the arguments, +not at runtime. To see how this works, consider the following two Java +classes: + + public class A { + public void foo(A a) { System.out.println("A/A"); } + public void foo(B b) { System.out.println("A/B"); } + } + public class B extends A { + public void foo(A a) { System.out.println("B/A"); } + public void foo(B b) { System.out.println("B/B"); } + } + + Now consider what happens when you run the main method from this +class. + + public class Main { + public static void main(String[] argv) { + A obj = argv[0].equals("A") ? new A() : new B(); + obj.foo(obj); + } + } + + When you tell Main to instantiate an A, it prints "A/A" as you'd +probably expect. + + bash$ java com.gigamonkeys.Main A + A/A + + However, if you tell Main to instantiate a B, then the true type of +obj is taken into account for only half the dispatching. + + bash$ java com.gigamonkeys.Main B + B/A + + If overloaded methods worked like Common Lisp's multimethods, then +that would print "B/B" instead. It is possible to implement multiple +dispatch by hand in message-passing languages, but this runs against the +grain of the message-passing model since the code in a multiply +dispatched method doesn't belong to any one class. + + +File: pcl.info, Node: 16-9, Next: Chapter 17, Prev: 16-8, Up: Chapter 16 + +To Be Continued . . . +===================== + +I've covered the basics-and a bit beyond-of generic functions, the verbs +of Common Lisp's object system. In the next chapter I'll show you how +to define your own classes. + + +File: pcl.info, Node: Chapter 17, Next: Chapter 18, Prev: Chapter 16, Up: Top + +17. Object Reorientation: Classes +================================= + +If generic functions are the verbs of the object system, classes are the +nouns. As I mentioned in the previous chapter, all values in a Common +Lisp program are instances of some class. Furthermore, all classes are +organized into a single hierarchy rooted at the class T. + + The class hierarchy consists of two major families of classes, +built-in and user-defined classes. Classes that represent the data +types you've been learning about up until now, classes such as INTEGER, +STRING, and LIST, are all built-in. 
They live in their own section of +the class hierarchy, arranged into appropriate sub- and superclass +relationships, and are manipulated by the functions I've been discussing +for much of the book up until now. You can't subclass these classes, +but, as you saw in the previous chapter, you can define methods that +specialize on them, effectively extending the behavior of those classes. +(1) + + But when you want to create new nouns-for instance, the classes used +in the previous chapter for representing bank accounts-you need to +define your own classes. That's the subject of this chapter. + +* Menu: + +* 17-1:: DEFCLASS +* 17-2:: Slot Specifiers +* 17-3:: Object Initialization +* 17-4:: Accessor Functions +* 17-5:: WITH-SLOTS and WITH-ACCESSORS +* 17-6:: Class-Allocated Slots +* 17-7:: Slots and Inheritance +* 17-8:: Multiple Inheritance +* 17-9:: Good Object-Oriented Design + + ---------- Footnotes ---------- + + (1) Defining new methods for an existing class may seem strange to +folks used to statically typed languages such as C++ and Java in which +all the methods of a class must be defined as part of the class +definition. But programmers with experience in dynamically typed +object-oriented languages such as Smalltalk and Objective C will find +nothing strange about adding new behaviors to existing classes. + + +File: pcl.info, Node: 17-1, Next: 17-2, Prev: Chapter 17, Up: Chapter 17 + +DEFCLASS +======== + +You create user-defined classes with the DEFCLASS macro. Because +behaviors are associated with a class by defining generic functions and +methods specialized on the class, DEFCLASS is responsible only for +defining the class as a data type. + + The three facets of the class as a data type are its name, its +relation to other classes, and the names of the slots that make up +instances of the class. (1) The basic form of a DEFCLASS is quite +simple. + + (defclass name (direct-superclass-name*) + (slot-specifier*)) + +* Menu: + +* 17-1-1:: What Are "User-Defined Classes"? + + As with functions and variables, you can use any symbol as the name +of a new class. (2) Class names are in a separate namespace from both +functions and variables, so you can have a class, function, and variable +all with the same name. You'll use the class name as the argument to +MAKE-INSTANCE, the function that creates new instances of user-defined +classes. + + The direct-superclass-names specify the classes of which the new +class is a subclass. If no superclasses are listed, the new class will +directly subclass STANDARD-OBJECT. Any classes listed must be other +user-defined classes, which ensures that each new class is ultimately +descended from STANDARD-OBJECT. STANDARD-OBJECT in turn subclasses T, so +all user-defined classes are part of the single class hierarchy that +also contains all the built-in classes. + + Eliding the slot specifiers for a moment, the DEFCLASS forms of some +of the classes you used in the previous chapter might look like this: + + (defclass bank-account () ...) + + (defclass checking-account (bank-account) ...) + + (defclass savings-account (bank-account) ...) + + I'll discuss in the section "Multiple Inheritance" what it means to +list more than one direct superclass in direct-superclass-names. + + ---------- Footnotes ---------- + + (1) In other object-oriented languages, slots might be called fields, +member variables, or attributes. 
+ + (2) As when naming functions and variables, it's not quite true that +you can use any symbol as a class name-you can't use names defined by +the language standard. You'll see in Chapter 21 how to avoid such name +conflicts. + + +File: pcl.info, Node: 17-1-1, Next: 17-1, Prev: 17-1, Up: 17-1 + +What Are "User-Defined Classes"? +================================ + +The term user-defined classes isn't a term from the language +standard-technically what I'm talking about when I say user-defined +classes are classes that subclass STANDARD-OBJECT and whose metaclass is +STANDARD-CLASS. But since I'm not going to talk about the ways you can +define classes that don't subclass STANDARD-OBJECT and whose metaclass +isn't STANDARD-CLASS, you don't really have to worry about that. +User-defined isn't a perfect term for these classes since the +implementation may define certain classes the same way. However, to +call them standard classes would be even more confusing since the +built-in classes, such as INTEGER and STRING, are just as standard, if +not more so, because they're defined by the language standard but they +don't extend STANDARD-OBJECT. To further complicate matters, it's also +possible for users to define new classes that don't subclass +STANDARD-OBJECT. In particular, the macro DEFSTRUCT also defines new +classes. But that's largely for backward compatibility-DEFSTRUCT +predated CLOS and was retrofitted to define classes when CLOS was +integrated into the language. But the classes it creates are fairly +limited compared to DEFCLASSed classes. So in this chapter I'll be +discussing only classes defined with DEFCLASS that use the default +metaclass of STANDARD-CLASS, and I'll refer to them as user-defined for +lack of a better term. + + +File: pcl.info, Node: 17-2, Next: 17-3, Prev: 17-1, Up: Chapter 17 + +Slot Specifiers +=============== + +The bulk of a DEFCLASS form consists of the list of slot specifiers. +Each slot specifier defines a slot that will be part of each instance of +the class. Each slot in an instance is a place that can hold a value, +which can be accessed using the SLOT-VALUE function. SLOT-VALUE takes +an object and the name of a slot as arguments and returns the value of +the named slot in the given object. It can be used with SETF to set the +value of a slot in an object. + + A class also inherits slot specifiers from its superclasses, so the +set of slots actually present in any object is the union of all the +slots specified in a class's DEFCLASS form and those specified in all +its superclasses. + + At the minimum, a slot specifier names the slot, in which case the +slot specifier can be just a name. For instance, you could define a +bank-account class with two slots, customer-name and balance, like this: + + (defclass bank-account () + (customer-name + balance)) + + Each instance of this class will contain two slots, one to hold the +name of the customer the account belongs to and another to hold the +current balance. With this definition, you can create new bank-account +objects using MAKE-INSTANCE. + + (make-instance 'bank-account) ==> # + + The argument to MAKE-INSTANCE is the name of the class to +instantiate, and the value returned is the new object. (1) The printed +representation of an object is determined by the generic function +PRINT-OBJECT. In this case, the applicable method will be one provided +by the implementation, specialized on STANDARD-OBJECT. 
Since not every +object can be printed so that it can be read back, the STANDARD-OBJECT +print method uses the #<> syntax, which will cause the reader to signal +an error if it tries to read it. The rest of the representation is +implementation-defined but will typically be something like the output +just shown, including the name of the class and some distinguishing +value such as the address of the object in memory. In Chapter 23 you'll +see an example of how to define a method on PRINT-OBJECT to make objects +of a certain class be printed in a more informative form. + + Using the definition of bank-account just given, new objects will be +created with their slots unbound. Any attempt to get the value of an +unbound slot signals an error, so you must set a slot before you can +read it. + + (defparameter *account* (make-instance 'bank-account)) ==> *ACCOUNT* + (setf (slot-value *account* 'customer-name) "John Doe") ==> "John Doe" + (setf (slot-value *account* 'balance) 1000) ==> 1000 + + Now you can access the value of the slots. + + (slot-value *account* 'customer-name) ==> "John Doe" + (slot-value *account* 'balance) ==> 1000 + + ---------- Footnotes ---------- + + (1) The argument to MAKE-INSTANCE can actually be either the name of +the class or a class object returned by the function CLASS-OF or +FIND-CLASS. + + +File: pcl.info, Node: 17-3, Next: 17-4, Prev: 17-2, Up: Chapter 17 + +Object Initialization +===================== + +Since you can't do much with an object with unbound slots, it'd be nice +to be able to create objects with their slots already initialized. +Common Lisp provides three ways to control the initial value of slots. +The first two involve adding options to the slot specifier in the +DEFCLASS form: with the :initarg option, you can specify a name that can +then be used as a keyword parameter to MAKE-INSTANCE and whose argument +will be stored in the slot. A second option, :initform, lets you +specify a Lisp expression that will be used to compute a value for the +slot if no :initarg argument is passed to MAKE-INSTANCE. Finally, for +complete control over the initialization, you can define a method on the +generic function INITIALIZE-INSTANCE, which is called by MAKE-INSTANCE. +(1) + + A slot specifier that includes options such as :initarg or :initform +is written as a list starting with the name of the slot followed by the +options. For example, if you want to modify the definition of +bank-account to allow callers of MAKE-INSTANCE to pass the customer name +and the initial balance and to provide a default value of zero dollars +for the balance, you'd write this: + + (defclass bank-account () + ((customer-name + :initarg :customer-name) + (balance + :initarg :balance + :initform 0))) + + Now you can create an account and specify the slot values at the same +time. + + (defparameter *account* + (make-instance 'bank-account :customer-name "John Doe" :balance 1000)) + + (slot-value *account* 'customer-name) ==> "John Doe" + (slot-value *account* 'balance) ==> 1000 + + If you don't supply a :balance argument to MAKE-INSTANCE, the +SLOT-VALUE of balance will be computed by evaluating the form specified +with the :initform option. But if you don't supply a :customer-name +argument, the customer-name slot will be unbound, and an attempt to read +it before you set it will signal an error. 
+ + (slot-value (make-instance 'bank-account) 'balance) ==> 0 + (slot-value (make-instance 'bank-account) 'customer-name) ==> error + + If you want to ensure that the customer name is supplied when the +account is created, you can signal an error in the initform since it +will be evaluated only if an initarg isn't supplied. You can also use +initforms that generate a different value each time they're +evaluated-the initform is evaluated anew for each object. To experiment +with these techniques, you can modify the customer-name slot specifier +and add a new slot, account-number, that's initialized with the value of +an ever-increasing counter. + + (defvar *account-numbers* 0) + + (defclass bank-account () + ((customer-name + :initarg :customer-name + :initform (error "Must supply a customer name.")) + (balance + :initarg :balance + :initform 0) + (account-number + :initform (incf *account-numbers*)))) + + Most of the time the combination of :initarg and :initform options +will be sufficient to properly initialize an object. However, while an +initform can be any Lisp expression, it has no access to the object +being initialized, so it can't initialize one slot based on the value of +another. For that you need to define a method on the generic function +INITIALIZE-INSTANCE. + + The primary method on INITIALIZE-INSTANCE specialized on +STANDARD-OBJECT takes care of initializing slots based on their :initarg +and :initform options. Since you don't want to disturb that, the most +common way to add custom initialization code is to define an :after +method specialized on your class. (2) For instance, suppose you want to +add a slot account-type that needs to be set to one of the values :gold, +:silver, or :bronze based on the account's initial balance. You might +change your class definition to this, adding the account-type slot with +no options: + + (defclass bank-account () + ((customer-name + :initarg :customer-name + :initform (error "Must supply a customer name.")) + (balance + :initarg :balance + :initform 0) + (account-number + :initform (incf *account-numbers*)) + account-type)) + + Then you can define an :after method on INITIALIZE-INSTANCE that sets +the account-type slot based on the value that has been stored in the +balance slot. (3) + + (defmethod initialize-instance :after ((account bank-account) &key) + (let ((balance (slot-value account 'balance))) + (setf (slot-value account 'account-type) + (cond + ((>= balance 100000) :gold) + ((>= balance 50000) :silver) + (t :bronze))))) + + The &key in the parameter list is required to keep the method's +parameter list congruent with the generic function's-the parameter list +specified for the INITIALIZE-INSTANCE generic function includes &key in +order to allow individual methods to supply their own keyword parameters +but doesn't require any particular ones. Thus, every method must +specify &key even if it doesn't specify any &key parameters. + + On the other hand, if an INITIALIZE-INSTANCE method specialized on a +particular class does specify a &key parameter, that parameter becomes a +legal parameter to MAKE-INSTANCE when creating an instance of that +class. 
For instance, if the bank sometimes pays a percentage of the +initial balance as a bonus when an account is opened, you could +implement that using a method on INITIALIZE-INSTANCE that takes a +keyword argument to specify the percentage of the bonus like this: + + (defmethod initialize-instance :after ((account bank-account) + &key opening-bonus-percentage) + (when opening-bonus-percentage + (incf (slot-value account 'balance) + (* (slot-value account 'balance) (/ opening-bonus-percentage 100))))) + + By defining this INITIALIZE-INSTANCE method, you make +:opening-bonus-percentage a legal argument to MAKE-INSTANCE when +creating a bank-account object. + + CL-USER> (defparameter *acct* (make-instance + 'bank-account + :customer-name "Sally Sue" + :balance 1000 + :opening-bonus-percentage 5)) + *ACCT* + CL-USER> (slot-value *acct* 'balance) + 1050 + + ---------- Footnotes ---------- + + (1) Another way to affect the values of slots is with the +:default-initargs option to DEFCLASS. This option is used to specify +forms that will be evaluated to provide arguments for specific +initialization parameters that aren't given a value in a particular call +to MAKE-INSTANCE. You don't need to worry about :default-initargs for +now. + + (2) Adding an :after method to INITIALIZE-INSTANCE is the Common Lisp +analog to defining a constructor in Java or C++ or an __init__ method in +Python. + + (3) One mistake you might make until you get used to using auxiliary +methods is to define a method on INITIALIZE-INSTANCE but without the +:after qualifier. If you do that, you'll get a new primary method that +shadows the default one. You can remove the unwanted primary method +using the functions REMOVE-METHOD and FIND-METHOD. Certain development +environments may provide a graphical user interface to do the same +thing. + + (remove-method #'initialize-instance + (find-method #'initialize-instance () (list (find-class 'bank-account)))) + + +File: pcl.info, Node: 17-4, Next: 17-5, Prev: 17-3, Up: Chapter 17 + +Accessor Functions +================== + +Between MAKE-INSTANCE and SLOT-VALUE, you have all the tools you need +for creating and manipulating instances of your classes. Everything +else you might want to do can be implemented in terms of those two +functions. However, as anyone familiar with the principles of good +object-oriented programming practices knows, directly accessing the +slots (or fields or member variables) of an object can lead to fragile +code. The problem is that directly accessing slots ties your code too +tightly to the concrete structure of your class. For example, suppose +you decide to change the definition of bank-account so that, instead of +storing the current balance as a number, you store a list of +time-stamped withdrawals and deposits. Code that directly accesses the +balance slot will likely break if you change the class definition to +remove the slot or to store the new list in the old slot. On the other +hand, if you define a function, balance, that accesses the slot, you can +redefine it later to preserve its behavior even if the internal +representation changes. And code that uses such a function will +continue to work without modification. + + Another advantage to using accessor functions rather than direct +access to slots via SLOT-VALUE is that they let you limit the ways +outside code can modify a slot. 
(1) It may be fine for users of the +bank-account class to get the current balance, but you may want all +modifications to the balance to go through other functions you'll +provide, such as deposit and withdraw. If clients know they're supposed +to manipulate objects only through the published functional API, you can +provide a balance function but not make it SETFable if you want the +balance to be read-only. + + Finally, using accessor functions makes your code tidier since it +helps you avoid lots of uses of the rather verbose SLOT-VALUE function. + + It's trivial to define a function that reads the value of the balance +slot. + + (defun balance (account) + (slot-value account 'balance)) + + However, if you know you're going to define subclasses of +bank-account, it might be a good idea to define balance as a generic +function. That way, you can provide different methods on balance for +those subclasses or extend its definition with auxiliary methods. So +you might write this instead: + + (defgeneric balance (account)) + + (defmethod balance ((account bank-account)) + (slot-value account 'balance)) + + As I just discussed, you don't want callers to be able to directly +set the balance, but for other slots, such as customer-name, you may +also want to provide a function to set them. The cleanest way to define +such a function is as a SETF function. + + A SETF function is a way to extend SETF, defining a new kind of place +that it knows how to set. The name of a SETF function is a two-item +list whose first element is the symbol setf and whose second element is +a symbol, typically the name of a function used to access the place the +SETF function will set. A SETF function can take any number of +arguments, but the first argument is always the value to be assigned to +the place. (2) You could, for instance, define a SETF function to set +the customer-name slot in a bank-account like this: + + (defun (setf customer-name) (name account) + (setf (slot-value account 'customer-name) name)) + + After evaluating that definition, an expression like the following +one: + + (setf (customer-name my-account) "Sally Sue") + + will be compiled as a call to the SETF function you just defined with +"Sally Sue" as the first argument and the value of my-account as the +second argument. + + Of course, as with reader functions, you'll probably want your SETF +function to be generic, so you'd actually define it like this: + + (defgeneric (setf customer-name) (value account)) + + (defmethod (setf customer-name) (value (account bank-account)) + (setf (slot-value account 'customer-name) value)) + + And of course you'll also want to define a reader function for +customer-name. + + (defgeneric customer-name (account)) + + (defmethod customer-name ((account bank-account)) + (slot-value account 'customer-name)) + + This allows you to write the following: + + (setf (customer-name *account*) "Sally Sue") ==> "Sally Sue" + + (customer-name *account*) ==> "Sally Sue" + + There's nothing hard about writing these accessor functions, but it +wouldn't be in keeping with The Lisp Way to have to write them all by +hand. Thus, DEFCLASS supports three slot options that allow you to +automatically create reader and writer functions for a specific slot. + + The :reader option specifies a name to be used as the name of a +generic function that accepts an object as its single argument. When +the DEFCLASS is evaluated, the generic function is created, if it +doesn't already exist. 
Then a method specializing its single argument +on the new class and returning the value of the slot is added to the +generic function. The name can be anything, but it's typical to name it +the same as the slot itself. Thus, instead of explicitly writing the +balance generic function and method as shown previously, you could +change the slot specifier for the balance slot in the definition of +bank-account to this: + + (balance + :initarg :balance + :initform 0 + :reader balance) + + The :writer option is used to create a generic function and method +for setting the value of a slot. The function and method created follow +the requirements for a SETF function, taking the new value as the first +argument and returning it as the result, so you can define a SETF +function by providing a name such as (setf customer-name). For +instance, you could provide reader and writer methods for customer-name +equivalent to the ones you just wrote by changing the slot specifier to +this: + + (customer-name + :initarg :customer-name + :initform (error "Must supply a customer name.") + :reader customer-name + :writer (setf customer-name)) + + Since it's quite common to want both reader and writer functions, +DEFCLASS also provides an option, :accessor, that creates both a reader +function and the corresponding SETF function. So instead of the slot +specifier just shown, you'd typically write this: + + (customer-name + :initarg :customer-name + :initform (error "Must supply a customer name.") + :accessor customer-name) + + Finally, one last slot option you should know about is the +:documentation option, which you can use to provide a string that +documents the purpose of the slot. Putting it all together and adding a +reader method for the account-number and account-type slots, the +DEFCLASS form for the bank-account class would look like this: + + (defclass bank-account () + ((customer-name + :initarg :customer-name + :initform (error "Must supply a customer name.") + :accessor customer-name + :documentation "Customer's name") + (balance + :initarg :balance + :initform 0 + :reader balance + :documentation "Current account balance") + (account-number + :initform (incf *account-numbers*) + :reader account-number + :documentation "Account number, unique within a bank.") + (account-type + :reader account-type + :documentation "Type of account, one of :gold, :silver, or :bronze."))) + + ---------- Footnotes ---------- + + (1) Of course, providing an accessor function doesn't really limit +anything since other code can still use SLOT-VALUE to get at slots +directly. Common Lisp doesn't provide strict encapsulation of slots the +way some languages such as C++ and Java do; however, if the author of a +class provides accessor functions and you ignore them, using SLOT-VALUE +instead, you had better know what you're doing. It's also possible to +use the package system, which I'll discuss in Chapter 21, to make it +even more obvious that certain slots aren't to be accessed directly, by +not exporting the names of the slots. + + (2) One consequence of defining a SETF function-say, (setf foo)-is +that if you also define the corresponding accessor function, foo in this +case, you can use all the modify macros built upon SETF, such as INCF, +DECF, PUSH, and POP, on the new kind of place. 
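+
+   For example (a hypothetical sketch, not part of this chapter's
+banking code), given a class whose slot is defined with an :accessor
+option and holds a list, PUSH works directly on the accessor form:
+
+     (defclass task-queue ()
+       ((tasks :initform nil :accessor tasks)))
+
+     (defparameter *queue* (make-instance 'task-queue))
+
+     (push :send-statements (tasks *queue*)) ==> (:SEND-STATEMENTS)
+     (tasks *queue*)                         ==> (:SEND-STATEMENTS)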
+ + +File: pcl.info, Node: 17-5, Next: 17-6, Prev: 17-4, Up: Chapter 17 + +WITH-SLOTS and WITH-ACCESSORS +============================= + +While using accessor functions will make your code easier to maintain, +they can still be a bit verbose. And there will be times, when writing +methods that implement the low-level behaviors of a class, that you may +specifically want to access slots directly to set a slot that has no +writer function or to get at the slot value without causing any +auxiliary methods defined on the reader function to run. + + This is what SLOT-VALUE is for; however, it's still quite verbose. +To make matters worse, a function or method that accesses the same slot +several times can become clogged with calls to accessor functions and +SLOT-VALUE. For example, even a fairly simple method such as the +following, which assesses a penalty on a bank-account if its balance +falls below a certain minimum, is cluttered with calls to balance and +SLOT-VALUE: + + (defmethod assess-low-balance-penalty ((account bank-account)) + (when (< (balance account) *minimum-balance*) + (decf (slot-value account 'balance) (* (balance account) .01)))) + + And if you decide you want to directly access the slot value in order +to avoid running auxiliary methods, it gets even more cluttered. + + (defmethod assess-low-balance-penalty ((account bank-account)) + (when (< (slot-value account 'balance) *minimum-balance*) + (decf (slot-value account 'balance) (* (slot-value account 'balance) .01)))) + + Two standard macros, WITH-SLOTS and WITH-ACCESSORS, can help tidy up +this clutter. Both macros create a block of code in which simple +variable names can be used to refer to slots on a particular object. +WITH-SLOTS provides direct access to the slots, as if by SLOT-VALUE, +while WITH-ACCESSORS provides a shorthand for accessor methods. + + The basic form of WITH-SLOTS is as follows: + + (with-slots (slot*) instance-form + body-form*) + + Each element of slots can be either the name of a slot, which is also +used as a variable name, or a two-item list where the first item is a +name to use as a variable and the second is the name of the slot. The +instance-form is evaluated once to produce the object whose slots will +be accessed. Within the body, each occurrence of one of the variable +names is translated to a call to SLOT-VALUE with the object and the +appropriate slot name as arguments. (1) Thus, you can write +assess-low-balance-penalty like this: + + (defmethod assess-low-balance-penalty ((account bank-account)) + (with-slots (balance) account + (when (< balance *minimum-balance*) + (decf balance (* balance .01))))) + + or, using the two-item list form, like this: + + (defmethod assess-low-balance-penalty ((account bank-account)) + (with-slots ((bal balance)) account + (when (< bal *minimum-balance*) + (decf bal (* bal .01))))) + + If you had defined balance with an :accessor rather than just a +:reader, then you could also use WITH-ACCESSORS. The form of +WITH-ACCESSORS is the same as WITH-SLOTS except each element of the slot +list is a two-item list containing a variable name and the name of an +accessor function. Within the body of WITH-ACCESSORS, a reference to +one of the variables is equivalent to a call to the corresponding +accessor function. If the accessor function is SETFable, then so is the +variable. 
+ + (defmethod assess-low-balance-penalty ((account bank-account)) + (with-accessors ((balance balance)) account + (when (< balance *minimum-balance*) + (decf balance (* balance .01))))) + + The first balance is the name of the variable, and the second is the +name of the accessor function; they don't have to be the same. You +could, for instance, write a method to merge two accounts using two +calls to WITH-ACCESSORS, one for each account. + + (defmethod merge-accounts ((account1 bank-account) (account2 bank-account)) + (with-accessors ((balance1 balance)) account1 + (with-accessors ((balance2 balance)) account2 + (incf balance1 balance2) + (setf balance2 0)))) + + The choice of whether to use WITH-SLOTS versus WITH-ACCESSORS is the +same as the choice between SLOT-VALUE and an accessor function: +low-level code that provides the basic functionality of a class may use +SLOT-VALUE or WITH-SLOTS to directly manipulate slots in ways not +supported by accessor functions or to explicitly avoid the effects of +auxiliary methods that may have been defined on the accessor functions. +But you should generally use accessor functions or WITH-ACCESSORS unless +you have a specific reason not to. + + ---------- Footnotes ---------- + + (1) The "variable" names provided by WITH-SLOTS and WITH-ACCESSORS +aren't true variables; they're implemented using a special kind of +macro, called a symbol macro, that allows a simple name to expand into +arbitrary code. Symbol macros were introduced into the language to +support WITH-SLOTS and WITH-ACCESSORS, but you can also use them for +your own purposes. I'll discuss them in a bit more detail in Chapter +20. + + +File: pcl.info, Node: 17-6, Next: 17-7, Prev: 17-5, Up: Chapter 17 + +Class-Allocated Slots +===================== + +The last slot option you need to know about is :allocation. The value +of :allocation can be either :instance or :class and defaults to +:instance if not specified. When a slot has :class allocation, the slot +has only a single value, which is stored in the class and shared by all +instances. + + However, :class slots are accessed the same as :instance +slots-they're accessed with SLOT-VALUE or an accessor function, which +means you can access the slot value only through an instance of the +class even though it isn't actually stored in the instance. The +:initform and :initarg options have essentially the same effect except +the initform is evaluated once when the class is defined rather than +each time an instance is created. On the other hand, passing an initarg +to MAKE-INSTANCE will set the value, affecting all instances of the +class. + + Because you can't get at a class-allocated slot without an instance +of the class, class-allocated slots aren't really equivalent to static +or class fields in languages such as Java, C++, and Python. (1) Rather, +class-allocated slots are used primarily to save space; if you're going +to create many instances of a class and all instances are going to have +a reference to the same object-say, a pool of shared resources-you can +save the cost of each instance having its own reference by making the +slot class-allocated. + + ---------- Footnotes ---------- + + (1) The Meta Object Protocol (MOP), which isn't part of the language +standard but is supported by most Common Lisp implementations, provides +a function, class-prototype, that returns an instance of a class that +can be used to access class slots. 
If you're using an implementation +that supports the MOP and happen to be translating some code from +another language that makes heavy use of static or class fields, this +may give you a way to ease the translation. But it's not all that +idiomatic. + + +File: pcl.info, Node: 17-7, Next: 17-8, Prev: 17-6, Up: Chapter 17 + +Slots and Inheritance +===================== + +As I discussed in the previous chapter, classes inherit behavior from +their superclasses thanks to the generic function machinery-a method +specialized on class A is applicable not only to direct instances of A +but also to instances of A's subclasses. Classes also inherit slots +from their superclasses, but the mechanism is slightly different. + + In Common Lisp a given object can have only one slot with a +particular name. However, it's possible that more than one class in the +inheritance hierarchy of a given class will specify a slot with a +particular name. This can happen either because a subclass includes a +slot specifier with the same name as a slot specified in a superclass or +because multiple superclasses specify slots with the same name. + + Common Lisp resolves these situations by merging all the specifiers +with the same name from the new class and all its superclasses to create +a single specifier for each unique slot name. When merging specifiers, +different slot options are treated differently. For instance, since a +slot can have only a single default value, if multiple classes specify +an :initform, the new class uses the one from the most specific class. +This allows a subclass to specify a different default value than the one +it would otherwise inherit. + + On the other hand, :initargs needn't be exclusive-each :initarg +option in a slot specifier creates a keyword parameter that can be used +to initialize the slot; multiple parameters don't create a conflict, so +the new slot specifier contains all the :initargs. Callers of +MAKE-INSTANCE can use any of the :initargs to initialize the slot. If a +caller passes multiple keyword arguments that initialize the same slot, +then the leftmost argument in the call to MAKE-INSTANCE is used. + + Inherited :reader, :writer, and :accessor options aren't included in +the merged slot specifier since the methods created by the superclass's +DEFCLASS will already apply to the new class. The new class can, +however, create its own accessor functions by supplying its own :reader, +:writer, or :accessor options. + + Finally, the :allocation option is, like :initform, determined by the +most specific class that specifies the slot. Thus, it's possible for +all instances of one class to share a :class slot while instances of a +subclass may each have their own :instance slot of the same name. And a +sub-subclass may then redefine it back to :class slot, so all instances +of that class will again share a single slot. In the latter case, the +slot shared by instances of the sub-subclass is different than the slot +shared by the original superclass. + + For instance, suppose you have these classes: + + (defclass foo () + ((a :initarg :a :initform "A" :accessor a) + (b :initarg :b :initform "B" :accessor b))) + + (defclass bar (foo) + ((a :initform (error "Must supply a value for a")) + (b :initarg :the-b :accessor the-b :allocation :class))) + + When instantiating the class bar, you can use the inherited initarg, +:a, to specify a value for the slot a and, in fact, must do so to avoid +an error, since the :initform supplied by bar supersedes the one +inherited from foo. 
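+For instance (a sketch of a REPL interaction; the error is the one
+signaled by bar's :initform):
+
+     (a (make-instance 'bar :a "new a")) ==> "new a"
+     (make-instance 'bar)                ==> error
+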
To initialize the b slot, you can use either the +inherited initarg :b or the new initarg :the-b. However, because of the +:allocation option on the b slot in bar, the value specified will be +stored in the slot shared by all instances of bar. That same slot can +be accessed either with the method on the generic function b that +specializes on foo or with the new method on the generic function the-b +that specializes directly on bar. To access the a slot on either a foo +or a bar, you'll continue to use the generic function a. + + Usually merging slot definitions works quite nicely. However, it's +important to be aware when using multiple inheritance that two unrelated +slots that happen to have the same name can be merged into a single slot +in the new class. Thus, methods specialized on different classes could +end up manipulating the same slot when applied to a class that extends +those classes. This isn't much of a problem in practice since, as +you'll see in Chapter 21, you can use the package system to avoid +collisions between names in independently developed pieces of code. + + +File: pcl.info, Node: 17-8, Next: 17-9, Prev: 17-7, Up: Chapter 17 + +Multiple Inheritance +==================== + +All the classes you've seen so far have had only a single direct +superclass. Common Lisp also supports multiple inheritance-a class can +have multiple direct superclasses, inheriting applicable methods and +slot specifiers from all of them. + + Multiple inheritance doesn't dramatically change any of the +mechanisms of inheritance I've discussed so far-every user-defined class +already has multiple superclasses since they all extend STANDARD-OBJECT, +which extends T, and so have at least two superclasses. The wrinkle +that multiple inheritance adds is that a class can have more than one +direct superclass. This complicates the notion of class specificity +that's used both when building the effective methods for a generic +function and when merging inherited slot specifiers. + + That is, if classes could have only a single direct superclass, +ordering classes by specificity would be trivial-a class and all its +superclasses could be ordered in a straight line starting from the class +itself, followed by its single direct superclass, followed by its direct +superclass, all the way up to T. But when a class has multiple direct +superclasses, those superclasses are typically not related to each +other-indeed, if one was a subclass of another, you wouldn't need to +subclass both directly. In that case, the rule that subclasses are more +specific than their superclasses isn't enough to order all the +superclasses. So Common Lisp uses a second rule that sorts unrelated +superclasses according to the order they're listed in the DEFCLASS's +direct superclass list-classes earlier in the list are considered more +specific than classes later in the list. This rule is admittedly +somewhat arbitrary but does allow every class to have a linear class +precedence list, which can be used to determine which superclasses +should be considered more specific than others. Note, however, there's +no global ordering of classes-each class has its own class precedence +list, and the same classes can appear in different orders in different +classes' class precedence lists. + + To see how this works, let's add a class to the banking app: +money-market-account. 
A money market account combines the +characteristics of a checking account and a savings account: a customer +can write checks against it, but it also earns interest. You might +define it like this: + + (defclass money-market-account (checking-account savings-account) ()) + + The class precedence list for money-market-account will be as +follows: + + (money-market-account + checking-account + savings-account + bank-account + standard-object + t) + + Note how this list satisfies both rules: every class appears before +all its superclasses, and checking-account and savings-account appear in +the order specified in DEFCLASS. + + This class defines no slots of its own but will inherit slots from +both of its direct superclasses, including the slots they inherit from +their superclasses. Likewise, any method that's applicable to any class +in the class precedence list will be applicable to a +money-market-account object. Because all slot specifiers for the same +slot are merged, it doesn't matter that money-market-account inherits +the same slot specifiers from bank-account twice. (1) + + Multiple inheritance is easiest to understand when the different +superclasses provide completely independent slots and behaviors. For +instance, money-market-account will inherit slots and behaviors for +dealing with checks from checking-account and slots and behaviors for +computing interest from savings-account. You don't have to worry about +the class precedence list for methods and slots inherited from only one +superclass or another. + + However, it's also possible to inherit different methods for the same +generic function from different superclasses. In that case, the class +precedence list does come into play. For instance, suppose the banking +application defined a generic function print-statement used to generate +monthly statements. Presumably there would already be methods for +print-statement specialized on both checking-account and +savings-account. Both of these methods will be applicable to instances +of money-market-account, but the one specialized on checking-account +will be considered more specific than the one on savings-account because +checking-account precedes savings-account in money-market-account's +class precedence list. + + Assuming the inherited methods are all primary methods and you +haven't defined any other methods, the method specialized on +checking-account will be used if you invoke print-statement on +money-market-account. However, that won't necessarily give you the +behavior you want since you probably want a money market account's +statement to contain elements of both a checking account and a savings +account statement. + + You can modify the behavior of print-statement for +money-market-accounts in a couple ways. One straightforward way is to +define a new primary method specialized on money-market-account. This +gives you the most control over the new behavior but will probably +require more new code than some other options I'll discuss in a moment. +The problem is that while you can use CALL-NEXT-METHOD to call "up" to +the next most specific method, namely, the one specialized on +checking-account, there's no way to invoke a particular less-specific +method, such as the one specialized on savings-account. 
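+In code, such a method might look something like this sketch
+(print-money-market-details is a hypothetical helper, not from the
+book):
+
+     (defmethod print-statement ((account money-market-account))
+       (print-money-market-details account)  ; hypothetical helper
+       ;; CALL-NEXT-METHOD reaches the CHECKING-ACCOUNT method next;
+       ;; there's no way from here to invoke the SAVINGS-ACCOUNT
+       ;; method directly.
+       (call-next-method))
+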
Thus, if you +want to be able to reuse the code that prints the savings-account part +of the statement, you'll need to break that code into a separate +function, which you can then call directly from both the +money-market-account and savings-account print-statement methods. + + Another possibility is to write the primary methods of all three +classes to call CALL-NEXT-METHOD. Then the method specialized on +money-market-account will use CALL-NEXT-METHOD to invoke the method +specialized on checking-account. When that method calls +CALL-NEXT-METHOD, it will result in running the savings-account method +since it will be the next most specific method according to +money-market-account's class precedence list. + + Of course, if you're going to rely on a coding convention-that every +method calls CALL-NEXT-METHOD-to ensure all the applicable methods run +at some point, you should think about using auxiliary methods instead. +In this case, instead of defining primary methods on print-statement for +checking-account and savings-account, you can define those methods as +:after methods, defining a single primary method on bank-account. Then, +print-statement, called on a money-market-account, will print a basic +account statement, output by the primary method specialized on +bank-account, followed by details output by the :after methods +specialized on savings-account and checking-account. And if you want to +add details specific to money-market-accounts, you can define an :after +method specialized on money-market-account, which will run last of all. + + The advantage of using auxiliary methods is that it makes it quite +clear which methods are primarily responsible for implementing the +generic function and which ones are only contributing additional bits of +functionality. The disadvantage is that you don't get fine-grained +control over the order in which the auxiliary methods run-if you wanted +the checking-account part of the statement to print before the +savings-account part, you'd have to change the order in which the +money-market-account subclasses those classes. But that's a fairly +dramatic change that could affect other methods and inherited slots. In +general, if you find yourself twiddling the order of the direct +superclass list as a way of fine-tuning the behavior of specific +methods, you probably need to step back and rethink your approach. + + On the other hand, if you don't care exactly what the order is but +want it to be consistent across several generic functions, then using +auxiliary methods may be just the thing. For example, if in addition to +print-statement you have a print-detailed-statement generic function, +you can implement both functions using :after methods on the various +subclasses of bank-account, and the order of the parts of both a regular +and a detailed statement will be the same. + + ---------- Footnotes ---------- + + (1) In other words, Common Lisp doesn't suffer from the diamond +inheritance problem the way, say, C++ does. In C++, when one class +subclasses two classes that both inherit a member variable from a common +superclass, the bottom class inherits the member variable twice, leading +to no end of confusion. + + +File: pcl.info, Node: 17-9, Next: Chapter 18, Prev: 17-8, Up: Chapter 17 + +Good Object-Oriented Design +=========================== + +That's about it for the main features of Common Lisp's object system. 
+If you have lots of experience with object-oriented programming, you can +probably see how Common Lisp's features can be used to implement good +object-oriented designs. However, if you have less experience with +object orientation, you may need to spend some time absorbing the +object-oriented way of thinking. Unfortunately, that's a fairly large +topic and beyond the scope of this book. Or, as the man page for Perl's +object system puts it, "Now you need just to go off and buy a book about +object-oriented design methodology and bang your forehead with it for +the next six months or so." Or you can wait for some of the practical +chapters, later in this book, where you'll see several examples of how +these features are used in practice. For now, however, you're ready to +take a break from all this theory of object orientation and turn to the +rather different topic of how to make good use of Common Lisp's +powerful, but sometimes cryptic, FORMAT function. + + +File: pcl.info, Node: Chapter 18, Next: Chapter 19, Prev: Chapter 17, Up: Top + +18. A Few FORMAT Recipes +======================== + +Common Lisp's FORMAT function is-along with the extended LOOP macro-one +of the two Common Lisp features that inspires a strong emotional +response in a lot of Common Lisp users. Some love it; others hate it. +(1) + + FORMAT's fans love it for its great power and concision, while its +detractors hate it because of the potential for misuse and its opacity. +Complex FORMAT control strings sometimes bear a suspicious resemblance +to line noise, but FORMAT remains popular with Common Lispers who like +to be able to generate little bits of human-readable output without +having to clutter their code with lots of output-generating code. While +FORMAT's control strings can be cryptic, at least a single FORMAT +expression doesn't clutter things up too badly. For instance, suppose +you want to print the values in a list delimited with commas. You could +write this: + + (loop for cons on list + do (format t "~a" (car cons)) + when (cdr cons) do (format t ", ")) + + That's not too bad, but anyone reading this code has to mentally +parse it just to figure out that all it's doing is printing the contents +of list to standard output. On the other hand, you can tell at a glance +that the following expression is printing list, in some form, to +standard output: + + (format t "~{~a~^, ~}" list) + + If you care exactly what form the output will take, then you'll have +to examine the control string, but if all you want is a first-order +approximation of what this line of code is doing, that's immediately +available. + + At any rate, you should have at least a reading knowledge of FORMAT, +and it's worth getting a sense of what it can do before you affiliate +yourself with the pro- or anti-FORMAT camp. It's also important to +understand at least the basics of FORMAT because other standard +functions, such as the condition-signaling functions discussed in the +next chapter, use FORMAT-style control strings to generate output. + + To further complicate matters, FORMAT supports three quite different +kinds of formatting: printing tables of data, pretty-printing +s-expressions, and generating human-readable messages with interpolated +values. Printing tables of data as text is a bit passé these days; +it's one of those reminders that Lisp is nearly as old as FORTRAN. 
In +fact, several of the directives you can use to print floating-point +values in fixed-width fields were based quite directly on FORTRAN edit +descriptors, which are used in FORTRAN to read and print columns of data +arranged in fixed-width fields. However, using Common Lisp as a FORTRAN +replacement is beyond the scope of this book, so I won't discuss those +aspects of FORMAT. + + Pretty-printing is likewise beyond the scope of this book-not because +it's passé but just because it's too big a topic. Briefly, the Common +Lisp pretty printer is a customizable system for printing +block-structured data such as-but not limited to-s-expressions while +varying indentation and dynamically adding line breaks as needed. It's +a great thing when you need it, but it's not often needed in day-to-day +programming. (2) + + Instead, I'll focus on the parts of FORMAT you can use to generate +human-readable strings with interpolated values. Even limiting the +scope in that way, there's still a fair bit to cover. You shouldn't +feel obliged to remember every detail described in this chapter. You +can get quite far with just a few FORMAT idioms. I'll describe the most +important features of FORMAT first; it's up to you how much of a FORMAT +wizard you want to become. + +* Menu: + +* 18-1:: The FORMAT Function +* 18-2:: FORMAT Directives +* 18-3:: Basic Formatting +* 18-4:: Character and Integer Directives +* 18-5:: Floating-Point Directives +* 18-6:: English-Language Directives +* 18-7:: Conditional Formatting +* 18-8:: Iteration +* 18-9:: Hop, Skip, Jump +* 18-10:: And More . . . + + ---------- Footnotes ---------- + + (1) Of course, most folks realize it's not worth getting that worked +up over anything in a programming language and use it or not without a +lot of angst. On the other hand, it's interesting that these two +features are the two features in Common Lisp that implement what are +essentially domain-specific languages using a syntax not based on +s-expressions. The syntax of FORMAT's control strings is character +based, while the extended LOOP macro can be understood only in terms of +the grammar of the LOOP keywords. That one of the common knocks on both +FORMAT and LOOP is that they "aren't Lispy enough" is evidence that +Lispers really do like the s-expression syntax. + + (2) Readers interested in the pretty printer may want to read the +paper "XP: A Common Lisp Pretty Printing System" by Richard Waters. +It's a description of the pretty printer that was eventually +incorporated into Common Lisp. You can download it from +ftp://publications.ai.mit.edu/ai-publications/pdf/AIM-1102a.pdf. + + +File: pcl.info, Node: 18-1, Next: 18-2, Prev: Chapter 18, Up: Chapter 18 + +The FORMAT Function +=================== + +As you've seen in previous chapters, the FORMAT function takes two +required arguments: a destination for its output and a control string +that contains literal text and embedded directives. Any additional +arguments provide the values used by the directives in the control +string that interpolate values into the output. I'll refer to these +arguments as format arguments. + + The first argument to FORMAT, the destination for the output, can be +T, NIL, a stream, or a string with a fill pointer. T is shorthand for +the stream *STANDARD-OUTPUT*, while NIL causes FORMAT to generate its +output to a string, which it then returns. (1) If the destination is a +stream, the output is written to the stream. 
And if the destination is +a string with a fill pointer, the formatted output is added to the end +of the string and the fill pointer is adjusted appropriately. Except +when the destination is NIL and it returns a string, FORMAT returns NIL. + + The second argument, the control string, is, in essence, a program in +the FORMAT language. The FORMAT language isn't Lispy at all-its basic +syntax is based on characters, not s-expressions, and it's optimized for +compactness rather than easy comprehension. This is why a complex +FORMAT control string can end up looking like line noise. + + Most of FORMAT's directives simply interpolate an argument into the +output in one form or another. Some directives, such as ~%, which +causes FORMAT to emit a newline, don't consume any arguments. And +others, as you'll see, can consume more than one argument. One +directive even allows you to jump around in the list of arguments in +order to process the same argument more than once or to skip certain +arguments in certain situations. But before I discuss specific +directives, let's look at the general syntax of a directive. + + ---------- Footnotes ---------- + + (1) To slightly confuse matters, most other I/O functions also accept +T and NIL as stream designators but with a different meaning: as a +stream designator, T designates the bidirectional stream *TERMINAL-IO*, +while NIL designates *STANDARD-OUTPUT* as an output stream and +*STANDARD-INPUT* as an input stream. + + +File: pcl.info, Node: 18-2, Next: 18-3, Prev: 18-1, Up: Chapter 18 + +FORMAT Directives +================= + +All directives start with a tilde (~) and end with a single character +that identifies the directive. You can write the character in either +upper- or lowercase. Some directives take prefix parameters, which are +written immediately following the tilde, separated by commas, and used +to control things such as how many digits to print after the decimal +point when printing a floating-point number. For example, the ~$ +directive, one of the directives used to print floating-point values, by +default prints two digits following the decimal point. + + CL-USER> (format t "~$" pi) + 3.14 + NIL + + However, with a prefix parameter, you can specify that it should +print its argument to, say, five decimal places like this: + + CL-USER> (format t "~5$" pi) + 3.14159 + NIL + + The values of prefix parameters are either numbers, written in +decimal, or characters, written as a single quote followed by the +desired character. The value of a prefix parameter can also be derived +from the format arguments in two ways: A prefix parameter of v causes +FORMAT to consume one format argument and use its value for the prefix +parameter. And a prefix parameter of # will be evaluated as the number +of remaining format arguments. For example: + + CL-USER> (format t "~v$" 3 pi) + 3.142 + NIL + CL-USER> (format t "~#$" pi) + 3.1 + NIL + + I'll give some more realistic examples of how you can use the # +argument in the section "Conditional Formatting." + + You can also omit prefix parameters altogether. However, if you want +to specify one parameter but not the ones before it, you must include a +comma for each unspecified parameter. For instance, the ~F directive, +another directive for printing floating-point values, also takes a +parameter to control the number of decimal places to print, but it's the +second parameter rather than the first. 
If you want to use ~F to print +a number to five decimal places, you can write this: + + CL-USER> (format t "~,5f" pi) + 3.14159 + NIL + + You can also modify the behavior of some directives with colon and +at-sign modifiers, which are placed after any prefix parameters and +before the directive's identifying character. These modifiers change +the behavior of the directive in small ways. For instance, with a colon +modifier, the ~D directive used to output integers in decimal emits the +number with commas separating every three digits, while the at-sign +modifier causes ~D to include a plus sign when the number is positive. + + CL-USER> (format t "~d" 1000000) + 1000000 + NIL + CL-USER> (format t "~:d" 1000000) + 1,000,000 + NIL + CL-USER> (format t "~@d" 1000000) + +1000000 + NIL + + When it makes sense, you can combine the colon and at-sign modifiers +to get both modifications. + + CL-USER> (format t "~:@d" 1000000) + +1,000,000 + NIL + + In directives where the two modified behaviors can't be meaningfully +combined, using both modifiers is either undefined or given a third +meaning. + + +File: pcl.info, Node: 18-3, Next: 18-4, Prev: 18-2, Up: Chapter 18 + +Basic Formatting +================ + +Now you're ready to look at specific directives. I'll start with +several of the most commonly used directives, including some you've seen +in previous chapters. + + The most general-purpose directive is ~A, which consumes one format +argument of any type and outputs it in aesthetic (human-readable) form. +For example, strings are output without quotation marks or escape +characters, and numbers are output in a natural way for the type of +number. If you just want to emit a value for human consumption, this +directive is your best bet. + + (format nil "The value is: ~a" 10) ==> "The value is: 10" + (format nil "The value is: ~a" "foo") ==> "The value is: foo" + (format nil "The value is: ~a" (list 1 2 3)) ==> "The value is: (1 2 3)" + + A closely related directive, ~S, likewise consumes one format +argument of any type and outputs it. However, ~S tries to generate +output that can be read back in with READ. Thus, strings will be +enclosed in quotation marks, symbols will be package-qualified when +necessary, and so on. Objects that don't have a READable representation +are printed with the unreadable object syntax, #<>. With a colon +modifier, both the ~A and ~S directives emit NIL as () rather than NIL. +Both the ~A and ~S directives also take up to four prefix parameters, +which can be used to control whether padding is added after (or before +with the at-sign modifier) the value, but those parameters are only +really useful for generating tabular data. + + The other two most frequently used directives are ~%, which emits a +newline, and ~&, which emits a fresh line. The difference between the +two is that ~% always emits a newline, while ~& emits one only if it's +not already at the beginning of a line. This is handy when writing +loosely coupled functions that each generate a piece of output and that +need to be combined in different ways. For instance, if one function +generates output that ends with a newline (~%) and another function +generates some output that starts with a fresh line (~&), you don't have +to worry about getting an extra blank line if you call them one after +the other. Both of these directives can take a single prefix parameter +that specifies the number of newlines to emit. 
The ~% directive will
+simply emit that many newline characters, while the ~& directive will
+emit either n - 1 or n newlines, depending on whether it starts at the
+beginning of a line.
+
+   Less frequently used is the related ~~ directive, which causes FORMAT
+to emit a literal tilde. Like the ~% and ~& directives, it can be
+parameterized with a number that controls how many tildes to emit.
+
+
+File: pcl.info, Node: 18-4, Next: 18-5, Prev: 18-3, Up: Chapter 18
+
+Character and Integer Directives
+================================
+
+In addition to the general-purpose directives, ~A and ~S, FORMAT
+supports several directives that can be used to emit values of specific
+types in particular ways. One of the simplest of these is the ~C
+directive, which is used to emit characters. It takes no prefix
+arguments but can be modified with the colon and at-sign modifiers.
+Unmodified, its behavior is no different from ~A except that it works
+only with characters. The modified versions are more useful. With a
+colon modifier, ~:C outputs nonprinting characters such as space, tab,
+and newline by name. This is useful if you want to emit a message to
+the user about some character. For instance, the following:
+
+     (format t "Syntax error. Unexpected character: ~:c" char)
+
+   can emit messages like this:
+
+     Syntax error. Unexpected character: a
+
+   but also like the following:
+
+     Syntax error. Unexpected character: Space
+
+   With the at-sign modifier, ~@C will emit the character in Lisp's
+literal character syntax.
+
+     CL-USER> (format t "~@c" #\a)
+     #\a
+     NIL
+
+   With both the colon and at-sign modifiers, the ~C directive can print
+extra information about how to enter the character at the keyboard if it
+requires special key combinations. For instance, on the Macintosh, in
+certain applications you can enter a null character (character code 0 in
+ASCII or in any ASCII superset such as ISO-8859-1 or Unicode) by
+pressing the Control key and typing @. In OpenMCL, if you print the null
+character with the ~:C directive, it tells you this:
+
+     (format nil "~:@c" (code-char 0)) ==> "^@ (Control @)"
+
+   However, not all Lisps implement this aspect of the ~C directive.
+And even if they do, it may or may not be accurate-for instance, if
+you're running OpenMCL in SLIME, the C-@ key chord is intercepted by
+Emacs, invoking set-mark-command. (1)
+
+   Format directives dedicated to emitting numbers are another important
+category. While you can use the ~A and ~S directives to emit numbers,
+if you want fine control over how they're printed, you need to use one
+of the number-specific directives. The numeric directives can be
+divided into two subcategories: directives for formatting integer values
+and directives for formatting floating-point values.
+
+   Five closely related directives format integer values: ~D, ~X, ~O,
+~B, and ~R. The most frequently used is the ~D directive, which outputs
+integers in base 10.
+
+     (format nil "~d" 1000000) ==> "1000000"
+
+   As I mentioned previously, with a colon modifier it adds commas.
+
+     (format nil "~:d" 1000000) ==> "1,000,000"
+
+   And with an at-sign modifier, it always prints a sign.
+
+     (format nil "~@d" 1000000) ==> "+1000000"
+
+   And the two modifiers can be combined.
+
+     (format nil "~:@d" 1000000) ==> "+1,000,000"
+
+   The first prefix parameter can specify a minimum width for the
+output, and the second parameter can specify a padding character to use.
+The default padding character is space, and padding is always inserted
+before the number itself.
+
+     (format nil "~12d" 1000000)    ==> "     1000000"
+     (format nil "~12,'0d" 1000000) ==> "000001000000"
+
+   These parameters are handy for formatting things such as dates in a
+fixed-width format.
+
+     (format nil "~4,'0d-~2,'0d-~2,'0d" 2005 6 10) ==> "2005-06-10"
+
+   The third and fourth parameters are used in conjunction with the
+colon modifier: the third parameter specifies the character to use as
+the separator between groups of digits, and the fourth parameter
+specifies the number of digits per group. These parameters default to a
+comma and the number 3. Thus, you can use the directive ~:D without
+parameters to output large integers in standard format for the United
+States but can change the comma to a period and the grouping from 3 to 4
+with ~,,'.,4:D.
+
+     (format nil "~:d" 100000000)       ==> "100,000,000"
+     (format nil "~,,'.,4:d" 100000000) ==> "1.0000.0000"
+
+   Note that you must use commas to hold the places of the unspecified
+width and padding character parameters, allowing them to keep their
+default values.
+
+   The ~X, ~O, and ~B directives work just like the ~D directive except
+they emit numbers in hexadecimal (base 16), octal (base 8), and binary
+(base 2).
+
+     (format nil "~x" 1000000) ==> "f4240"
+     (format nil "~o" 1000000) ==> "3641100"
+     (format nil "~b" 1000000) ==> "11110100001001000000"
+
+   Finally, the ~R directive is the general radix directive. Its first
+parameter is a number between 2 and 36 (inclusive) that indicates what
+base to use. The remaining parameters are the same as the four
+parameters accepted by the ~D, ~X, ~O, and ~B directives, and the colon
+and at-sign modifiers modify its behavior in the same way. The ~R
+directive also has some special behavior when used with no prefix
+parameters, which I'll discuss in the section "English-Language
+Directives."
+
+   ---------- Footnotes ----------
+
+   (1) This variant on the ~C directive makes more sense on platforms
+like the Lisp Machines where key press events were represented by Lisp
+characters.
+
+
+File: pcl.info, Node: 18-5, Next: 18-6, Prev: 18-4, Up: Chapter 18
+
+Floating-Point Directives
+=========================
+
+Four directives format floating-point values: ~F, ~E, ~G, and ~$. The
+first three of these are the directives based on FORTRAN's edit
+descriptors. I'll skip most of the details of those directives since
+they mostly have to do with formatting floating-point values for use in
+tabular form. However, you can use the ~F, ~E, and ~$ directives to
+interpolate floating-point values into text. The ~G, or general,
+floating-point directive, on the other hand, combines aspects of the ~F
+and ~E directives in a way that only really makes sense for generating
+tabular output.
+
+   The ~F directive emits its argument, which should be a number, (1) in
+decimal format, possibly controlling the number of digits after the
+decimal point. The ~F directive is, however, allowed to use
+computerized scientific notation if the number is sufficiently large or
+small. The ~E directive, on the other hand, always emits numbers in
+computerized scientific notation. Both of these directives take a
+number of prefix parameters, but you need to worry only about the
+second, which controls the number of digits to print after the decimal
+point.
+
+     (format nil "~f" pi)   ==> "3.141592653589793d0"
+     (format nil "~,4f" pi) ==> "3.1416"
+     (format nil "~e" pi)   ==> "3.141592653589793d+0"
+     (format nil "~,4e" pi) ==> "3.1416d+0"
+
+   The ~$, or monetary, directive is similar to ~F but a bit simpler.
+As its name suggests, it's intended for emitting monetary units. With +no parameters, it's basically equivalent to ~,2F. To modify the number +of digits printed after the decimal point, you use the first parameter, +while the second parameter controls the minimum number of digits to +print before the decimal point. + + (format nil "~$" pi) ==> "3.14" + (format nil "~2,4$" pi) ==> "0003.14" + + All three directives, ~F, ~E, and ~$, can be made to always print a +sign, plus or minus, with the at-sign modifier. (2) + + ---------- Footnotes ---------- + + (1) Technically, if the argument isn't a real number, ~F is supposed +to format it as if by the ~D directive, which in turn behaves like the +~A directive if the argument isn't a number, but not all implementations +get this right. + + (2) Well, that's what the language standard says. For some reason, +perhaps rooted in a common ancestral code base, several Common Lisp +implementations don't implement this aspect of the ~F directive +correctly. + + +File: pcl.info, Node: 18-6, Next: 18-7, Prev: 18-5, Up: Chapter 18 + +English-Language Directives +=========================== + +Some of the handiest FORMAT directives for generating human-readable +messages are the ones for emitting English text. These directives allow +you to emit numbers as English words, to emit plural markers based on +the value of a format argument, and to apply case conversions to +sections of FORMAT's output. + + The ~R directive, which I discussed in "Character and Integer +Directives," when used with no base specified, prints numbers as English +words or Roman numerals. When used with no prefix parameter and no +modifiers, it emits the number in words as a cardinal number. + + (format nil "~r" 1234) ==> "one thousand two hundred thirty-four" + + With the colon modifier, it emits the number as an ordinal. + + (format nil "~:r" 1234) ==> "one thousand two hundred thirty-fourth" + + And with an at-sign modifier, it emits the number as a Roman numeral; +with both an at-sign and a colon, it emits "old-style" Roman numerals in +which fours and nines are written as IIII and VIIII instead of IV and +IX. + + (format nil "~@r" 1234) ==> "MCCXXXIV" + (format nil "~:@r" 1234) ==> "MCCXXXIIII" + + For numbers too large to be represented in the given form, ~R behaves +like ~D. + + To help you generate messages with words properly pluralized, FORMAT +provides the ~P directive, which simply emits an s unless the +corresponding argument is 1. + + (format nil "file~p" 1) ==> "file" + (format nil "file~p" 10) ==> "files" + (format nil "file~p" 0) ==> "files" + + Typically, however, you'll use ~P with the colon modifier, which +causes it to reprocess the previous format argument. + + (format nil "~r file~:p" 1) ==> "one file" + (format nil "~r file~:p" 10) ==> "ten files" + (format nil "~r file~:p" 0) ==> "zero files" + + With the at-sign modifier, which can be combined with the colon +modifier, ~P emits either y or ies. + + (format nil "~r famil~:@p" 1) ==> "one family" + (format nil "~r famil~:@p" 10) ==> "ten families" + (format nil "~r famil~:@p" 0) ==> "zero families" + + Obviously, ~P can't solve all pluralization problems and is no help +for generating messages in other languages, but it's handy for the cases +it does handle. And the ~[ directive, which I'll discuss in a moment, +gives you a more flexible way to conditionalize parts of FORMAT's +output. + + The last directive for dealing with emitting English text is ~(, +which allows you to control the case of text in the output. 
Each ~( is
+paired with a ~), and all the output generated by the portion of the
+control string between the two markers will be converted to all
+lowercase.
+
+     (format nil "~(~a~)" "FOO") ==> "foo"
+     (format nil "~(~@r~)" 124)  ==> "cxxiv"
+
+   You can modify ~( with an at sign to make it capitalize the first
+word in a section of text, with a colon to make it capitalize all
+words, and with both modifiers to convert all text to uppercase. (A
+word for the purpose of this directive is a sequence of alphanumeric
+characters delimited by nonalphanumeric characters or the ends of the
+text.)
+
+     (format nil "~(~a~)" "tHe Quick BROWN foX")   ==> "the quick brown fox"
+     (format nil "~@(~a~)" "tHe Quick BROWN foX")  ==> "The quick brown fox"
+     (format nil "~:(~a~)" "tHe Quick BROWN foX")  ==> "The Quick Brown Fox"
+     (format nil "~:@(~a~)" "tHe Quick BROWN foX") ==> "THE QUICK BROWN FOX"
+
+
+File: pcl.info, Node: 18-7, Next: 18-8, Prev: 18-6, Up: Chapter 18
+
+Conditional Formatting
+======================
+
+In addition to directives that interpolate arguments and modify other
+output, FORMAT provides several directives that implement simple control
+constructs within the control string. One of these, which you used in
+Chapter 9, is the conditional directive ~[. This directive is closed by
+a corresponding ~], and in between are a number of clauses separated by
+~;. The job of the ~[ directive is to pick one of the clauses, which is
+then processed by FORMAT. With no modifiers or parameters, the clause is
+selected by numeric index; the ~[ directive consumes a format argument,
+which should be a number, and takes the nth (zero-based) clause where N
+is the value of the argument.
+
+     (format nil "~[cero~;uno~;dos~]" 0) ==> "cero"
+     (format nil "~[cero~;uno~;dos~]" 1) ==> "uno"
+     (format nil "~[cero~;uno~;dos~]" 2) ==> "dos"
+
+   If the value of the argument is greater than the number of clauses,
+nothing is printed.
+
+     (format nil "~[cero~;uno~;dos~]" 3) ==> ""
+
+   However, if the last clause separator is ~:; instead of ~;, then the
+last clause serves as a default clause.
+
+     (format nil "~[cero~;uno~;dos~:;mucho~]" 3)   ==> "mucho"
+     (format nil "~[cero~;uno~;dos~:;mucho~]" 100) ==> "mucho"
+
+   It's also possible to specify the clause to be selected using a
+prefix parameter. While it'd be silly to use a literal value in the
+control string, recall that # used as a prefix parameter means the
+number of arguments remaining to be processed. Thus, you can define a
+format string such as the following:
+
+     (defparameter *list-etc*
+       "~#[NONE~;~a~;~a and ~a~:;~a, ~a~]~#[~; and ~a~:;, ~a, etc~].")
+
+   and then use it like this:
+
+     (format nil *list-etc*)                ==> "NONE."
+     (format nil *list-etc* 'a)             ==> "A."
+     (format nil *list-etc* 'a 'b)          ==> "A and B."
+     (format nil *list-etc* 'a 'b 'c)       ==> "A, B and C."
+     (format nil *list-etc* 'a 'b 'c 'd)    ==> "A, B, C, etc."
+     (format nil *list-etc* 'a 'b 'c 'd 'e) ==> "A, B, C, etc."
+
+   Note that the control string actually contains two ~[~]
+directives-both of which use # to select the clause to use. The first
+consumes between zero and two arguments, while the second consumes one
+more, if available. FORMAT will silently ignore any arguments not
+consumed while processing the control string.
+
+   With a colon modifier, the ~[ can contain only two clauses; the
+directive consumes a single argument and processes the first clause if
+the argument is NIL and the second clause otherwise.
You used this +variant of ~[ in Chapter 9 to generate pass/fail messages, like this: + + (format t "~:[FAIL~;pass~]" test-result) + + Note that either clause can be empty, but the directive must contain +a ~;. + + Finally, with an at-sign modifier, the ~[ directive can have only one +clause. The directive consumes one argument and, if it's non-NIL, +processes the clause after backing up to make the argument available to +be consumed again. + + (format nil "~@[x = ~a ~]~@[y = ~a~]" 10 20) ==> "x = 10 y = 20" + (format nil "~@[x = ~a ~]~@[y = ~a~]" 10 nil) ==> "x = 10 " + (format nil "~@[x = ~a ~]~@[y = ~a~]" nil 20) ==> "y = 20" + (format nil "~@[x = ~a ~]~@[y = ~a~]" nil nil) ==> "" + + +File: pcl.info, Node: 18-8, Next: 18-9, Prev: 18-7, Up: Chapter 18 + +Iteration +========= + +Another FORMAT directive that you've seen already, in passing, is the +iteration directive ~{. This directive tells FORMAT to iterate over the +elements of a list or over the implicit list of the format arguments. + + With no modifiers, ~{ consumes one format argument, which must be a +list. Like the ~[ directive, which is always paired with a ~] +directive, the ~{ directive is always paired with a closing ~}. The +text between the two markers is processed as a control string, which +draws its arguments from the list consumed by the ~{ directive. FORMAT +will repeatedly process this control string for as long as the list +being iterated over has elements left. In the following example, the ~{ +consumes the single format argument, the list (1 2 3), and then +processes the control string "~a, ", repeating until all the elements of +the list have been consumed. + + (format nil "~{~a, ~}" (list 1 2 3)) ==> "1, 2, 3, " + + However, it's annoying that in the output the last element of the +list is followed by a comma and a space. You can fix that with the ~^ +directive; within the body of a ~{ directive, the ~^ causes the +iteration to stop immediately, without processing the rest of the +control string, when no elements remain in the list. Thus, to avoid +printing the comma and space after the last element of a list, you can +precede them with a ~^. + + (format nil "~{~a~^, ~}" (list 1 2 3)) ==> "1, 2, 3" + + The first two times through the iteration, there are still +unprocessed elements in the list when the ~^ is processed. The third +time through, however, after the ~a directive consumes the 3, the ~^ +will cause FORMAT to break out of the iteration without printing the +comma and space. + + With an at-sign modifier, ~{ processes the remaining format arguments +as a list. + + (format nil "~@{~a~^, ~}" 1 2 3) ==> "1, 2, 3" + + Within the body of a ~{...~}, the special prefix parameter # refers +to the number of items remaining to be processed in the list rather than +the number of remaining format arguments. You can use that, along with +the ~[ directive, to print a comma-separated list with an "and" before +the last item like this: + + (format nil "~{~a~#[~;, and ~:;, ~]~}" (list 1 2 3)) ==> "1, 2, and 3" + + However, that doesn't really work right if the list is two items long +because it adds an extra comma. + + (format nil "~{~a~#[~;, and ~:;, ~]~}" (list 1 2)) ==> "1, and 2" + + You could fix that in a bunch of ways. The following takes advantage +of the behavior of ~@{ when nested inside another ~{ or ~@{ directive-it +iterates over whatever items remain in the list being iterated over by +the outer ~{. 
You can combine that with a ~#[ directive to make the
+following control string for formatting lists according to English
+grammar:
+
+     (defparameter *english-list*
+       "~{~#[~;~a~;~a and ~a~:;~@{~a~#[~;, and ~:;, ~]~}~]~}")
+
+     (format nil *english-list* '())        ==> ""
+     (format nil *english-list* '(1))       ==> "1"
+     (format nil *english-list* '(1 2))     ==> "1 and 2"
+     (format nil *english-list* '(1 2 3))   ==> "1, 2, and 3"
+     (format nil *english-list* '(1 2 3 4)) ==> "1, 2, 3, and 4"
+
+   While that control string verges on being "write-only" code, it's not
+too hard to understand if you take it a bit at a time. The outer
+~{...~} will consume and iterate over a list. The whole body of the
+iteration then consists of a ~#[...~]; the output generated each time
+through the iteration will thus depend on the number of items left to be
+processed from the list. Splitting apart the ~#[...~] directive on the
+~; clause separators, you can see that it's made up of four clauses, the
+last of which is a default clause because it's preceded by a ~:; rather
+than a plain ~;. The first clause, for when there are zero elements to
+be processed, is empty, which makes sense-if there are no more elements
+to be processed, the iteration would've stopped already. The second
+clause handles the case of one element with a simple ~a directive. Two
+elements are handled with "~a and ~a". And the default clause, which
+handles three or more elements, consists of another iteration directive,
+this time using ~@{ to iterate over the remaining elements of the list
+being processed by the outer ~{. And the body of that iteration is the
+control string that can handle a list of three or more elements
+correctly, which is fine in this context. Because the ~@{ loop consumes
+all the remaining list items, the outer loop iterates only once.
+
+   If you wanted to print something special such as "<empty>" when the
+list was empty, you have a couple ways to do it. Perhaps the easiest is
+to put the text you want into the first (zeroth) clause of the outer ~#[
+and then add a colon modifier to the closing ~} of the outer
+iteration-the colon forces the iteration to be run at least once, even
+if the list is empty, at which point FORMAT processes the zeroth clause
+of the conditional directive.
+
+     (defparameter *english-list*
+       "~{~#[<empty>~;~a~;~a and ~a~:;~@{~a~#[~;, and ~:;, ~]~}~]~:}")
+
+     (format nil *english-list* '()) ==> "<empty>"
+
+   Amazingly, the ~{ directive provides even more variations with
+different combinations of prefix parameters and modifiers. I won't
+discuss them other than to say you can use an integer prefix parameter
+to limit the maximum number of iterations and that, with a colon
+modifier, each element of the list (either an actual list or the list
+constructed by the ~@{ directive) must itself be a list whose elements
+will then be used as arguments to the control string in the ~:{...~}
+directive.
+
+
+File: pcl.info, Node: 18-9, Next: 18-10, Prev: 18-8, Up: Chapter 18
+
+Hop, Skip, Jump
+===============
+
+A much simpler directive is the ~* directive, which allows you to jump
+around in the list of format arguments. In its basic form, without
+modifiers, it simply skips the next argument, consuming it without
+emitting anything. More often, however, it's used with a colon
+modifier, which causes it to move backward, allowing the same argument
+to be used a second time.
For instance, you can use ~:* to print a
+numeric argument once as a word and once in numerals like this:
+
+     (format nil "~r ~:*(~d)" 1) ==> "one (1)"
+
+   Or you could implement a directive similar to ~:P for an irregular
+plural by combining ~:* with ~[.
+
+     (format nil "I saw ~r el~:*~[ves~;f~:;ves~]." 0) ==> "I saw zero elves."
+     (format nil "I saw ~r el~:*~[ves~;f~:;ves~]." 1) ==> "I saw one elf."
+     (format nil "I saw ~r el~:*~[ves~;f~:;ves~]." 2) ==> "I saw two elves."
+
+   In this control string, the ~R prints the format argument as a
+cardinal number. Then the ~:* directive backs up so the number is also
+used as the argument to the ~[ directive, selecting between the clauses
+for when the number is zero, one, or anything else. (1)
+
+   Within an ~{ directive, ~* skips or backs up over the items in the
+list. For instance, you could print only the keys of a plist like this:
+
+     (format nil "~{~s~*~^ ~}" '(:a 10 :b 20)) ==> ":A :B"
+
+   The ~* directive can also be given a prefix parameter. With no
+modifiers or with the colon modifier, this parameter specifies the
+number of arguments to move forward or backward and defaults to one.
+With an at-sign modifier, the prefix parameter specifies an absolute,
+zero-based index of the argument to jump to, defaulting to zero. The
+at-sign variant of ~* can be useful if you want to use different control
+strings to generate different messages for the same arguments and if
+different messages need to use the arguments in different orders. (2)
+
+   ---------- Footnotes ----------
+
+   (1) If you find "I saw zero elves" to be a bit clunky, you could use
+a slightly more elaborate format string that makes another use of ~:*
+like this:
+
+     (format nil "I saw ~[no~:;~:*~r~] el~:*~[ves~;f~:;ves~]." 0) ==> "I saw no elves."
+     (format nil "I saw ~[no~:;~:*~r~] el~:*~[ves~;f~:;ves~]." 1) ==> "I saw one elf."
+     (format nil "I saw ~[no~:;~:*~r~] el~:*~[ves~;f~:;ves~]." 2) ==> "I saw two elves."
+
+   (2) This kind of problem can arise when trying to localize an
+application and translate human-readable messages into different
+languages. FORMAT can help with some of these problems but is by no
+means a full-blown localization system.
+
+
+File: pcl.info, Node: 18-10, Next: Chapter 19, Prev: 18-9, Up: Chapter 18
+
+And More . . .
+==============
+
+And there's more-I haven't mentioned the ~? directive, which can take
+snippets of control strings from the format arguments, or the ~/
+directive, which allows you to call an arbitrary function to handle the
+next format argument. And then there are all the directives for
+generating tabular and pretty-printed output. But the directives
+discussed in this chapter should be plenty for the time being.
+
+   In the next chapter, you'll move on to Common Lisp's condition
+system, the Common Lisp analog to other languages' exception and error
+handling systems.
+
+
+File: pcl.info, Node: Chapter 19, Next: Chapter 20, Prev: Chapter 18, Up: Top
+
+19. Beyond Exception Handling: Conditions and Restarts
+======================================================
+
+One of Lisp's great features is its condition system. It serves a
+similar purpose to the exception handling systems in Java, Python, and
+C++ but is more flexible. In fact, its flexibility extends beyond error
+handling-conditions are more general than exceptions in that a condition
+can represent any occurrence during a program's execution that may be of
+interest to code at different levels on the call stack.
For example, in +the section "Other Uses for Conditions," you'll see that conditions can +be used to emit warnings without disrupting execution of the code that +emits the warning while allowing code higher on the call stack to +control whether the warning message is printed. For the time being, +however, I'll focus on error handling. + + The condition system is more flexible than exception systems because +instead of providing a two-part division between the code that signals +an error (1) and the code that handles it, (2) the condition system +splits the responsibilities into three parts-signaling a condition, +handling it, and restarting. In this chapter, I'll describe how you +could use conditions in part of a hypothetical application for analyzing +log files. You'll see how you could use the condition system to allow a +low-level function to detect a problem while parsing a log file and +signal an error, to allow mid-level code to provide several possible +ways of recovering from such an error, and to allow code at the highest +level of the application to define a policy for choosing which recovery +strategy to use. + + To start, I'll introduce some terminology: errors, as I'll use the +term, are the consequences of Murphy's law. If something can go wrong, +it will: a file that your program needs to read will be missing, a disk +that you need to write to will be full, the server you're talking to +will crash, or the network will go down. If any of these things happen, +it may stop a piece of code from doing what you want. But there's no +bug; there's no place in the code that you can fix to make the +nonexistent file exist or the disk not be full. However, if the rest of +the program is depending on the actions that were going to be taken, +then you'd better deal with the error somehow or you will have +introduced a bug. So, errors aren't caused by bugs, but neglecting to +handle an error is almost certainly a bug. + + So, what does it mean to handle an error? In a well-written program, +each function is a black box hiding its inner workings. Programs are +then built out of layers of functions: high-level functions are built on +top of the lower-level functions, and so on. This hierarchy of +functionality manifests itself at runtime in the form of the call stack: +if high calls medium, which calls low, when the flow of control is in +low, it's also still in medium and high, that is, they're still on the +call stack. + + Because each function is a black box, function boundaries are an +excellent place to deal with errors. Each function-low, for example-has +a job to do. Its direct caller-medium in this case-is counting on it to +do its job. However, an error that prevents it from doing its job puts +all its callers at risk: medium called low because it needs the work +done that low does; if that work doesn't get done, medium is in trouble. +But this means that medium's caller, high, is also in trouble-and so on +up the call stack to the very top of the program. On the other hand, +because each function is a black box, if any of the functions in the +call stack can somehow do their job despite underlying errors, then none +of the functions above it needs to know there was a problem-all those +functions care about is that the function they called somehow did the +work expected of it. + + In most languages, errors are handled by returning from a failing +function and giving the caller the choice of either recovering or +failing itself. 
Some languages use the normal function return +mechanism, while languages with exceptions return control by throwing or +raising an exception. Exceptions are a vast improvement over using +normal function returns, but both schemes suffer from a common flaw: +while searching for a function that can recover, the stack unwinds, +which means code that might recover has to do so without the context of +what the lower-level code was trying to do when the error actually +occurred. + + Consider the hypothetical call chain of high, medium, low. If low +fails and medium can't recover, the ball is in high's court. For high +to handle the error, it must either do its job without any help from +medium or somehow change things so calling medium will work and call it +again. The first option is theoretically clean but implies a lot of +extra code-a whole extra implementation of whatever it was medium was +supposed to do. And the further the stack unwinds, the more work that +needs to be redone. The second option-patching things up and +retrying-is tricky; for high to be able to change the state of the world +so a second call into medium won't end up causing an error in low, it'd +need an unseemly knowledge of the inner workings of both medium and low, +contrary to the notion that each function is a black box. + +* Menu: + +* 19-1:: The Lisp Way +* 19-2:: Conditions +* 19-3:: Condition Handlers +* 19-4:: Restarts +* 19-5:: Providing Multiple Restarts +* 19-6:: Other Uses for Conditions + + ---------- Footnotes ---------- + + (1) Throws or raises an exception in Java/Python terms + + (2) Catches the exception in Java/Python terms + + +File: pcl.info, Node: 19-1, Next: 19-2, Prev: Chapter 19, Up: Chapter 19 + +The Lisp Way +============ + +Common Lisp's error handling system gives you a way out of this +conundrum by letting you separate the code that actually recovers from +an error from the code that decides how to recover. Thus, you can put +recovery code in low-level functions without committing to actually +using any particular recovery strategy, leaving that decision to code in +high-level functions. + + To get a sense of how this works, let's suppose you're writing an +application that reads some sort of textual log file, such as a Web +server's log. Somewhere in your application you'll have a function to +parse the individual log entries. Let's assume you'll write a function, +parse-log-entry, that will be passed a string containing the text of a +single log entry and that is supposed to return a log-entry object +representing the entry. This function will be called from a function, +parse-log-file, that reads a complete log file and returns a list of +objects representing all the entries in the file. + + To keep things simple, the parse-log-entry function will not be +required to parse incorrectly formatted entries. It will, however, be +able to detect when its input is malformed. But what should it do when +it detects bad input? In C you'd return a special value to indicate +there was a problem. In Java or Python you'd throw or raise an +exception. In Common Lisp, you signal a condition. + + +File: pcl.info, Node: 19-2, Next: 19-3, Prev: 19-1, Up: Chapter 19 + +Conditions +========== + +A condition is an object whose class indicates the general nature of the +condition and whose instance data carries information about the details +of the particular circumstances that lead to the condition being +signaled. 
(1) In this hypothetical log analysis program, you might +define a condition class, malformed-log-entry-error, that +parse-log-entry will signal if it's given data it can't parse. + + Condition classes are defined with the DEFINE-CONDITION macro, which +works essentially the same as DEFCLASS except that the default +superclass of classes defined with DEFINE-CONDITION is CONDITION rather +than STANDARD-OBJECT. Slots are specified in the same way, and condition +classes can singly and multiply inherit from other classes that descend +from CONDITION. But for historical reasons, condition classes aren't +required to be instances of STANDARD-OBJECT, so some of the functions +you use with DEFCLASSed classes aren't required to work with conditions. +In particular, a condition's slots can't be accessed using SLOT-VALUE; +you must specify either a :reader option or an :accessor option for any +slot whose value you intend to use. Likewise, new condition objects are +created with MAKE-CONDITION rather than MAKE-INSTANCE. MAKE-CONDITION +initializes the slots of the new condition based on the :initargs it's +passed, but there's no way to further customize a condition's +initialization, equivalent to INITIALIZE-INSTANCE. (2) + + When using the condition system for error handling, you should define +your conditions as subclasses of ERROR, a subclass of CONDITION. Thus, +you might define malformed-log-entry-error, with a slot to hold the +argument that was passed to parse-log-entry, like this: + + (define-condition malformed-log-entry-error (error) + ((text :initarg :text :reader text))) + + ---------- Footnotes ---------- + + (1) In this respect, a condition is a lot like an exception in Java +or Python except not all conditions represent an error or exceptional +situation. + + (2) In some Common Lisp implementations, conditions are defined as +subclasses of STANDARD-OBJECT, in which case SLOT-VALUE, MAKE-INSTANCE, +and INITIALIZE-INSTANCE will work, but it's not portable to rely on it. + + +File: pcl.info, Node: 19-3, Next: 19-4, Prev: 19-2, Up: Chapter 19 + +Condition Handlers +================== + +In parse-log-entry you'll signal a malformed-log-entry-error if you +can't parse the log entry. You signal errors with the function ERROR, +which calls the lower-level function SIGNAL and drops into the debugger +if the condition isn't handled. You can call ERROR two ways: you can +pass it an already instantiated condition object, or you can pass it the +name of the condition class and any initargs needed to construct a new +condition, and it will instantiate the condition for you. The former is +occasionally useful for resignaling an existing condition object, but +the latter is more concise. Thus, you could write parse-log-entry like +this, eliding the details of actually parsing a log entry: + + (defun parse-log-entry (text) + (if (well-formed-log-entry-p text) + (make-instance 'log-entry ...) + (error 'malformed-log-entry-error :text text))) + + What happens when the error is signaled depends on the code above +parse-log-entry on the call stack. To avoid landing in the debugger, +you must establish a condition handler in one of the functions leading +to the call to parse-log-entry. When a condition is signaled, the +signaling machinery looks through a list of active condition handlers, +looking for a handler that can handle the condition being signaled based +on the condition's class. 
Each condition handler consists of a type
+specifier indicating what types of conditions it can handle and a
+function that takes a single argument, the condition. At any given
+moment there can be many active condition handlers established at
+various levels of the call stack. When a condition is signaled, the
+signaling machinery finds the most recently established handler whose
+type specifier is compatible with the condition being signaled and calls
+its function, passing it the condition object.
+
+   The handler function can then choose whether to handle the condition.
+The function can decline to handle the condition by simply returning
+normally, in which case control returns to the SIGNAL function, which
+will search for the next most recently established handler with a
+compatible type specifier. To handle the condition, the function must
+transfer control out of SIGNAL via a nonlocal exit. In the next
+section, you'll see how a handler can choose where to transfer control.
+However, many condition handlers simply want to unwind the stack to the
+place where they were established and then run some code. The macro
+HANDLER-CASE establishes this kind of condition handler. The basic form
+of a HANDLER-CASE is as follows:
+
+     (handler-case expression
+       error-clause*)
+
+   where each error-clause is of the following form:
+
+     (condition-type ([var]) code)
+
+   If the expression returns normally, then its value is returned by the
+HANDLER-CASE. The body of a HANDLER-CASE must be a single expression;
+you can use PROGN to combine several expressions into a single form.
+If, however, the expression signals a condition that's an instance of
+any of the condition-types specified in any error-clause, then the code
+in the appropriate error clause is executed and its value returned by
+the HANDLER-CASE. The var, if included, is the name of the variable that
+will hold the condition object when the handler code is executed. If
+the code doesn't need to access the condition object, you can omit the
+variable name.
+
+   For instance, one way to handle the malformed-log-entry-error
+signaled by parse-log-entry in its caller, parse-log-file, would be to
+skip the malformed entry. In the following function, the HANDLER-CASE
+expression will either return the value returned by parse-log-entry or
+return NIL if a malformed-log-entry-error is signaled. (The it in the
+LOOP clause collect it is another LOOP keyword, which refers to the
+value of the most recently evaluated conditional test, in this case the
+value of entry.)
+
+     (defun parse-log-file (file)
+       (with-open-file (in file :direction :input)
+         (loop for text = (read-line in nil nil) while text
+            for entry = (handler-case (parse-log-entry text)
+                          (malformed-log-entry-error () nil))
+            when entry collect it)))
+
+   When parse-log-entry returns normally, its value will be assigned to
+entry and collected by the LOOP. But if parse-log-entry signals a
+malformed-log-entry-error, then the error clause will return NIL, which
+won't be collected.
+
+* Menu:
+
+* 19-3-1:: JAVA-STYLE EXCEPTION HANDLING
+
+   This version of parse-log-file has one serious deficiency: it's doing
+too much. As its name suggests, the job of parse-log-file is to parse
+the file and produce a list of log-entry objects; if it can't, it's not
+its place to decide what to do instead.
What if you want to use
+parse-log-file in an application that wants to tell the user that the
+log file is corrupted or one that wants to recover from malformed
+entries by fixing them up and re-parsing them? Or maybe an application
+is fine with skipping them but only until a certain number of corrupted
+entries have been seen.
+
+   You could try to fix this problem by moving the HANDLER-CASE to a
+higher-level function. However, then you'd have no way to implement the
+current policy of skipping individual entries-when the error was
+signaled, the stack would be unwound all the way to the higher-level
+function, abandoning the parsing of the log file altogether. What you
+want is a way to provide the current recovery strategy without requiring
+that it always be used.
+
+
+File: pcl.info, Node: 19-3-1, Next: 19-3, Prev: 19-3, Up: 19-3
+
+JAVA-STYLE EXCEPTION HANDLING
+=============================
+
+HANDLER-CASE is the nearest analog in Common Lisp to Java- or
+Python-style exception handling. Where you might write this in Java:
+
+     try {
+       doStuff();
+       doMoreStuff();
+     } catch (SomeException se) {
+       recover(se);
+     }
+
+   or this in Python:
+
+     try:
+       doStuff()
+       doMoreStuff()
+     except SomeException, se:
+       recover(se)
+
+   in Common Lisp you'd write this:
+
+     (handler-case
+         (progn
+           (do-stuff)
+           (do-more-stuff))
+       (some-exception (se) (recover se)))
+
+
+File: pcl.info, Node: 19-4, Next: 19-5, Prev: 19-3, Up: Chapter 19
+
+Restarts
+========
+
+The condition system lets you do this by splitting the error handling
+code into two parts. You place code that actually recovers from errors
+into restarts, and condition handlers can then handle a condition by
+invoking an appropriate restart. You can place restart code in mid- or
+low-level functions, such as parse-log-file or parse-log-entry, while
+moving the condition handlers into the upper levels of the application.
+
+   To change parse-log-file so it establishes a restart instead of a
+condition handler, you can change the HANDLER-CASE to a RESTART-CASE.
+The form of RESTART-CASE is quite similar to a HANDLER-CASE except the
+names of restarts are just names, not necessarily the names of condition
+types. In general, a restart name should describe the action the
+restart takes. In parse-log-file, you can call the restart
+skip-log-entry since that's what it does. The new version will look
+like this:
+
+     (defun parse-log-file (file)
+       (with-open-file (in file :direction :input)
+         (loop for text = (read-line in nil nil) while text
+            for entry = (restart-case (parse-log-entry text)
+                          (skip-log-entry () nil))
+            when entry collect it)))
+
+   If you invoke this version of parse-log-file on a log file containing
+corrupted entries, it won't handle the error directly; you'll end up in
+the debugger. However, there among the various restarts presented by
+the debugger will be one called skip-log-entry, which, if you choose it,
+will cause parse-log-file to continue on its way as before. To avoid
+ending up in the debugger, you can establish a condition handler that
+invokes the skip-log-entry restart automatically.
+
+   The advantage of establishing a restart rather than having
+parse-log-file handle the error directly is that it makes parse-log-file
+usable in more situations. The higher-level code that invokes
+parse-log-file doesn't have to invoke the skip-log-entry restart. It
+can choose to handle the error at a higher level.
Or, as I'll show in +the next section, you can add restarts to parse-log-entry to provide +other recovery strategies, and then condition handlers can choose which +strategy they want to use. + + But before I can talk about that, you need to see how to set up a +condition handler that will invoke the skip-log-entry restart. You can +set up the handler anywhere in the chain of calls leading to +parse-log-file. This may be quite high up in your application, not +necessarily in parse-log-file's direct caller. For instance, suppose +the main entry point to your application is a function, log-analyzer, +that finds a bunch of logs and analyzes them with the function +analyze-log, which eventually leads to a call to parse-log-file. +Without any error handling, it might look like this: + + (defun log-analyzer () + (dolist (log (find-all-logs)) + (analyze-log log))) + + The job of analyze-log is to call, directly or indirectly, +parse-log-file and then do something with the list of log entries +returned. An extremely simple version might look like this: + + (defun analyze-log (log) + (dolist (entry (parse-log-file log)) + (analyze-entry entry))) + + where the function analyze-entry is presumably responsible for +extracting whatever information you care about from each log entry and +stashing it away somewhere. + + Thus, the path from the top-level function, log-analyzer, to +parse-log-entry, which actually signals an error, is as follows: + +[image src="imgs/restart-call-stack.png"] + + Assuming you always want to skip malformed log entries, you could +change this function to establish a condition handler that invokes the +skip-log-entry restart for you. However, you can't use HANDLER-CASE to +establish the condition handler because then the stack would be unwound +to the function where the HANDLER-CASE appears. Instead, you need to +use the lower-level macro HANDLER-BIND. The basic form of HANDLER-BIND +is as follows: + + (handler-bind (binding*) form*) + + where each binding is a list of a condition type and a handler +function of one argument. After the handler bindings, the body of the +HANDLER-BIND can contain any number of forms. Unlike the handler code +in HANDLER-CASE, the handler code must be a function object, and it must +accept a single argument. A more important difference between +HANDLER-BIND and HANDLER-CASE is that the handler function bound by +HANDLER-BIND will be run without unwinding the stack-the flow of control +will still be in the call to parse-log-entry when this function is +called. The call to INVOKE-RESTART will find and invoke the most +recently bound restart with the given name. So you can add a handler to +log-analyzer that will invoke the skip-log-entry restart established in +parse-log-file like this: (1) + + (defun log-analyzer () + (handler-bind ((malformed-log-entry-error + #'(lambda (c) + (invoke-restart 'skip-log-entry)))) + (dolist (log (find-all-logs)) + (analyze-log log)))) + + In this HANDLER-BIND, the handler function is an anonymous function +that invokes the restart skip-log-entry. You could also define a named +function that does the same thing and bind it instead. In fact, a +common practice when defining a restart is to define a function, with +the same name and taking a single argument, the condition, that invokes +the eponymous restart. Such functions are called restart functions. 
+You could define a restart function for skip-log-entry like this: + + (defun skip-log-entry (c) + (invoke-restart 'skip-log-entry)) + + Then you could change the definition of log-analyzer to this: + + (defun log-analyzer () + (handler-bind ((malformed-log-entry-error #'skip-log-entry)) + (dolist (log (find-all-logs)) + (analyze-log log)))) + + As written, the skip-log-entry restart function assumes that a +skip-log-entry restart has been established. If a +malformed-log-entry-error is ever signaled by code called from +log-analyzer without a skip-log-entry having been established, the call +to INVOKE-RESTART will signal a CONTROL-ERROR when it fails to find the +skip-log-entry restart. If you want to allow for the possibility that a +malformed-log-entry-error might be signaled from code that doesn't have +a skip-log-entry restart established, you could change the +skip-log-entry function to this: + + (defun skip-log-entry (c) + (let ((restart (find-restart 'skip-log-entry))) + (when restart (invoke-restart restart)))) + + FIND-RESTART looks for a restart with a given name and returns an +object representing the restart if the restart is found and NIL if not. +You can invoke the restart by passing the restart object to +INVOKE-RESTART. Thus, when skip-log-entry is bound with HANDLER-BIND, it +will handle the condition by invoking the skip-log-entry restart if one +is available and otherwise will return normally, giving other condition +handlers, bound higher on the stack, a chance to handle the condition. + + ---------- Footnotes ---------- + + (1) The compiler may complain if the parameter is never used. You +can silence that warning by adding a declaration (declare (ignore c)) as +the first expression in the LAMBDA body. + + +File: pcl.info, Node: 19-5, Next: 19-6, Prev: 19-4, Up: Chapter 19 + +Providing Multiple Restarts +=========================== + +Since restarts must be explicitly invoked to have any effect, you can +define multiple restarts, each providing a different recovery strategy. +As I mentioned earlier, not all log-parsing applications will +necessarily want to skip malformed entries. Some applications might +want parse-log-file to include a special kind of object representing +malformed entries in the list of log-entry objects; other applications +may have some way to repair a malformed entry and may want a way to pass +the fixed entry back to parse-log-entry. + + To allow more complex recovery protocols, restarts can take arbitrary +arguments, which are passed in the call to INVOKE-RESTART. You can +provide support for both the recovery strategies I just mentioned by +adding two restarts to parse-log-entry, each of which takes a single +argument. One simply returns the value it's passed as the return value +of parse-log-entry, while the other tries to parse its argument in the +place of the original log entry. + + (defun parse-log-entry (text) + (if (well-formed-log-entry-p text) + (make-instance 'log-entry ...) + (restart-case (error 'malformed-log-entry-error :text text) + (use-value (value) value) + (reparse-entry (fixed-text) (parse-log-entry fixed-text))))) + + The name USE-VALUE is a standard name for this kind of restart. +Common Lisp defines a restart function for USE-VALUE similar to the +skip-log-entry function you just defined. 
So, if you wanted to change +the policy on malformed entries to one that created an instance of +malformed-log-entry, you could change log-analyzer to this (assuming the +existence of a malformed-log-entry class with a :text initarg): + + (defun log-analyzer () + (handler-bind ((malformed-log-entry-error + #'(lambda (c) + (use-value + (make-instance 'malformed-log-entry :text (text c)))))) + (dolist (log (find-all-logs)) + (analyze-log log)))) + + You could also have put these new restarts into parse-log-file +instead of parse-log-entry. However, you generally want to put restarts +in the lowest-level code possible. It wouldn't, though, be appropriate +to move the skip-log-entry restart into parse-log-entry since that would +cause parse-log-entry to sometimes return normally with NIL, the very +thing you started out trying to avoid. And it'd be an equally bad idea +to remove the skip-log-entry restart on the theory that the condition +handler could get the same effect by invoking the use-value restart with +NIL as the argument; that would require the condition handler to have +intimate knowledge of how the parse-log-file works. As it stands, the +skip-log-entry is a properly abstracted part of the log-parsing API. + + +File: pcl.info, Node: 19-6, Next: Chapter 20, Prev: 19-5, Up: Chapter 19 + +Other Uses for Conditions +========================= + +While conditions are mainly used for error handling, they can be used +for other purposes-you can use conditions, condition handlers, and +restarts to build a variety of protocols between low- and high-level +code. The key to understanding the potential of conditions is to +understand that merely signaling a condition has no effect on the flow +of control. + + The primitive signaling function SIGNAL implements the mechanism of +searching for an applicable condition handler and invoking its handler +function. The reason a handler can decline to handle a condition by +returning normally is because the call to the handler function is just a +regular function call-when the handler returns, control passes back to +SIGNAL, which then looks for another, less recently bound handler that +can handle the condition. If SIGNAL runs out of condition handlers +before the condition is handled, it also returns normally. + + The ERROR function you've been using calls SIGNAL. If the error is +handled by a condition handler that transfers control via HANDLER-CASE +or by invoking a restart, then the call to SIGNAL never returns. But if +SIGNAL returns, ERROR invokes the debugger by calling the function +stored in *DEBUGGER-HOOK*. Thus, a call to ERROR can never return +normally; the condition must be handled either by a condition handler or +in the debugger. + + Another condition signaling function, WARN, provides an example of a +different kind of protocol built on the condition system. Like ERROR, +WARN calls SIGNAL to signal a condition. But if SIGNAL returns, WARN +doesn't invoke the debugger-it prints the condition to *ERROR-OUTPUT* +and returns NIL, allowing its caller to proceed. WARN also establishes +a restart, MUFFLE-WARNING, around the call to SIGNAL that can be used by +a condition handler to make WARN return without printing anything. The +restart function MUFFLE-WARNING finds and invokes its eponymous restart, +signaling a CONTROL-ERROR if no such restart is available. Of course, a +condition signaled with WARN could also be handled in some other way-a +condition handler could "promote" a warning to an error by handling it +as if it were an error. 
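+
+   As a minimal sketch of both ends of that protocol (the function
+do-stuff-that-warns is made up for this example), a handler can silence
+warnings by calling the restart function MUFFLE-WARNING, or promote them
+by resignaling the condition with ERROR:
+
+     (handler-bind ((warning #'muffle-warning))
+       (do-stuff-that-warns))                     ; warnings are muffled
+
+     (handler-bind ((warning #'(lambda (c) (error c))))
+       (do-stuff-that-warns))                     ; warnings become errors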
+ + For instance, in the log-parsing application, if there were ways a +log entry could be slightly malformed but still parsable, you could +write parse-log-entry to go ahead and parse the slightly defective +entries but to signal a condition with WARN when it did. Then the +larger application could choose to let the warning print, to muffle the +warning, or to treat the warning like an error, recovering the same way +it would from a malformed-log-entry-error. + + A third error-signaling function, CERROR, provides yet another +protocol. Like ERROR, CERROR will drop you into the debugger if the +condition it signals isn't handled. But like WARN, it establishes a +restart before it signals the condition. The restart, CONTINUE, causes +CERROR to return normally-if the restart is invoked by a condition +handler, it will keep you out of the debugger altogether. Otherwise, +you can use the restart once you're in the debugger to resume the +computation immediately after the call to CERROR. The function CONTINUE +finds and invokes the CONTINUE restart if it's available and returns NIL +otherwise. + + You can also build your own protocols on SIGNAL-whenever low-level +code needs to communicate information back up the call stack to +higher-level code, the condition mechanism is a reasonable mechanism to +use. But for most purposes, one of the standard error or warning +protocols should suffice. + + You'll use the condition system in future practical chapters, both +for regular error handling and, in Chapter 25, to help in handling a +tricky corner case of parsing ID3 files. Unfortunately, it's the fate +of error handling to always get short shrift in programming texts-proper +error handling, or lack thereof, is often the biggest difference between +illustrative code and hardened, production-quality code. The trick to +writing the latter has more to do with adopting a particularly rigorous +way of thinking about software than with the details of any particular +programming language constructs. That said, if your goal is to write +that kind of software, you'll find the Common Lisp condition system is +an excellent tool for writing robust code and one that fits quite nicely +into Common Lisp's incremental development style. + +* Menu: + +* 19-6-1:: Writing Robust Software + + In the next chapter I'll give a quick overview of some of the 25 +special operators you haven't had a chance to use yet, at least not +directly. + + +File: pcl.info, Node: 19-6-1, Next: 19-6, Prev: 19-6, Up: 19-6 + +Writing Robust Software +======================= + +For information on writing robust software, you could do worse than to +start by finding a copy of Software Reliability (John Wiley & Sons, +1976) by Glenford J. Meyers. Bertrand Meyer's writings on Design By +Contract also provide a useful way of thinking about software +correctness. For instance, see Chapters 11 and 12 of his +Object-Oriented Software Construction (Prentice Hall, 1997). Keep in +mind, however, that Bertrand Meyer is the inventor of Eiffel, a +statically typed bondage and discipline language in the Algol/Ada +school. While he has a lot of smart things to say about object +orientation and software reliability, there's a fairly wide gap between +his view of programming and The Lisp Way. Finally, for an excellent +overview of the larger issues surrounding building fault-tolerant +systems, see Chapter 3 of the classic Transaction Processing: Concepts +and Techniques (Morgan Kaufmann, 1993) by Jim Gray and Andreas Reuter. 
+ + +File: pcl.info, Node: Chapter 20, Next: Chapter 21, Prev: Chapter 19, Up: Top + +20. The Special Operators +========================= + +In a way, the most impressive aspect of the condition system covered in +the previous chapter is that if it wasn't already part of the language, +it could be written entirely as a user-level library. This is possible +because Common Lisp's special operators-while none touches directly on +signaling or handling conditions-provide enough access to the underlying +machinery of the language to be able to do things such as control the +unwinding of the stack. + + In previous chapters I've discussed the most frequently used special +operators, but it's worth being familiar with the others for two +reasons. First, some of the infrequently used special operators are +used infrequently simply because whatever need they address doesn't +arise that often. It's good to be familiar with these special operators +so when one of them is called for, you'll at least know it exists. +Second, because the 25 special operators-along with the basic rule for +evaluating function calls and the built-in data types-provide the +foundation for the rest of the language, a passing familiarity with them +will help you understand how the language works. + + In this chapter, I'll discuss all the special operators, some briefly +and some at length, so you can see how they fit together. I'll point +out which ones you can expect to use directly in your own code, which +ones serve as the basis for other constructs that you use all the time, +and which ones you'll rarely use directly but which can be handy in +macro-generated code. + +* Menu: + +* 20-1:: Controlling Evaluation +* 20-2:: Manipulating the Lexical Environment +* 20-3:: Local Flow of Control +* 20-4:: Unwinding the Stack +* 20-5:: Multiple Values +* 20-6:: EVAL-WHEN +* 20-7:: Other Special Operators + + +File: pcl.info, Node: 20-1, Next: 20-2, Prev: Chapter 20, Up: Chapter 20 + +Controlling Evaluation +====================== + +The first category of special operators contains the three operators +that provide basic control over the evaluation of forms. They're QUOTE, +IF, and PROGN, and I've discussed them all already. However, it's worth +noting how each of these special operators provides one fundamental kind +of control over the evaluation of one or more forms. QUOTE prevents +evaluation altogether and allows you to get at s-expressions as data. +IF provides the fundamental boolean choice operation from which all +other conditional execution constructs can be built. (1) And PROGN +provides the ability to sequence a number of forms. + + ---------- Footnotes ---------- + + (1) Of course, if IF wasn't a special operator but some other +conditional form, such as COND, was, you could build IF as a macro. +Indeed, in many Lisp dialects, starting with McCarthy's original Lisp, +COND was the primitive conditional evaluation operator. + + +File: pcl.info, Node: 20-2, Next: 20-3, Prev: 20-1, Up: Chapter 20 + +Manipulating the Lexical Environment +==================================== + +The largest class of special operators contains the operators that +manipulate and access the lexical environment. LET and LET*, which I've +already discussed, are examples of special operators that manipulate the +lexical environment since they can introduce new lexical bindings for +variables. Any construct, such as a DO or DOTIMES, that binds lexical +variables will have to expand into a LET or LET*. 
(1) The SETQ special +operator is one that accesses the lexical environment since it can be +used to set variables whose bindings were created by LET and LET*. + + Variables, however, aren't the only thing that can be named within a +lexical scope. While most functions are defined globally with DEFUN, +it's also possible to create local functions with the special operators +FLET and LABELS, local macros with MACROLET, and a special kind of +macro, called a symbol macro, with SYMBOL-MACROLET. + + Much like LET allows you to introduce a lexical variable whose scope +is the body of the LET, FLET and LABELS let you define a function that +can be referred to only within the scope of the FLET or LABELS form. +These special operators are handy when you need a local function that's +a bit too complex to define inline as a LAMBDA expression or that you +need to use more than once. Both have the same basic form, which looks +like this: + + (flet (function-definition*) + body-form*) + + and like this: + + (labels (function-definition*) + body-form*) + + where each function-definition has the following form: + + (name (parameter*) form*) + + The difference between FLET and LABELS is that the names of the +functions defined with FLET can be used only in the body of the FLET, +while the names introduced by LABELS can be used immediately, including +in the bodies of the functions defined by the LABELS. Thus, LABELS can +define recursive functions, while FLET can't. It might seem limiting +that FLET can't be used to define recursive functions, but Common Lisp +provides both FLET and LABELS because sometimes it's useful to be able +to write local functions that can call another function of the same +name, either a globally defined function or a local function from an +enclosing scope. + + Within the body of a FLET or LABELS, you can use the names of the +functions defined just like any other function, including with the +FUNCTION special operator. Since you can use FUNCTION to get the +function object representing a function defined with FLET or LABELS, and +since a FLET or LABELS can be in the scope of other binding forms such +as LETs, these functions can be closures. + + Because the local functions can refer to variables from the enclosing +scope, they can often be written to take fewer parameters than the +equivalent helper functions. This is particularly handy when you need +to pass a function that takes a single argument as a functional +parameter. For example, in the following function, which you'll see +again in Chapter 25, the FLETed function, count-version, takes a single +argument, as required by walk-directory, but can also use the variable +versions, introduced by the enclosing LET: + + (defun count-versions (dir) + (let ((versions (mapcar #'(lambda (x) (cons x 0)) '(2 3 4)))) + (flet ((count-version (file) + (incf (cdr (assoc (major-version (read-id3 file)) versions))))) + (walk-directory dir #'count-version :test #'mp3-p)) + versions)) + + This function could also be written using an anonymous function in +the place of the FLETed count-version, but giving the function a +meaningful name makes it a bit easier to read. + + And when a helper function needs to recurse, an anonymous function +just won't do. (2) When you don't want to define a recursive helper +function as a global function, you can use LABELS. 
For example, the
+following function, collect-leaves, uses the recursive helper function
+walk to walk a tree and gather all the atoms in the tree into a list,
+which collect-leaves then returns (after reversing it):
+
+     (defun collect-leaves (tree)
+       (let ((leaves ()))
+         (labels ((walk (tree)
+                    (cond
+                      ((null tree))
+                      ((atom tree) (push tree leaves))
+                      (t (walk (car tree))
+                         (walk (cdr tree))))))
+           (walk tree))
+         (nreverse leaves)))
+
+   Notice again how, within the walk function, you can refer to the
+variable, leaves, introduced by the enclosing LET.
+
+   FLET and LABELS are also useful operations to use in macro
+expansions-a macro can expand into code that contains a FLET or LABELS
+to create functions that can be used within the body of the macro. This
+technique can be used either to introduce functions that the user of the
+macro will call or simply as a way of organizing the code generated by
+the macro. This, for instance, is how a function such as
+CALL-NEXT-METHOD, which can be used only within a method definition,
+might be defined.
+
+   A near relative to FLET and LABELS is the special operator MACROLET,
+which you can use to define local macros. Local macros work just like
+global macros defined with DEFMACRO except without cluttering the global
+namespace. When a MACROLET form is evaluated, the body forms are
+evaluated with the local macro definitions in effect and possibly
+shadowing global function and macro definitions or local definitions
+from enclosing forms. Like FLET and LABELS, MACROLET can be used
+directly, but it's also a handy target for macro-generated code-by
+wrapping some user-supplied code in a MACROLET, a macro can provide
+constructs that can be used only within that code or can shadow a
+globally defined macro. You'll see an example of this latter use of
+MACROLET in Chapter 31.
+
+   Finally, one last macro-defining special operator is SYMBOL-MACROLET,
+which defines a special kind of macro called, appropriately enough, a
+symbol macro. Symbol macros are like regular macros except they can't
+take arguments and are referred to with a plain symbol rather than a
+list form. In other words, after you've defined a symbol macro with a
+particular name, any use of that symbol in a value position will be
+expanded and the resulting form evaluated in its place. This is how
+macros such as WITH-SLOTS and WITH-ACCESSORS are able to define
+"variables" that access the state of a particular object under the
+covers. For instance, the following WITH-SLOTS form:
+
+     (with-slots (x y z) foo (list x y z))
+
+   might expand into this code that uses SYMBOL-MACROLET:
+
+     (let ((#:g149 foo))
+       (symbol-macrolet
+           ((x (slot-value #:g149 'x))
+            (y (slot-value #:g149 'y))
+            (z (slot-value #:g149 'z)))
+         (list x y z)))
+
+   When the expression (list x y z) is evaluated, the symbols x, y, and
+z will be replaced with their expansions, such as (slot-value #:g149
+'x). (3)
+
+   Symbol macros are most often local, defined with SYMBOL-MACROLET, but
+Common Lisp also provides a macro DEFINE-SYMBOL-MACRO that defines a
+global symbol macro. A symbol macro defined with SYMBOL-MACROLET
+shadows other symbol macros of the same name defined with
+DEFINE-SYMBOL-MACRO or enclosing SYMBOL-MACROLET forms.
+
+   ---------- Footnotes ----------
+
+   (1) Well, technically those constructs could also expand into a
+LAMBDA expression since, as I mentioned in Chapter 6, LET could be
+defined-and was in some earlier Lisps-as a macro that expands into an
+invocation of an anonymous function.
+ + (2) Surprising as it may seem, it actually is possible to make +anonymous functions recurse. However, you must use a rather esoteric +mechanism known as the Y combinator. But the Y combinator is an +interesting theoretical result, not a practical programming tool, so is +well outside the scope of this book. + + (3) It's not required that WITH-SLOTS be implemented with +SYMBOL-MACROLET-in some implementations, WITH-SLOTS may walk the code +provided and generate an expansion with x, y, and z already replaced +with the appropriate SLOT-VALUE forms. You can see how your +implementation does it by evaluating this form: + + (macroexpand-1 '(with-slots (x y z) obj (list x y z))) + + However, walking the body is much easier for the Lisp implementation +to do than for user code; to replace x, y, and z only when they appear +in value positions requires a code walker that understands the syntax of +all special operators and that recursively expands all macro forms in +order to determine whether their expansions include the symbols in value +positions. The Lisp implementation obviously has such a code walker at +its disposal, but it's one of the few parts of Lisp that's not exposed +to users of the language. + + +File: pcl.info, Node: 20-3, Next: 20-4, Prev: 20-2, Up: Chapter 20 + +Local Flow of Control +===================== + +The next four special operators I'll discuss also create and use names +in the lexical environment but for the purposes of altering the flow of +control rather than defining new functions and macros. I've mentioned +all four of these special operators in passing because they provide the +underlying mechanisms used by other language features. They're BLOCK, +RETURN-FROM, TAGBODY, and GO. The first two, BLOCK and RETURN-FROM, are +used together to write code that returns immediately from a section of +code-I discussed RETURN-FROM in Chapter 5 as a way to return immediately +from a function, but it's more general than that. The other two, +TAGBODY and GO, provide a quite low-level goto construct that's the +basis for all the higher-level looping constructs you've already seen. + + The basic skeleton of a BLOCK form is this: + + (block name + form*) + + The name is a symbol, and the forms are Lisp forms. The forms are +evaluated in order, and the value of the last form is returned as the +value of the BLOCK unless a RETURN-FROM is used to return from the block +early. A RETURN-FROM form, as you saw in Chapter 5, consists of the +name of the block to return from and, optionally, a form that provides a +value to return. When a RETURN-FROM is evaluated, it causes the named +BLOCK to return immediately. If RETURN-FROM is called with a return +value form, the BLOCK will return the resulting value; otherwise, the +BLOCK evaluates to NIL. + + A BLOCK name can be any symbol, which includes NIL. Many of the +standard control construct macros, such as DO, DOTIMES, and DOLIST, +generate an expansion consisting of a BLOCK named NIL. This allows you +to use the RETURN macro, which is a bit of syntactic sugar for +(return-from nil ...), to break out of such loops. Thus, the following +loop will print at most ten random numbers, stopping as soon as it gets +a number greater than 50: + + (dotimes (i 10) + (let ((answer (random 100))) + (print answer) + (if (> answer 50) (return)))) + + Function-defining macros such as DEFUN, FLET, and LABELS, on the +other hand, wrap their bodies in a BLOCK with the same name as the +function. That's why you can use RETURN-FROM to return from a function. 
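+
+   As a small illustration of that implicit BLOCK (the function name
+find-first-even and its argument are invented for this sketch), the
+following function returns as soon as it finds an even number by using
+RETURN-FROM with its own name:
+
+     (defun find-first-even (numbers)
+       (dolist (n numbers)
+         (when (evenp n)
+           (return-from find-first-even n))))
+
+     (find-first-even '(1 3 4 5)) ==> 4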
+ + TAGBODY and GO have a similar relationship to each other as BLOCK and +RETURN-FROM: a TAGBODY form defines a context in which names are defined +that can be used by GO. The skeleton of a TAGBODY is as follows: + + (tagbody + tag-or-compound-form*) + + where each tag-or-compound-form is either a symbol, called a tag, or +a nonempty list form. The list forms are evaluated in order and the +tags ignored, except as I'll discuss in a moment. After the last form +of the TAGBODY is evaluated, the TAGBODY returns NIL. Anywhere within +the lexical scope of the TAGBODY you can use the GO special operator to +jump immediately to any of the tags, and evaluation will resume with the +form following the tag. For instance, you can write a trivial infinite +loop with TAGBODY and GO like this: + + (tagbody + top + (print 'hello) + (go top)) + + Note that while the tag names must appear at the top level of the +TAGBODY, not nested within other forms, the GO special operator can +appear anywhere within the scope of the TAGBODY. This means you could +write a loop that loops a random number of times like this: + + (tagbody + top + (print 'hello) + (when (plusp (random 10)) (go top))) + + An even sillier example of TAGBODY, which shows you can have multiple +tags in a single TAGBODY, looks like this: + + (tagbody + a (print 'a) (if (zerop (random 2)) (go c)) + b (print 'b) (if (zerop (random 2)) (go a)) + c (print 'c) (if (zerop (random 2)) (go b))) + + This form will jump around randomly printing as, bs, and cs until +eventually the last RANDOM expression returns 1 and the control falls +off the end of the TAGBODY. + + TAGBODY is rarely used directly since it's almost always easier to +write iterative constructs in terms of the existing looping macros. +It's handy, however, for translating algorithms written in other +languages into Common Lisp, either automatically or manually. An +example of an automatic translation tool is the FORTRAN-to-Common Lisp +translator, f2cl, that translates FORTRAN source code into Common Lisp +in order to make various FORTRAN libraries available to Common Lisp +programmers. Since many FORTRAN libraries were written before the +structured programming revolution, they're full of gotos. The f2cl +compiler can simply translate those gotos to GOs within appropriate +TAGBODYs. (1) + + Similarly, TAGBODY and GO can be handy when translating algorithms +described in prose or by flowcharts-for instance, in Donald Knuth's +classic series The Art of Computer Programming, he describes algorithms +using a "recipe" format: step 1, do this; step 2, do that; step 3, go +back to step 2; and so on. For example, on page 142 of The Art of +Computer Programming, Volume 2: Seminumerical Algorithms, Third Edition +(Addison-Wesley, 1998), he describes Algorithm S, which you'll use in +Chapter 27, in this form: + + Algorithm S (Selection sampling technique). To select n records at random from a set of N, where 0 < n <= N. + + S1. [Initialize.] Set t <-- 0, m <-- 0. (During this algorithm, m represents the number of records selected so far, and t is the total number of input records that we have dealt with.) + + S2. [Generate U.] Generate a random number U, uniformly distributed between zero and one. + + S3. [Test.] If (N - t)U >= n - m, go to step S5. + + S4. [Select.] Select the next record for the sample, and increase m and t by 1. If m < n, go to step S2; otherwise the sample is complete and the algorithm terminates. + + S5. [Skip.] 
Skip the next record (do not include it in the sample), increase t by 1, and go back to step S2. + + This description can be easily translated into a Common Lisp +function, after renaming a few variables, as follows: + + (defun algorithm-s (n max) ; max is N in Knuth's algorithm + (let (seen ; t in Knuth's algorithm + selected ; m in Knuth's algorithm + u ; U in Knuth's algorithm + (records ())) ; the list where we save the records selected + (tagbody + s1 + (setf seen 0) + (setf selected 0) + s2 + (setf u (random 1.0)) + s3 + (when (>= (* (- max seen) u) (- n selected)) (go s5)) + s4 + (push seen records) + (incf selected) + (incf seen) + (if (< selected n) + (go s2) + (return-from algorithm-s (nreverse records))) + s5 + (incf seen) + (go s2)))) + + It's not the prettiest code, but it's easy to verify that it's a +faithful translation of Knuth's algorithm. But, this code, unlike +Knuth's prose description, can be run and tested. Then you can start +refactoring, checking after each change that the function still works. +(2) + + After pushing the pieces around a bit, you might end up with +something like this: + + (defun algorithm-s (n max) + (loop for seen from 0 + when (< (* (- max seen) (random 1.0)) n) + collect seen and do (decf n) + until (zerop n))) + + While it may not be immediately obvious that this code correctly +implements Algorithm S, if you got here via a series of functions that +all behave identically to the original literal translation of Knuth's +recipe, you'd have good reason to believe it's correct. + + ---------- Footnotes ---------- + + (1) One version of f2cl is available as part of the Common Lisp Open +Code Collection (CLOCC): http://clocc.sourceforge.net/. By contrast, +consider the tricks the authors of f2j, a FORTRAN-to-Java translator, +have to play. Although the Java Virtual Machine (JVM) has a goto +instruction, it's not directly exposed in Java. So to compile FORTRAN +gotos, they first compile the FORTRAN code into legal Java source with +calls to a dummy class to represent the labels and gotos. Then they +compile the source with a regular Java compiler and postprocess the byte +codes to translate the dummy calls into JVM-level byte codes. Clever, +but what a pain. + + (2) Since this algorithm depends on values returned by RANDOM, you +may want to test it with a consistent random seed, which you can get by +binding *RANDOM-STATE* to the value of (make-random-state nil) around +each call to algorithm-s. For instance, you can do a basic sanity check +of algorithm-s by evaluating this: + + (let ((*random-state* (make-random-state nil))) (algorithm-s 10 200)) + + If your refactorings are all valid, this expression should evaluate +to the same list each time. + + +File: pcl.info, Node: 20-4, Next: 20-5, Prev: 20-3, Up: Chapter 20 + +Unwinding the Stack +=================== + +Another aspect of the language that special operators give you control +over is the behavior of the call stack. For instance, while you +normally use BLOCK and TAGBODY to manage the flow of control within a +single function, you can also use them, in conjunction with closures, to +force an immediate nonlocal return from a function further down on the +stack. That's because BLOCK names and TAGBODY tags can be closed over +by any code within the lexical scope of the BLOCK or TAGBODY. 
For +example, consider this function: + + (defun foo () + (format t "Entering foo~%") + (block a + (format t " Entering BLOCK~%") + (bar #'(lambda () (return-from a))) + (format t " Leaving BLOCK~%")) + (format t "Leaving foo~%")) + + The anonymous function passed to bar uses RETURN-FROM to return from +the BLOCK. But that RETURN-FROM doesn't get evaluated until the +anonymous function is invoked with FUNCALL or APPLY. Now suppose bar +looks like this: + + (defun bar (fn) + (format t " Entering bar~%") + (baz fn) + (format t " Leaving bar~%")) + + Still, the anonymous function isn't invoked. Now look at baz. + + (defun baz (fn) + (format t " Entering baz~%") + (funcall fn) + (format t " Leaving baz~%")) + + Finally the function is invoked. But what does it mean to +RETURN-FROM a block that's several layers up on the call stack? Turns +out it works fine-the stack is unwound back to the frame where the BLOCK +was established and control returns from the BLOCK. The FORMAT +expressions in foo, bar, and baz show this: + + CL-USER> (foo) + Entering foo + Entering BLOCK + Entering bar + Entering baz + Leaving foo + NIL + + Note that the only "Leaving . . ." message that prints is the one +that appears after the BLOCK in foo. + + Because the names of blocks are lexically scoped, a RETURN-FROM +always returns from the smallest enclosing BLOCK in the lexical +environment where the RETURN-FROM form appears even if the RETURN-FROM +is executed in a different dynamic context. For instance, bar could +also contain a BLOCK named a, like this: + + (defun bar (fn) + (format t " Entering bar~%") + (block a (baz fn)) + (format t " Leaving bar~%")) + + This extra BLOCK won't change the behavior of foo at all-the name a +is resolved lexically, at compile time, not dynamically, so the +intervening block has no effect on the RETURN-FROM. Conversely, the name +of a BLOCK can be used only by RETURN-FROMs appearing within the lexical +scope of the BLOCK; there's no way for code outside the block to return +from the block except by invoking a closure that closes over a +RETURN-FROM from the lexical scope of the BLOCK. + + TAGBODY and GO work the same way, in this regard, as BLOCK and +RETURN-FROM. When you invoke a closure that contains a GO form, if the +GO is evaluated, the stack will unwind back to the appropriate TAGBODY +and then jump to the specified tag. + + BLOCK names and TAGBODY tags, however, differ from lexical variable +bindings in one important way. As I discussed in Chapter 6, lexical +bindings have indefinite extent, meaning the bindings can stick around +even after the binding form has returned. BLOCKs and TAGBODYs, on the +other hand, have dynamic extent-you can RETURN-FROM a BLOCK or GO to a +TAGBODY tag only while the BLOCK or TAGBODY is on the call stack. In +other words, a closure that captures a block name or TAGBODY tag can be +passed down the stack to be invoked later, but it can't be returned up +the stack. If you invoke a closure that tries to RETURN-FROM a BLOCK, +after the BLOCK itself has returned, you'll get an error. Likewise, +trying to GO to a TAGBODY that no longer exists will cause an error. +(1) + + It's unlikely you'll need to use BLOCK and TAGBODY yourself for this +kind of stack unwinding. But you'll likely be using them indirectly +whenever you use the condition system, so understanding how they work +should help you understand better what exactly, for instance, invoking a +restart is doing. 
(2) + + CATCH and THROW are another pair of special operators that can force +the stack to unwind. You'll use these operators even less often than +the others mentioned so far-they're holdovers from earlier Lisp dialects +that didn't have Common Lisp's condition system. They definitely +shouldn't be confused with try/catch and try/except constructs from +languages such as Java and Python. + + CATCH and THROW are the dynamic counterparts of BLOCK and +RETURN-FROM. That is, you wrap CATCH around a body of code and then use +THROW to cause the CATCH form to return immediately with a specified +value. The difference is that the association between a CATCH and THROW +is established dynamically-instead of a lexically scoped name, the label +for a CATCH is an object, called a catch tag, and any THROW evaluated +within the dynamic extent of the CATCH that throws that object will +unwind the stack back to the CATCH form and cause it to return +immediately. Thus, you can write a version of the foo, bar, and baz +functions from before using CATCH and THROW instead of BLOCK and +RETURN-FROM like this: + + (defparameter *obj* (cons nil nil)) ; i.e. some arbitrary object + + (defun foo () + (format t "Entering foo~%") + (catch *obj* + (format t " Entering CATCH~%") + (bar) + (format t " Leaving CATCH~%")) + (format t "Leaving foo~%")) + + (defun bar () + (format t " Entering bar~%") + (baz) + (format t " Leaving bar~%")) + + (defun baz () + (format t " Entering baz~%") + (throw *obj* nil) + (format t " Leaving baz~%")) + + Notice how it isn't necessary to pass a closure down the stack-baz +can call THROW directly. The result is quite similar to the earlier +version. + + CL-USER> (foo) + Entering foo + Entering CATCH + Entering bar + Entering baz + Leaving foo + NIL + + However, CATCH and THROW are almost too dynamic. In both the CATCH +and the THROW, the tag form is evaluated, which means their values are +both determined at runtime. Thus, if some code in bar reassigned or +rebound *obj*, the THROW in baz wouldn't throw to the same CATCH. This +makes CATCH and THROW much harder to reason about than BLOCK and +RETURN-FROM. The only advantage, which the version of foo, bar, and baz +that use CATCH and THROW demonstrates, is there's no need to pass down a +closure in order for low-level code to return from a CATCH-any code that +runs within the dynamic extent of a CATCH can cause it to return by +throwing the right object. + + In older Lisp dialects that didn't have anything like Common Lisp's +condition system, CATCH and THROW were used for error handling. +However, to keep them manageable, the catch tags were usually just +quoted symbols, so you could tell by looking at a CATCH and a THROW +whether they would hook up at runtime. In Common Lisp you'll rarely +have any call to use CATCH and THROW since the condition system is so +much more flexible. + + The last special operator related to controlling the stack is another +one I've mentioned in passing before-UNWIND-PROTECT. UNWIND-PROTECT lets +you control what happens as the stack unwinds-to make sure that certain +code always runs regardless of how control leaves the scope of the +UNWIND-PROTECT, whether by a normal return, by a restart being invoked, +or by any of the ways discussed in this section. (3) The basic skeleton +of UNWIND-PROTECT looks like this: + + (unwind-protect protected-form + cleanup-form*) + + The single protected-form is evaluated, and then, regardless of how +it returns, the cleanup-forms are evaluated. 
If the protected-form +returns normally, then whatever it returns is returned from the +UNWIND-PROTECT after the cleanup forms run. The cleanup forms are +evaluated in the same dynamic environment as the UNWIND-PROTECT, so the +same dynamic variable bindings, restarts, and condition handlers will be +visible to code in cleanup forms as were visible just before the +UNWIND-PROTECT. + + You'll occasionally use UNWIND-PROTECT directly. More often you'll +use it as the basis for WITH- style macros, similar to WITH-OPEN-FILE, +that evaluate any number of body forms in a context where they have +access to some resource that needs to be cleaned up after they're done, +regardless of whether they return normally or bail via a restart or +other nonlocal exit. For example, if you were writing a database +library that defined functions open-connection and close-connection, you +might write a macro like this: (4) + + (defmacro with-database-connection ((var &rest open-args) &body body) + `(let ((,var (open-connection ,@open-args))) + (unwind-protect (progn ,@body) + (close-connection ,var)))) + + which lets you write code like this: + + (with-database-connection (conn :host "foo" :user "scott" :password "tiger") + (do-stuff conn) + (do-more-stuff conn)) + + and not have to worry about closing the database connection, since +the UNWIND-PROTECT will make sure it gets closed no matter what happens +in the body of the with-database-connection form. + + ---------- Footnotes ---------- + + (1) This is a pretty reasonable restriction-it's not entirely clear +what it'd mean to return from a form that has already returned-unless, +of course, you're a Scheme programmer. Scheme supports continuations, a +language construct that makes it possible to return from the same +function call more than once. But for a variety of reasons, few, if +any, languages other than Scheme support this kind of continuation. + + (2) If you're the kind of person who likes to know how things work +all the way down to the bits, it may be instructive to think about how +you might implement the condition system's macros using BLOCK, TAGBODY, +closures, and dynamic variables. + + (3) UNWIND-PROTECT is essentially equivalent to try/finally +constructs in Java and Python. + + (4) And indeed, CLSQL, the multi-Lisp, multidatabase SQL interface +library, provides a similar macro called with-database. CLSQL's home +page is at http://clsql.b9.com. + + +File: pcl.info, Node: 20-5, Next: 20-6, Prev: 20-4, Up: Chapter 20 + +Multiple Values +=============== + +Another feature of Common Lisp that I've mentioned in passing-in Chapter +11, when I discussed GETHASH-is the ability for a single form to return +multiple values. I'll discuss it in greater detail now. It is, +however, slightly misplaced in a chapter on special operators since the +ability to return multiple values isn't provided by just one or two +special operators but is deeply integrated into the language. The +operators you'll most often use when dealing with multiple values are +macros and functions, not special operators. But it is the case that +the basic ability to get at multiple return values is provided by a +special operator, MULTIPLE-VALUE-CALL, upon which the more commonly used +MULTIPLE-VALUE-BIND macro is built. 
+ + The key thing to understand about multiple values is that returning +multiple values is quite different from returning a list-if a form +returns multiple values, unless you do something specific to capture the +multiple values, all but the primary value will be silently discarded. +To see the distinction, consider the function GETHASH, which returns two +values: the value found in the hash table and a boolean that's NIL when +no value was found. If it returned those two values in a list, every +time you called GETHASH you'd have to take apart the list to get at the +actual value, regardless of whether you cared about the second return +value. Suppose you have a hash table, *h*, that contains numeric +values. If GETHASH returned a list, you couldn't write something like +this: + + (+ (gethash 'a *h*) (gethash 'b *h*)) + + because + expects its arguments to be numbers, not lists. But +because the multiple value mechanism silently discards the secondary +return value when it's not wanted, this form works fine. + + There are two aspects to using multiple values-returning multiple +values and getting at the nonprimary values returned by forms that +return multiple values. The starting points for returning multiple +values are the functions VALUES and VALUES-LIST. These are regular +functions, not special operators, so their arguments are passed in the +normal way. VALUES takes a variable number of arguments and returns +them as multiple values; VALUES-LIST takes a single list and returns its +elements as multiple values. In other words: + + (values-list x) === (apply #'values x) + + The mechanism by which multiple values are returned is implementation +dependent just like the mechanism for passing arguments into functions +is. Almost all language constructs that return the value of some +subform will "pass through" multiple values, returning all the values +returned by the subform. Thus, a function that returns the result of +calling VALUES or VALUES-LIST will itself return multiple values-and so +will another function whose result comes from calling the first +function. And so on. (1) + + But when a form is evaluated in a value position, only the primary +value will be used, which is why the previous addition form works the +way you'd expect. The special operator MULTIPLE-VALUE-CALL provides the +mechanism for getting your hands on the multiple values returned by a +form. MULTIPLE-VALUE-CALL is similar to FUNCALL except that while +FUNCALL is a regular function and, therefore, can see and pass on only +the primary values passed to it, MULTIPLE-VALUE-CALL passes, to the +function returned by its first subform, all the values returned by the +remaining subforms. + + (funcall #'+ (values 1 2) (values 3 4)) ==> 4 + (multiple-value-call #'+ (values 1 2) (values 3 4)) ==> 10 + + However, it's fairly rare that you'll simply want to pass all the +values returned by a function onto another function. More likely, +you'll want to stash the multiple values in different variables and then +do something with them. The MULTIPLE-VALUE-BIND macro, which you saw in +Chapter 11, is the most frequently used operator for accepting multiple +return values. Its skeleton looks like this: + + (multiple-value-bind (variable*) values-form + body-form*) + + The values-form is evaluated, and the multiple values it returns are +bound to the variables. Then the body-forms are evaluated with those +bindings in effect. 
Thus: + + (multiple-value-bind (x y) (values 1 2) + (+ x y)) ==> 3 + + Another macro, MULTIPLE-VALUE-LIST, is even simpler-it takes a single +form, evaluates it, and collects the resulting multiple values into a +list. In other words, it's the inverse of VALUES-LIST. + + CL-USER> (multiple-value-list (values 1 2)) + (1 2) + CL-USER> (values-list (multiple-value-list (values 1 2))) + 1 + 2 + + However, if you find yourself using MULTIPLE-VALUE-LIST a lot, it may +be a sign that some function should be returning a list to start with +rather than multiple values. + + Finally, if you want to assign multiple values returned by a form to +existing variables, you can use VALUES as a SETFable place. For +example: + + CL-USER> (defparameter *x* nil) + *X* + CL-USER> (defparameter *y* nil) + *Y* + CL-USER> (setf (values *x* *y*) (floor (/ 57 34))) + 1 + 23/34 + CL-USER> *x* + 1 + CL-USER> *y* + 23/34 + + ---------- Footnotes ---------- + + (1) A small handful of macros don't pass through extra return values +of the forms they evaluate. In particular, the PROG1 macro, which +evaluates a number of forms like a PROGN before returning the value of +the first form, returns that form's primary value only. Likewise, +PROG2, which returns the value of the second of its subforms, returns +only the primary value. The special operator MULTIPLE-VALUE-PROG1 is a +variant of PROG1 that returns all the values returned by the first form. +It's a minor wart that PROG1 doesn't already behave like +MULTIPLE-VALUE-PROG1, but neither is used often enough that it matters +much. The OR and COND macros are also not always transparent to +multiple values, returning only the primary value of certain subforms. + + +File: pcl.info, Node: 20-6, Next: 20-7, Prev: 20-5, Up: Chapter 20 + +EVAL-WHEN +========= + +A special operator you'll need to understand in order to write certain +kinds of macros is EVAL-WHEN. For some reason, Lisp books often treat +EVAL-WHEN as a wizards-only topic. But the only prerequisite to +understanding EVAL-WHEN is an understanding of how the two functions +LOAD and COMPILE-FILE interact. And understanding EVAL-WHEN will be +important as you start writing certain kinds of more sophisticated +macros, such as the ones you'll write in Chapters 24 and 31. + + I've touched briefly on the relation between LOAD and COMPILE-FILE in +previous chapters, but it's worth reviewing again here. The job of LOAD +is to load a file and evaluate all the top-level forms it contains. The +job of COMPILE-FILE is to compile a source file into a FASL file, which +can then be loaded with LOAD such that (load "foo.lisp") and (load +"foo.fasl") are essentially equivalent. + + Because LOAD evaluates each form before reading the next, the side +effects of evaluating forms earlier in the file can affect how forms +later in the form are read and evaluated. For instance, evaluating an +IN-PACKAGE form changes the value of *PACKAGE*, which will affect the +way subsequent forms are read. (1) Similarly, a DEFMACRO form early in +a file can define a macro that can then be used by code later in the +file. (2) + + COMPILE-FILE, on the other hand, normally doesn't evaluate the forms +it's compiling; it's when the FASL is loaded that the forms-or their +compiled equivalents-will be evaluated. However, COMPILE-FILE must +evaluate some forms, such as IN-PACKAGE and DEFMACRO forms, in order to +keep the behavior of (load "foo.lisp") and (load "foo.fasl") consistent. 
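+
+   As a minimal sketch of that equivalence at the REPL (foo.lisp here
+stands for any source file), you can either load the source directly or
+compile it first; the primary value returned by COMPILE-FILE is the
+pathname of the output file, assuming compilation succeeds, so it can be
+handed straight to LOAD:
+
+     (load "foo.lisp")                 ; read and evaluate each top-level form
+
+     (load (compile-file "foo.lisp"))  ; compile to a FASL, then load the FASL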
+ + So how do macros such as IN-PACKAGE and DEFMACRO work when processed +by COMPILE-FILE? In some pre-Common Lisp versions of Lisp, the file +compiler simply knew it should evaluate certain macros in addition to +compiling them. Common Lisp avoided the need for such kludges by +borrowing the EVAL-WHEN special operator from Maclisp. This operator, +as its name suggests, allows you to control when specific bits of code +are evaluated. The skeleton of an EVAL-WHEN form looks like this: + + (eval-when (situation*) + body-form*) + + There are three possible situations-:compile-toplevel, +:load-toplevel, and :execute-and which ones you specify controls when +the body-forms will be evaluated. An EVAL-WHEN with multiple situations +is equivalent to several EVAL-WHEN forms, one per situation, each with +the same body code. To explain the meaning of the three situations, +I'll need to explain a bit about how COMPILE-FILE, which is also +referred to as the file compiler, goes about compiling a file. + + To explain how COMPILE-FILE compiles EVAL-WHEN forms, I need to +introduce a distinction between compiling top-level forms and compiling +non-top-level forms. A top-level form is, roughly speaking, one that +will be compiled into code that will be run when the FASL is loaded. +Thus, all forms that appear directly at the top level of a source file +are compiled as top-level forms. Similarly, any forms appearing +directly in a top-level PROGN are compiled as top-level forms since the +PROGN itself doesn't do anything-it just groups together its subforms, +which will be run when the FASL is loaded. (3) Similarly, forms +appearing directly in a MACROLET or SYMBOL-MACROLET are compiled as +top-level forms because after the compiler has expanded the local macros +or symbol macros, there will be no remnant of the MACROLET or +SYMBOL-MACROLET in the compiled code. Finally, the expansion of a +top-level macro form will be compiled as a top-level form. + + Thus, a DEFUN appearing at the top level of a source file is a +top-level form-the code that defines the function and associates it with +its name will run when the FASL is loaded-but the forms within the body +of the function, which won't run until the function is called, aren't +top-level forms. Most forms are compiled the same when compiled as +top-level and non-top-level forms, but the semantics of an EVAL-WHEN +depend on whether it's being compiled as a top- level form, compiled as +a non-top-level form, or simply evaluated, combined with what situations +are listed in its situation list. + + The situations :compile-toplevel and :load-toplevel control the +meaning of an EVAL-WHEN compiled as a top-level form. When +:compile-toplevel is present, the file compiler will evaluate the +subforms at compile time. When :load-toplevel is present, it will +compile the subforms as top-level forms. If neither of these situations +is present in a top-level EVAL-WHEN, the compiler ignores it. + + When an EVAL-WHEN is compiled as a non-top-level form, it's either +compiled like a PROGN, if the :execute situation is specified, or +ignored. Similarly, an evaluated EVAL-WHEN-which includes top-level +EVAL-WHENs in a source file processed by LOAD and EVAL-WHENs evaluated +at compile time because they appear as subforms of a top-level EVAL-WHEN +with the :compile-toplevel situation-is treated like a PROGN if :execute +is present and ignored otherwise. 
+ + Thus, a macro such as IN-PACKAGE can have the necessary effect at +both compile time and when loading from source by expanding into an +EVAL-WHEN like the following: + + (eval-when (:compile-toplevel :load-toplevel :execute) + (setf *package* (find-package "PACKAGE-NAME"))) + + *PACKAGE* will be set at compile time because of the +:compile-toplevel situation, set when the FASL is loaded because of +:load-toplevel, and set when the source is loaded because of the +:execute. + + There are two ways you're most likely to use EVAL-WHEN. One is if you +want to write macros that need to save some information at compile time +to be used when generating the expansion of other macro forms in the +same file. This typically arises with definitional macros where a +definition early in a file can affect the code generated for a +definition later in the same file. You'll write this kind of macro in +Chapter 24. + + The other time you might need EVAL-WHEN is if you want to put the +definition of a macro and helper functions it uses in the same file as +code that uses the macro. DEFMACRO already includes an EVAL-WHEN in its +expansion so the macro definition is immediately available to be used +later in the file. But DEFUN normally doesn't make function definitions +available at compile time. But if you use a macro in the same file as +it's defined in, you need the macro and any functions it uses to be +defined. If you wrap the DEFUNs of any helper functions used by the +macro in an EVAL-WHEN with :compile-toplevel, the definitions will be +available when the macro's expansion function runs. You'll probably +want to include :load-toplevel and :execute as well since the macros +will also need the function definitions after the file is compiled and +loaded or if you load the source instead of compiling. + + ---------- Footnotes ---------- + + (1) The reason loading a file with an IN-PACKAGE form in it has no +effect on the value of *PACKAGE* after LOAD returns is because LOAD +binds *PACKAGE* to its current value before doing anything else. In +other words, something equivalent to the following LET is wrapped around +the rest of the code in LOAD: + + (let ((*package* *package*)) ...) + + Any assignment to *PACKAGE* will be to the new binding, and the old +binding will be restored when LOAD returns. It also binds the variable +*READTABLE*, which I haven't discussed, in the same way. + + (2) In some implementations, you may be able to get away with +evaluating DEFUNs that use undefined macros in the function body as long +as the macros are defined before the function is actually called. But +that works, if at all, only when LOADing the definitions from source, +not when compiling with COMPILE-FILE, so in general macro definitions +must be evaluated before they're used. + + (3) By contrast, the subforms in a top-level LET aren't compiled as +top-level forms because they're not run directly when the FASL is +loaded. They will run, but it's in the runtime context of the bindings +established by the LET. Theoretically, a LET that binds no variables +could be treated like a PROGN, but it's not-the forms appearing in a LET +are never treated as top-level forms. + + +File: pcl.info, Node: 20-7, Next: Chapter 21, Prev: 20-6, Up: Chapter 20 + +Other Special Operators +======================= + +The four remaining special operators, LOCALLY, THE, LOAD-TIME-VALUE, and +PROGV, all allow you to get at parts of the underlying language that +can't be accessed any other way. 
LOCALLY and THE are part of Common +Lisp's declaration system, which is used to communicate things to the +compiler that don't affect the meaning of your code but that may help +the compiler generate better code-faster, clearer error messages, and so +on. (1) I'll discuss declarations briefly in Chapter 32. + + The other two, LOAD-TIME-VALUE and PROGV, are infrequently used, and +explaining the reason why you might ever want to use them would take +longer than explaining what they do. So I'll just tell you what they do +so you know they're there. Someday you'll hit on one of those rare +times when they're just the thing, and then you'll be ready. + + LOAD-TIME-VALUE is used, as its name suggests, to create a value +that's determined at load time. When the file compiler compiles code +that contains a LOAD-TIME-VALUE form, it arranges to evaluate the first +subform once, when the FASL is loaded, and for the code containing the +LOAD-TIME-VALUE form to refer to that value. In other words, instead of +writing this: + + (defvar *loaded-at* (get-universal-time)) + + (defun when-loaded () *loaded-at*) + + you can write the following: + + (defun when-loaded () (load-time-value (get-universal-time))) + + In code not processed by COMPILE-FILE, LOAD-TIME-VALUE is evaluated +once when the code is compiled, which may be when you explicitly compile +a function with COMPILE or earlier because of implicit compilation +performed by the implementation in the course of evaluating the code. +In uncompiled code, LOAD-TIME-VALUE evaluates its form each time it's +evaluated. + + Finally, PROGV creates new dynamic bindings for variables whose names +are determined at runtime. This is mostly useful for implementing +embedded interpreters for languages with dynamically scoped variables. +The basic skeleton is as follows: + + (progv symbols-list values-list + body-form*) + + where symbols-list is a form that evaluates to a list of symbols and +values-list is a form that evaluates to a list of values. Each symbol +is dynamically bound to the corresponding value, and then the body-forms +are evaluated. The difference between PROGV and LET is that because +symbols-list is evaluated at runtime, the names of the variables to bind +can be determined dynamically. As I say, this isn't something you need +to do often. + + And that's it for special operators. In the next chapter, I'll get +back to hard-nosed practical topics and show you how to use Common +Lisp's package system to take control of your namespaces so you can +write libraries and applications that can coexist without stomping on +each other's names. + + ---------- Footnotes ---------- + + (1) The one declaration that has an effect on the semantics of a +program is the SPECIAL declaration mentioned in Chapter 6. + + +File: pcl.info, Node: Chapter 21, Next: Chapter 22, Prev: Chapter 20, Up: Top + +21. Programming in the Large: Packages and Symbols +================================================== + +In Chapter 4 I discussed how the Lisp reader translates textual names +into objects to be passed to the evaluator, representing them with a +kind of object called a symbol. It turns out that having a built-in +data type specifically for representing names is quite handy for a lot +of kinds of programming. (1) That, however, isn't the topic of this +chapter. In this chapter I'll discuss one of the more immediate and +practical aspects of dealing with names: how to avoid name conflicts +between independently developed pieces of code. 
+ + Suppose, for instance, you're writing a program and decide to use a +third-party library. You don't want to have to know the name of every +function, variable, class, or macro used in the internals of that +library in order to avoid conflicts between those names and the names +you use in your program. You'd like for most of the names in the +library and the names in your program to be considered distinct even if +they happen to have the same textual representation. At the same time, +you'd like certain names defined in the library to be readily +accessible-the names that make up its public API, which you'll want to +use in your program. + + In Common Lisp, this namespace problem boils down to a question of +controlling how the reader translates textual names into symbols: if you +want two occurrences of the same name to be considered the same by the +evaluator, you need to make sure the reader uses the same symbol to +represent each name. Conversely, if you want two names to be considered +distinct, even if they happen to have the same textual name, you need +the reader to create different symbols to represent each name. + +* Menu: + +* 21-1:: How the Reader Uses Packages +* 21-2:: A Bit of Package and Symbol Vocabulary +* 21-3:: Three Standard Packages +* 21-4:: Defining Your Own Packages +* 21-5:: Packaging Reusable Libraries +* 21-6:: Importing Individual Names +* 21-7:: Packaging Mechanics +* 21-8:: Package Gotchas + + ---------- Footnotes ---------- + + (1) The kind of programming that relies on a symbol data type is +called, appropriately enough, symbolic computation. It's typically +contrasted to numeric programming. An example of a primarily symbolic +program that all programmers should be familiar with is a compiler-it +treats the text of a program as symbolic data and translates it into a +new form. + + +File: pcl.info, Node: 21-1, Next: 21-2, Prev: Chapter 21, Up: Chapter 21 + +How the Reader Uses Packages +============================ + +In Chapter 4 I discussed briefly how the Lisp reader translates names +into symbols, but I glossed over most of the details-now it's time to +take a closer look at what actually happens. + + I'll start by describing the syntax of names understood by the reader +and how that syntax relates to packages. For the moment you can think +of a package as a table that maps strings to symbols. As you'll see in +the next section, the actual mapping is slightly more flexible than a +simple lookup table but not in ways that matter much to the reader. +Each package also has a name, which can be used to find the package +using the function FIND-PACKAGE. + + The two key functions that the reader uses to access the +name-to-symbol mappings in a package are FIND-SYMBOL and INTERN. Both +these functions take a string and, optionally, a package. If not +supplied, the package argument defaults to the value of the global +variable *PACKAGE*, also called the current package. + + FIND-SYMBOL looks in the package for a symbol with the given string +for a name and returns it, or NIL if no symbol is found. INTERN also +will return an existing symbol; otherwise it creates a new symbol with +the string as its name and adds it to the package. + + Most names you use are unqualified, names that contain no colons. +When the reader reads such a name, it translates it to a symbol by +converting any unescaped letters to uppercase and passing the resulting +string to INTERN. Thus, each time the reader reads the same name in the +same package, it'll get the same symbol object. 
This is important +because the evaluator uses the object identity of symbols to determine +which function, variable, or other program element a given symbol refers +to. Thus, the reason an expression such as (hello-world) results in +calling a particular hello-world function is because the reader returns +the same symbol when it reads the function call as it did when it read +the DEFUN form that defined the function. + + A name containing either a single colon or a double colon is a +package-qualified name. When the reader reads a package-qualified name, +it splits the name on the colon(s) and uses the first part as the name +of a package and the second part as the name of the symbol. The reader +looks up the appropriate package and uses it to translate the symbol +name to a symbol object. + + A name containing only a single colon must refer to an external +symbol-one the package exports for public use. If the named package +doesn't contain a symbol with a given name, or if it does but it hasn't +been exported, the reader signals an error. A double-colon name can +refer to any symbol from the named package, though it's usually a bad +idea-the set of exported symbols defines a package's public interface, +and if you don't respect the package author's decision about what names +to make public and which ones to keep private, you're asking for trouble +down the road. On the other hand, sometimes a package author will +neglect to export a symbol that really ought to be public. In that +case, a double-colon name lets you get work done without having to wait +for the next version of the package to be released. + + Two other bits of symbol syntax the reader understands are those for +keyword symbols and uninterned symbols. Keyword symbols are written +with names starting with a colon. Such symbols are interned in the +package named KEYWORD and automatically exported. Additionally, when +the reader interns a symbol in the KEYWORD, it also defines a constant +variable with the symbol as both its name and value. This is why you +can use keywords in argument lists without quoting them-when they appear +in a value position, they evaluate to themselves. Thus: + + (eql ':foo :foo) ==> T + + The names of keyword symbols, like all symbols, are converted to all +uppercase by the reader before they're interned. The name doesn't +include the leading colon. + + (symbol-name :foo) ==> "FOO" + + Uninterned symbols are written with a leading #:. These names (minus +the #:) are converted to uppercase as normal and then translated into +symbols, but the symbols aren't interned in any package; each time the +reader reads a #: name, it creates a new symbol. Thus: + + (eql '#:foo '#:foo) ==> NIL + + You'll rarely, if ever, write this syntax yourself, but will +sometimes see it when you print an s-expression containing symbols +returned by the function GENSYM. + + (gensym) ==> #:G3128 + + +File: pcl.info, Node: 21-2, Next: 21-3, Prev: 21-1, Up: Chapter 21 + +A Bit of Package and Symbol Vocabulary +====================================== + +As I mentioned previously, the mapping from names to symbols implemented +by a package is slightly more flexible than a simple lookup table. At +its core, every package contains a name-to-symbol lookup table, but a +symbol can be made accessible via an unqualified name in a given package +in other ways. To talk sensibly about these other mechanisms, you'll +need a little bit of vocabulary. 
+ + To start with, all the symbols that can be found in a given package +using FIND-SYMBOL are said to be accessible in that package. In other +words, the accessible symbols in a package are those that can be +referred to with unqualified names when the package is current. + + A symbol can be accessible in two ways. The first is for the +package's name-to-symbol table to contain an entry for the symbol, in +which case the symbol is said to be present in the package. When the +reader interns a new symbol in a package, it's added to the package's +name-to-symbol table. The package in which a symbol is first interned +is called the symbol's home package. + + The other way a symbol can be accessible in a package is if the +package inherits it. A package inherits symbols from other packages by +using the other packages. Only external symbols in the used packages +are inherited. A symbol is made external in a package by exporting it. +In addition to causing it to be inherited by using packages, exporting a +symbol also-as you saw in the previous section-makes it possible to +refer to the symbol using a single-colon qualified name. + + To keep the mappings from names to symbols deterministic, the package +system allows only one symbol to be accessible in a given package for +each name. That is, a package can't have a present symbol and an +inherited symbol with the same name or inherit two different symbols, +from different packages, with the same name. However, you can resolve +conflicts by making one of the accessible symbols a shadowing symbol, +which makes the other symbols of the same name inaccessible. In +addition to its name-to-symbol table, each package maintains a list of +shadowing symbols. + + An existing symbol can be imported into another package by adding it +to the package's name-to-symbol table. Thus, the same symbol can be +present in multiple packages. Sometimes you'll import symbols simply +because you want them to be accessible in the importing package without +using their home package. Other times you'll import a symbol because +only present symbols can be exported or be shadowing symbols. For +instance, if a package needs to use two packages that have external +symbols of the same name, one of the symbols must be imported into the +using package in order to be added to its shadowing list and make the +other symbol inaccessible. + + Finally, a present symbol can be uninterned from a package, which +causes it to be removed from the name-to-symbol table and, if it's a +shadowing symbol, from the shadowing list. You might unintern a symbol +from a package to resolve a conflict between the symbol and an external +symbol from a package you want to use. A symbol that isn't present in +any package is called an uninterned symbol, can no longer be read by the +reader, and will be printed using the #:foo syntax. + + +File: pcl.info, Node: 21-3, Next: 21-4, Prev: 21-2, Up: Chapter 21 + +Three Standard Packages +======================= + +In the next section I'll show you how to define your own packages, +including how to make one package use another and how to export, shadow, +and import symbols. But first let's look at a few packages you've been +using already. When you first start Lisp, the value of *PACKAGE* is +typically the COMMON-LISP-USER package, also known as CL-USER. (1) +CL-USER uses the package COMMON-LISP, which exports all the names +defined by the language standard. 
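+
+You can verify that inheritance with FIND-SYMBOL, whose second return
+value reports how a symbol is accessible in a package:
+
+     (find-symbol "CAR" :cl-user) ==> CAR, :INHERITED
+     (find-symbol "CAR" :cl)      ==> CAR, :EXTERNAL
+
+That is, CAR is an external symbol of COMMON-LISP and is merely
+inherited by COMMON-LISP-USER.
+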
+Thus, when you type an expression at the REPL, all the names of
+standard functions, macros, variables, and so on, will be translated to
+the symbols exported from COMMON-LISP, and all other names will be
+interned in the COMMON-LISP-USER package. For example, the name
+*PACKAGE* is exported from COMMON-LISP-if you want to see the value of
+*PACKAGE*, you can type this:
+
+     CL-USER> *package*
+     #<The COMMON-LISP-USER package>
+
+because COMMON-LISP-USER uses COMMON-LISP. Or you can use a
+package-qualified name.
+
+     CL-USER> common-lisp:*package*
+     #<The COMMON-LISP-USER package>
+
+You can even use COMMON-LISP's nickname, CL.
+
+     CL-USER> cl:*package*
+     #<The COMMON-LISP-USER package>
+
+But *X* isn't a symbol in COMMON-LISP, so if you type this:
+
+     CL-USER> (defvar *x* 10)
+     *X*
+
+the reader reads DEFVAR as the symbol from the COMMON-LISP package
+and *X* as a symbol in COMMON-LISP-USER.
+
+The REPL can't start in the COMMON-LISP package because you're not
+allowed to intern new symbols in it; COMMON-LISP-USER serves as a
+"scratch" package where you can create your own names while still
+having easy access to all the symbols in COMMON-LISP. (2) Typically,
+all packages you'll define will also use COMMON-LISP, so you don't
+have to write things like this:
+
+     (cl:defun add-two (x) (cl:+ x 2))
+
+The third standard package is the KEYWORD package, the package the
+Lisp reader uses to intern names starting with colon. Thus, you can
+also refer to any keyword symbol with an explicit package
+qualification of keyword like this:
+
+     CL-USER> :a
+     :A
+     CL-USER> keyword:a
+     :A
+     CL-USER> (eql :a keyword:a)
+     T
+
+   ---------- Footnotes ----------
+
+   (1) Every package has one official name and zero or more nicknames
+that can be used anywhere you need to use the package name, such as in
+package-qualified names or to refer to the package in a DEFPACKAGE or
+IN-PACKAGE form.
+
+   (2) COMMON-LISP-USER is also allowed to provide access to symbols
+exported by other implementation-defined packages. While this is
+intended as a convenience for the user-it makes implementation-specific
+functionality readily accessible-it can also cause confusion for new
+Lispers: Lisp will complain about an attempt to redefine some name that
+isn't listed in the language standard. To see what packages
+COMMON-LISP-USER inherits symbols from in a particular implementation,
+evaluate this expression at the REPL:
+
+     (mapcar #'package-name (package-use-list :cl-user))
+
+   And to find out what package a symbol came from originally,
+evaluate this:
+
+     (package-name (symbol-package 'some-symbol))
+
+with some-symbol replaced by the symbol in question. For instance:
+
+     (package-name (symbol-package 'car)) ==> "COMMON-LISP"
+     (package-name (symbol-package 'foo)) ==> "COMMON-LISP-USER"
+
+   Symbols inherited from implementation-defined packages will return
+some other value.
+
+
+File: pcl.info, Node: 21-4, Next: 21-5, Prev: 21-3, Up: Chapter 21
+
+Defining Your Own Packages
+==========================
+
+Working in COMMON-LISP-USER is fine for experiments at the REPL, but
+once you start writing actual programs you'll want to define new
+packages so different programs loaded into the same Lisp environment
+don't stomp on each other's names. And when you write libraries that
+you intend to use in different contexts, you'll want to define
+separate packages and then export the symbols that make up the
+libraries' public APIs.
+
+However, before you start defining packages, it's important to
+understand one thing about what packages do not do.
Packages don't +provide direct control over who can call what function or access what +variable. They provide you with basic control over namespaces by +controlling how the reader translates textual names into symbol objects, +but it isn't until later, in the evaluator, that the symbol is +interpreted as the name of a function or variable or whatever else. +Thus, it doesn't make sense to talk about exporting a function or a +variable from a package. You can export symbols to make certain names +easier to refer to, but the package system doesn't allow you to restrict +how those names are used. (1) + + With that in mind, you can start looking at how to define packages +and tie them together. You define new packages with the macro +DEFPACKAGE, which allows you to not only create the package but to +specify what packages it uses, what symbols it exports, and what symbols +it imports from other packages and to resolve conflicts by creating +shadowing symbols. (2) + + I'll describe the various options in terms of how you might use +packages while writing a program that organizes e-mail messages into a +searchable database. The program is purely hypothetical, as are the +libraries I'll refer to-the point is to look at how the packages used in +such a program might be structured. + + The first package you'd need is one to provide a namespace for the +application-you want to be able to name your functions, variables, and +so on, without having to worry about name collisions with unrelated +code. So you'd define a new package with DEFPACKAGE. + + If the application is simple enough to be written with no libraries +beyond the facilities provided by the language itself, you could define +a simple package like this: + + (defpackage :com.gigamonkeys.email-db + (:use :common-lisp)) + + This defines a package, named COM.GIGAMONKEYS.EMAIL-DB, that inherits +all the symbols exported by the COMMON-LISP package. (3) + + You actually have several choices of how to represent the names of +packages and, as you'll see, the names of symbols in a DEFPACKAGE. +Packages and symbols are named with strings. However, in a DEFPACKAGE +form, you can specify the names of packages and symbols with string +designators. A string designator is either a string, which designates +itself; a symbol, which designates its name; or a character, which +designates a one-character string containing just the character. Using +keyword symbols, as in the previous DEFPACKAGE, is a common style that +allows you to write the names in lowercase-the reader will convert the +names to uppercase for you. You could also write the DEFPACKAGE with +strings, but then you have to write them in all uppercase, because the +true names of most symbols and packages are in fact uppercase because of +the case conversion performed by the reader. (4) + + (defpackage "COM.GIGAMONKEYS.EMAIL-DB" + (:use "COMMON-LISP")) + + You could also use nonkeyword symbols-the names in DEFPACKAGE aren't +evaluated-but then the very act of reading the DEFPACKAGE form would +cause those symbols to be interned in the current package, which at the +very least will pollute that namespace and may also cause problems later +if you try to use the package. 
(5)
+
+To read code in this package, you need to make it the current package
+with the IN-PACKAGE macro:
+
+     (in-package :com.gigamonkeys.email-db)
+
+If you type this expression at the REPL, it will change the value of
+*PACKAGE*, affecting how the REPL reads subsequent expressions, until
+you change it with another call to IN-PACKAGE. Similarly, if you
+include an IN-PACKAGE in a file that's loaded with LOAD or compiled
+with COMPILE-FILE, it will change the package, affecting the way
+subsequent expressions in the file are read. (6)
+
+With the current package set to the COM.GIGAMONKEYS.EMAIL-DB package,
+other than names inherited from the COMMON-LISP package, you can use
+any name you want for whatever purpose you want. Thus, you could
+define a new hello-world function that could coexist with the
+hello-world function previously defined in COMMON-LISP-USER. Here's
+the behavior of the existing function:
+
+     CL-USER> (hello-world)
+     hello, world
+     NIL
+
+Now you can switch to the new package using IN-PACKAGE. (7) Notice
+how the prompt changes-the exact form is determined by the development
+environment, but in SLIME the default prompt consists of an
+abbreviated version of the package name.
+
+     CL-USER> (in-package :com.gigamonkeys.email-db)
+     #<The COM.GIGAMONKEYS.EMAIL-DB package>
+     EMAIL-DB>
+
+You can define a new hello-world in this package:
+
+     EMAIL-DB> (defun hello-world () (format t "hello from EMAIL-DB package~%"))
+     HELLO-WORLD
+
+And test it, like this:
+
+     EMAIL-DB> (hello-world)
+     hello from EMAIL-DB package
+     NIL
+
+Now switch back to CL-USER.
+
+     EMAIL-DB> (in-package :cl-user)
+     #<The COMMON-LISP-USER package>
+     CL-USER>
+
+And the old function is undisturbed.
+
+     CL-USER> (hello-world)
+     hello, world
+     NIL
+
+   ---------- Footnotes ----------
+
+   (1) This is different from the Java package system, which provides
+a namespace for classes but is also involved in Java's access control
+mechanism. The non-Lisp language with a package system most like
+Common Lisp's packages is Perl.
+
+   (2) All the manipulations performed by DEFPACKAGE can also be
+performed with functions that manipulate package objects. However,
+since a package generally needs to be fully defined before it can be
+used, those functions are rarely used. Also, DEFPACKAGE takes care of
+performing all the package manipulations in the right order-for
+instance, DEFPACKAGE adds symbols to the shadowing list before it
+tries to use the used packages.
+
+   (3) In many Lisp implementations the :use clause is optional if you
+want only to :use COMMON-LISP-if it's omitted, the package will
+automatically inherit names from an implementation-defined list of
+packages that will usually include COMMON-LISP. However, your code
+will be more portable if you always explicitly specify the packages
+you want to :use. Those who are averse to typing can use the package's
+nickname and write (:use :cl).
+
+   (4) Using keywords instead of strings has another advantage-Allegro
+provides a "modern mode" Lisp in which the reader does no case
+conversion of names and in which, instead of a COMMON-LISP package
+with uppercase names, provides a common-lisp package with lowercase
+names. Strictly speaking, this Lisp isn't a conforming Common Lisp
+since all the names in the standard are defined to be uppercase. But
+if you write your DEFPACKAGE forms using keyword symbols, they will
+work both in Common Lisp and in this near relative.
+
+   (5) Some folks, instead of keywords, use uninterned symbols, using
+the #: syntax.
+ + (defpackage #:com.gigamonkeys.email-db + (:use #:common-lisp)) + + This saves a tiny bit of memory by not interning any symbols in the +keyword package-the symbol can become garbage after DEFPACKAGE (or the +code it expands into) is done with it. However, the difference is so +slight that it really boils down to a matter of aesthetics. + + (6) The reason to use IN-PACKAGE instead of just SETFing *PACKAGE* is +that IN-PACKAGE expands into code that will run when the file is +compiled by COMPILE-FILE as well as when the file is loaded, changing +the way the reader reads the rest of the file during compilation. + + (7) In the REPL buffer in SLIME you can also change packages with a +REPL shortcut. Type a comma, and then enter change-package at the +Command: prompt. + + +File: pcl.info, Node: 21-5, Next: 21-6, Prev: 21-4, Up: Chapter 21 + +Packaging Reusable Libraries +============================ + +While working on the e-mail database, you might write several functions +related to storing and retrieving text that don't have anything in +particular to do with e-mail. You might realize that those functions +could be useful in other programs and decide to repackage them as a +library. You should define a new package, but this time you'll export +certain names to make them available to other packages. + + (defpackage :com.gigamonkeys.text-db + (:use :common-lisp) + (:export :open-db + :save + :store)) + + Again, you use the COMMON-LISP package, because you'll need access to +standard functions within COM.GIGAMONKEYS.TEXT-DB. The :export clause +specifies names that will be external in COM.GIGAMONKEYS.TEXT-DB and +thus accessible in packages that :use it. Therefore, after you've +defined this package, you can change the definition of the main +application package to the following: + + (defpackage :com.gigamonkeys.email-db + (:use :common-lisp :com.gigamonkeys.text-db)) + + Now code written in COM.GIGAMONKEYS.EMAIL-DB can use unqualified +names to refer to the exported symbols from both COMMON-LISP and +COM.GIGAMONKEYS.TEXT-DB. All other names will continue to be interned +directly in the COM.GIGAMONKEYS.EMAIL-DB package. + + +File: pcl.info, Node: 21-6, Next: 21-7, Prev: 21-5, Up: Chapter 21 + +Importing Individual Names +========================== + +Now suppose you find a third-party library of functions for manipulating +e-mail messages. The names used in the library's API are exported from +the package COM.ACME.EMAIL, so you could :use that package to get easy +access to those names. But suppose you need to use only one function +from this library, and other exported symbols conflict with names you +already use (or plan to use) in our own code. (1) In this case, you can +import the one symbol you need with an :import-from clause in the +DEFPACKAGE. For instance, if the name of the function you want to use is +parse-email-address, you can change the DEFPACKAGE to this: + + (defpackage :com.gigamonkeys.email-db + (:use :common-lisp :com.gigamonkeys.text-db) + (:import-from :com.acme.email :parse-email-address)) + + Now anywhere the name parse-email-address appears in code read in the +COM.GIGAMONKEYS.EMAIL-DB package, it will be read as the symbol from +COM.ACME.EMAIL. If you need to import more than one symbol from a single +package, you can include multiple names after the package name in a +single :import-from clause. A DEFPACKAGE can also include multiple +:import-from clauses in order to import symbols from different packages. 
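+
+For instance, if you also needed a couple of names from a second
+library, the DEFPACKAGE might grow into something like this; the names
+FORMAT-EMAIL-ADDRESS, COM.ACME.MIME, and PARSE-MIME-TYPE are invented
+here purely for illustration:
+
+     (defpackage :com.gigamonkeys.email-db
+       (:use :common-lisp :com.gigamonkeys.text-db)
+       (:import-from :com.acme.email
+        :parse-email-address
+        :format-email-address)
+       (:import-from :com.acme.mime :parse-mime-type))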
+ + Occasionally you'll run into the opposite situation-a package may +export a bunch of names you want to use and a few you don't. Rather +than listing all the symbols you do want to use in an :import-from +clause, you can instead :use the package and then list the names you +don't want to inherit in a :shadow clause. For instance, suppose the +COM.ACME.TEXT package exports a bunch of names of functions and classes +used in text processing. Further suppose that most of these functions +and classes are ones you'll want to use in your code, but one of the +names, build-index, conflicts with a name you've already used. You can +make the build-index from COM.ACME.TEXT inaccessible by shadowing it. + + (defpackage :com.gigamonkeys.email-db + (:use + :common-lisp + :com.gigamonkeys.text-db + :com.acme.text) + (:import-from :com.acme.email :parse-email-address) + (:shadow :build-index)) + + The :shadow clause causes a new symbol named BUILD-INDEX to be +created and added directly to COM.GIGAMONKEYS.EMAIL-DB's name-to-symbol +map. Now if the reader reads the name BUILD-INDEX, it will translate it +to the symbol in COM.GIGAMONKEYS.EMAIL-DB's map, rather than the one +that would otherwise be inherited from COM.ACME.TEXT. The new symbol is +also added to a shadowing symbols list that's part of the +COM.GIGAMONKEYS.EMAIL-DB package, so if you later use another package +that also exports a BUILD-INDEX symbol, the package system will know +there's no conflict-that you want the symbol from +COM.GIGAMONKEYS.EMAIL-DB to be used rather than any other symbols with +the same name inherited from other packages. + + A similar situation can arise if you want to use two packages that +export the same name. In this case the reader won't know which +inherited name to use when it reads the textual name. In such +situations you must resolve the ambiguity by shadowing the conflicting +names. If you don't need to use the name from either package, you could +shadow the name with a :shadow clause, creating a new symbol with the +same name in your package. But if you actually want to use one of the +inherited symbols, then you need to resolve the ambiguity with a +:shadowing-import-from clause. Like an :import-from clause, a +:shadowing-import-from clause consists of a package name followed by the +names to import from that package. For instance, if COM.ACME.TEXT +exports a name SAVE that conflicts with the name exported from +COM.GIGAMONKEYS.TEXT-DB, you could resolve the ambiguity with the +following DEFPACKAGE: + + (defpackage :com.gigamonkeys.email-db + (:use + :common-lisp + :com.gigamonkeys.text-db + :com.acme.text) + (:import-from :com.acme.email :parse-email-address) + (:shadow :build-index) + (:shadowing-import-from :com.gigamonkeys.text-db :save)) + + ---------- Footnotes ---------- + + (1) During development, if you try to :use a package that exports a +symbol with the same name as a symbol already interned in the using +package, Lisp will signal an error and typically offer you a restart +that will unintern the offending symbol from the using package. For +more on this, see the section "Package Gotchas." + + +File: pcl.info, Node: 21-7, Next: 21-8, Prev: 21-6, Up: Chapter 21 + +Packaging Mechanics +=================== + +That covers the basics of how to use packages to manage namespaces in +several common situations. However, another level of how to use +packages is worth discussing-the raw mechanics of how to organize code +that uses different packages. 
In this section I'll discuss a few rules +of thumb about how to organize code-where to put your DEFPACKAGE forms +relative to the code that uses your packages via IN-PACKAGE. + + Because packages are used by the reader, a package must be defined +before you can LOAD or COMPILE-FILE a file that contains an IN-PACKAGE +expression switching to that package. Packages also must be defined +before other DEFPACKAGE forms can refer to them. For instance, if +you're going to :use COM.GIGAMONKEYS.TEXT-DB in +COM.GIGAMONKEYS.EMAIL-DB, then COM.GIGAMONKEYS.TEXT-DB's DEFPACKAGE must +be evaluated before the DEFPACKAGE of COM.GIGAMONKEYS.EMAIL-DB. + + The best first step toward making sure packages exist when they need +to is to put all your DEFPACKAGEs in files separate from the code that +needs to be read in those packages. Some folks like to create a +foo-package.lisp file for each individual package, and others create a +single packages.lisp that contains all the DEFPACKAGE forms for a group +of related packages. Either approach is reasonable, though the +one-file-per-package approach also requires that you arrange to load the +individual files in the right order according to the interpackage +dependencies. + + Either way, once all the DEFPACKAGE forms have been separated from +the code that will be read in the packages they define, you can arrange +to LOAD the files containing the DEFPACKAGEs before you compile or load +any of the other files. For simple programs you can do this by hand: +simply LOAD the file or files containing the DEFPACKAGE forms, possibly +compiling them with COMPILE-FILE first. Then LOAD the files that use +those packages, again optionally compiling them first with COMPILE-FILE. +Note, however, that the packages don't exist until you LOAD the package +definitions, either the source or the files produced by COMPILE-FILE. +Thus, if you're compiling everything, you must still LOAD all the +package definitions before you can COMPILE-FILE any files to be read in +the packages. + + Doing these steps by hand will get tedious after a while. For simple +programs you can automate the steps by writing a file, load.lisp, that +contains the appropriate LOAD and COMPILE-FILE calls in the right order. +Then you can just LOAD that file. For more complex programs you'll want +to use a system definition facility to manage loading and compiling +files in the right order. (1) + + The other key rule of thumb is that each file should contain exactly +one IN-PACKAGE form, and it should be the first form in the file other +than comments. Files containing DEFPACKAGE forms should start with +(in-package "COMMON-LISP-USER"), and all other files should contain an +IN-PACKAGE of one of your packages. + + If you violate this rule and switch packages in the middle of a file, +you'll confuse human readers who don't notice the second IN-PACKAGE. +Also, many Lisp development environments, particularly Emacs-based ones +such as SLIME, look for an IN-PACKAGE to determine the package they +should use when communicating with Common Lisp. Multiple IN-PACKAGE +forms per file may confuse these tools as well. + + On the other hand, it's fine to have multiple files read in the same +package, each with an identical IN-PACKAGE form. It's just a matter of +how you like to organize your code. + + The other bit of packaging mechanics has to do with how to name +packages. Package names live in a flat namespace-package names are just +strings, and different packages must have textually distinct names. 
+Thus, you have to consider the possibility of conflicts between package +names. If you're using only packages you developed yourself, then you +can probably get away with using short names for your packages. But if +you're planning to use third-party libraries or to publish your code for +use by other programmers, then you need to follow a naming convention +that will minimize the possibility of name collisions between different +packages. Many Lispers these days are adopting Java-style names, like +the ones used in this chapter, consisting of a reversed Internet domain +name followed by a dot and a descriptive string. + + ---------- Footnotes ---------- + + (1) The code for the "Practical" chapters, available from this book's +Web site, uses the ASDF system definition library. ASDF stands for +Another System Definition Facility. + + +File: pcl.info, Node: 21-8, Next: Chapter 22, Prev: 21-7, Up: Chapter 21 + +Package Gotchas +=============== + +Once you're familiar with packages, you won't spend a bunch of time +thinking about them. There's just not that much to them. However, a +couple of gotchas that bite most new Lisp programmers make the package +system seem more complicated and unfriendly than it really is. + + The number-one gotcha arises most commonly when playing around at the +REPL. You'll be looking at some library that defines certain interesting +functions. You'll try to call one of the functions like this: + + CL-USER> (foo) + + and get dropped into the debugger with this error: + + attempt to call `FOO' which is an undefined function. + [Condition of type UNDEFINED-FUNCTION] + + Restarts: + 0: [TRY-AGAIN] Try calling FOO again. + 1: [RETURN-VALUE] Return a value instead of calling FOO. + 2: [USE-VALUE] Try calling a function other than FOO. + 3: [STORE-VALUE] Setf the symbol-function of FOO and call it again. + 4: [ABORT] Abort handling SLIME request. + 5: [ABORT] Abort entirely from this (lisp) process. + + Ah, of course-you forgot to use the library's package. So you quit +the debugger and try to USE-PACKAGE the library's package in order to +get access to the name FOO so you can call the function. + + CL-USER> (use-package :foolib) + + But that drops you back into the debugger with this error message: + + Using package `FOOLIB' results in name conflicts for these symbols: FOO + [Condition of type PACKAGE-ERROR] + + Restarts: + 0: [CONTINUE] Unintern the conflicting symbols from the `COMMON-LISP-USER' package. + 1: [ABORT] Abort handling SLIME request. + 2: [ABORT] Abort entirely from this (lisp) process. + + Huh? The problem is the first time you called foo, the reader read +the name foo and interned it in CL-USER before the evaluator got hold of +it and discovered that this newly interned symbol isn't the name of a +function. This new symbol then conflicts with the symbol of the same +name exported from the FOOLIB package. If you had remembered to +USE-PACKAGE FOOLIB before you tried to call foo, the reader would have +read foo as the inherited symbol and not interned a foo symbol in +CL-USER. + + However, all isn't lost, because the first restart offered by the +debugger will patch things up in just the right way: it will unintern +the foo symbol from COMMON-LISP-USER, putting the CL-USER package back +to the state it was in before you called foo, allowing the USE-PACKAGE +to proceed and allowing for the inherited foo to be available in +CL-USER. + + This kind of problem can also occur when loading and compiling files. 
+For instance, if you defined a package, MY-APP, for code that was going +to use functions with names from the FOOLIB package, but forgot to :use +FOOLIB, when you compile the files with an (in-package :my-app) in them, +the reader will intern new symbols in MY-APP for the names that were +supposed to be read as symbols from FOOLIB. When you try to run the +compiled code, you'll get undefined function errors. If you then try to +redefine the MY-APP package to :use FOOLIB, you'll get the conflicting +symbols error. The solution is the same: select the restart to unintern +the conflicting symbols from MY-APP. You'll then need to recompile the +code in the MY-APP package so it will refer to the inherited names. + + The next gotcha is essentially the reverse of the first gotcha. In +this case, you'd have defined a package-again, let's say it's +MY-APP-that uses another package, say, FOOLIB. Now you start writing +code in the MY-APP package. Although you used FOOLIB in order to be +able to refer to the foo function, FOOLIB may export other symbols as +well. If you use one of those exported symbols-say, bar-as the name of +a function in your own code, Lisp won't complain. Instead, the name of +your function will be the symbol exported by FOOLIB, which will clobber +the definition of bar from FOOLIB. + + This gotcha is more insidious because it doesn't cause an error-from +the evaluator's point of view it's just being asked to associate a new +function with an old name, something that's perfectly legal. It's +suspect only because the code doing the redefining was read with a +different value for *PACKAGE* than the name's package. But the +evaluator doesn't necessarily know that. However, in most Lisps you'll +get an warning about "redefining BAR, originally defined in?". You +should heed those warnings. If you clobber a definition from a library, +you can restore it by reloading the library code with LOAD. (1) + + The last package-related gotcha is, by comparison, quite trivial, but +it bites most Lisp programmers at least a few times: you define a +package that uses COMMON-LISP and maybe a few libraries. Then at the +REPL you change to that package to play around. Then you decide to quit +Lisp altogether and try to call (quit). However, quit isn't a name from +the COMMON-LISP package-it's defined by the implementation in some +implementation-specific package that happens to be used by +COMMON-LISP-USER. The solution is simple-change packages back to CL-USER +to quit. Or use the SLIME REPL shortcut quit, which will also save you +from having to remember that in certain Common Lisp implementations the +function to quit is exit, not quit. + + You're almost done with your tour of Common Lisp. In the next +chapter I'll discuss the details of the extended LOOP macro. After +that, the rest of the book is devoted to "practicals": a spam filter, a +library for parsing binary files, and various parts of a streaming MP3 +server with a Web interface. + + ---------- Footnotes ---------- + + (1) Some Common Lisp implementations, such as Allegro and SBCL, +provide a facility for "locking" the symbols in a particular package so +they can be used in defining forms such as DEFUN, DEFVAR, and DEFCLASS +only when their home package is the current package. + + +File: pcl.info, Node: Chapter 22, Next: Chapter 23, Prev: Chapter 21, Up: Top + +22. LOOP for Black Belts +======================== + +In Chapter 7 I briefly discussed the extended LOOP macro. 
As I +mentioned then, LOOP provides what is essentially a special-purpose +language just for writing iteration constructs. + + This might seem like a lot of bother-inventing a whole language just +for writing loops. But if you think about the ways loops are used in +programs, it actually makes a fair bit of sense. Any program of any +size at all will contain quite a number of loops. And while they won't +all be the same, they won't all be unique either; patterns will emerge, +particularly if you include the code immediately preceding and following +the loops-patterns of how things are set up for the loop, patterns in +what gets done in the loop proper, and patterns in what gets done after +the loop. The LOOP language captures these patterns so you can express +them directly. + + The LOOP macro has a lot of parts-one of the main complaints of +LOOP's detractors is that it's too complex. In this chapter, I'll +tackle LOOP head on, giving you a systematic tour of the various parts +and how they fit together. + +* Menu: + +* 22-1:: The Parts of a LOOP +* 22-2:: Iteration Control +* 22-3:: Counting Loops +* 22-4:: Looping Over Collections and Packages +* 22-5:: Equals-Then Iteration +* 22-6:: Local Variables +* 22-7:: Destructuring Variables +* 22-8:: Value Accumulation +* 22-9:: Unconditional Execution +* 22-10:: Conditional Execution +* 22-11:: Setting Up and Tearing Down +* 22-12:: Termination Tests +* 22-13:: Putting It All Together + + +File: pcl.info, Node: 22-1, Next: 22-2, Prev: Chapter 22, Up: Chapter 22 + +The Parts of a LOOP +=================== + +You can do the following in a LOOP: + + * Step variables numerically and over various data structures + * Collect, count, sum, minimize, and maximize values seen while + looping + * Execute arbitrary Lisp expressions + * Decide when to terminate the loop + * Conditionally do any of these + + Additionally, LOOP provides syntax for the following: + + * Creating local variables for use within the loop + * Specifying arbitrary Lisp expressions to run before and after the + loop proper + + The basic structure of a LOOP is a set of clauses, each of which +begins with a loop keyword.(1) How each clause is parsed by the LOOP +macro depends on the keyword. Some of the main keywords, which you saw +in Chapter 7, are for, collecting, summing, counting, do, and finally. + + ---------- Footnotes ---------- + + (1) The term loop keyword is a bit unfortunate, as loop keywords +aren't keywords in the normal sense of being symbols in the KEYWORD +package. In fact, any symbol, from any package, with the appropriate +name will do; the LOOP macro cares only about their names. Typically, +though, they're written with no package qualifier and are thus read (and +interned as necessary) in the current package. + + +File: pcl.info, Node: 22-2, Next: 22-3, Prev: 22-1, Up: Chapter 22 + +Iteration Control +================= + +Most of the so-called iteration control clauses start with the loop +keyword for, or its synonym as, (1) followed by the name of a variable. +What follows after the variable name depends on the type of for clause. 
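+
+For example, these two loops are equivalent; as is simply another way
+of writing for:
+
+     (loop for i from 1 to 3 collect i) ==> (1 2 3)
+     (loop as i from 1 to 3 collect i)  ==> (1 2 3)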
+ + The subclauses of a for clause can iterate over the following: + + * Ranges of numbers, up or down, by specified intervals + * The individual items of a list + * The cons cells that make up a list + * The elements of a vector, including subtypes such as strings and + bit vectors + * The pairs of a hash table + * The symbols in a package + * The results of repeatedly evaluating a given form + + A single loop can have multiple for clauses with each clause naming +its own variable. When a loop has multiple for clauses, the loop +terminates as soon as any for clause reaches its end condition. For +instance, the following loop: + + (loop + for item in list + for i from 1 to 10 + do (something)) + + will iterate at most ten times but may stop sooner if list contains +fewer than ten items. + + ---------- Footnotes ---------- + + (1) Because one of the goals of LOOP is to allow loop expressions to +be written with a quasi-English syntax, many of the keywords have +synonyms that are treated the same by LOOP but allow some freedom to +express things in slightly more idiomatic English for different +contexts. + + +File: pcl.info, Node: 22-3, Next: 22-4, Prev: 22-2, Up: Chapter 22 + +Counting Loops +============== + +Arithmetic iteration clauses control the number of times the loop body +will be executed by stepping a variable over a range of numbers, +executing the body once per step. These clauses consist of from one to +three of the following prepositional phrases after the for (or as): the +from where phrase, the to where phrase, and the by how much phrase. + + The from where phrase specifies the initial value of the clause's +variable. It consists of one of the prepositions from, downfrom, or +upfrom followed by a form, which supplies the initial value (a number). + + The to where phrase specifies a stopping point for the loop and +consists of one of the prepositions to, upto, below, downto, or above +followed by a form, which supplies the stopping point. With upto and +downto, the loop body will be terminated (without executing the body +again) when the variable passes the stopping point; with below and +above, it stops one iteration earlier.The by how much phrase consists of +the prepositions by and a form, which must evaluate to a positive +number. The variable will be stepped (up or down, as determined by the +other phrases) by this amount on each iteration or by one if it's +omitted. + + You must specify at least one of these prepositional phrases. The +defaults are to start at zero, increment the variable by one at each +iteration, and go forever or, more likely, until some other clause +terminates the loop. You can modify any or all of these defaults by +adding the appropriate prepositional phrases. The only wrinkle is that +if you want decremental stepping, there's no default from where value, +so you must specify one with either from or downfrom. So, the +following: + + (loop for i upto 10 collect i) + + collects the first eleven integers (zero to ten), but the behavior of +this: + + (loop for i downto -10 collect i) ; wrong + + is undefined. Instead, you need to write this: + + (loop for i from 0 downto -10 collect i) + + Also note that because LOOP is a macro, which runs at compile time, +it has to be able to determine the direction to step the variable based +solely on the prepositions-not the values of the forms, which may not be +known until runtime. So, the following: + + (loop for i from 10 to 20 ...) + + works fine since the default is incremental stepping. 
But this: + + (loop for i from 20 to 10 ...) + + won't know to count down from twenty to ten. Worse yet, it won't +give you an error-it will just not execute the loop since i is already +greater than ten. Instead, you must write this: + + (loop for i from 20 downto 10 ...) + + or this: + + (loop for i downfrom 20 to 10 ...) + + Finally, if you just want a loop that repeats a certain number of +times, you can replace a clause of the following form: + + for i from 1 to number-form + + with a repeat clause like this: + + repeat number-form + + These clauses are identical in effect except the repeat clause +doesn't create an explicit loop variable. + + +File: pcl.info, Node: 22-4, Next: 22-5, Prev: 22-3, Up: Chapter 22 + +Looping Over Collections and Packages +===================================== + +The for clauses for iterating over lists are much simpler than the +arithmetic clauses. They support only two prepositional phrases, in and +on. + + A phrase of this form: + + for var in list-form + + steps var over all the elements of the list produced by evaluating +list-form. + + (loop for i in (list 10 20 30 40) collect i) ==> (10 20 30 40) + + Occasionally this clause is supplemented with a by phrase, which +specifies a function to use to move down the list. The default is CDR +but can be any function that takes a list and returns a sublist. For +instance, you could collect every other element of a list with a loop +like this: + + (loop for i in (list 10 20 30 40) by #'cddr collect i) ==> (10 30) + + An on prepositional phrase is used to step var over the cons cells +that make up a list. + + (loop for x on (list 10 20 30) collect x) ==> ((10 20 30) (20 30) (30)) + + This phrase too can take a by preposition: + + (loop for x on (list 10 20 30 40) by #'cddr collect x) ==> ((10 20 30 40) (30 40)) + + Looping over the elements of a vector (which includes strings and bit +vectors) is similar to looping over the elements of a list except the +preposition across is used instead of in. (1) For instance: + + (loop for x across "abcd" collect x) ==> (#\a #\b #\c #\d) + + Iterating over a hash table or package is slightly more complicated +because hash tables and packages have different sets of values you might +want to iterate over-the keys or values in a hash table and the +different kinds of symbols in a package. Both kinds of iteration follow +the same pattern. The basic pattern looks like this: + + (loop for var being the things in hash-or-package ...) + + For hash tables, the possible values for things are hash-keys and +hash-values, which cause var to be bound to successive values of either +the keys or the values of the hash table. The hash-or-package form is +evaluated once to produce a value, which must be a hash table. + + To iterate over a package, things can be symbols, present-symbols, +and external-symbols, which cause var to be bound to each of the symbols +accessible in a package, each of the symbols present in a package (in +other words, interned or imported into that package), or each of the +symbols that have been exported from the package. The hash-or-package +form is evaluated to produce the name of a package, which is looked up +as if by FIND-PACKAGE or a package object. Synonyms are also available +for parts of the for clause. In place of the, you can use each; you can +use of instead of in; and you can write the things in the singular (for +example, hash-key or symbol). 
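+
+For example, assuming h is bound to a hash table, the first loop below
+collects its keys, and the two loops after it, which are equivalent,
+collect the external symbols of the COMMON-LISP package:
+
+     (loop for k being the hash-keys in h collect k)
+     (loop for s being the external-symbols of :cl collect s)
+     (loop for s being each external-symbol of :cl collect s)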
+ + Finally, since you'll often want both the keys and the values when +iterating over a hash table, the hash table clauses support a using +subclause at the end of the hash table clause. + + (loop for k being the hash-keys in h using (hash-value v) ...) + (loop for v being the hash-values in h using (hash-key k) ...) + + Both of these loops will bind k to each key in the hash table and v +to the corresponding value. Note that the first element of the using +subclause must be in the singular form. (2) + + ---------- Footnotes ---------- + + (1) You may wonder why LOOP can't figure out whether it's looping +over a list or a vector without needing different prepositions. This is +another consequence of LOOP being a macro: the value of the list or +vector won't be known until runtime, but LOOP, as a macro, has to +generate code at compile time. And LOOP's designers wanted it to +generate extremely efficient code. To be able to generate efficient +code for looping across, say, a vector, it needs to know at compile time +that the value will be a vector at runtime-thus, the different +prepositions are needed. + + (2) Don't ask me why LOOP's authors chickened out on the +no-parentheses style for the using subclause. + + +File: pcl.info, Node: 22-5, Next: 22-6, Prev: 22-4, Up: Chapter 22 + +Equals-Then Iteration +===================== + +If none of the other for clauses supports exactly the form of variable +stepping you need, you can take complete control over stepping with an +equals-then clause. This clause is similar to the binding clauses in a +DO loop but cast in a more Algolish syntax. The template is as follows: + + (loop for var = initial-value-form [ then step-form ] ...) + + As usual, var is the name of the variable to be stepped. Its initial +value is obtained by evaluating initial-value-form once before the first +iteration. In each subsequent iteration, step-form is evaluated, and +its value becomes the new value of var. With no then part to the +clause, the initial-value-form is reevaluated on each iteration to +provide the new value. Note that this is different from a DO binding +clause with no step form. + + The step-form can refer to other loop variables, including variables +created by other for clauses later in the loop. For instance: + + (loop repeat 5 + for x = 0 then y + for y = 1 then (+ x y) + collect y) ==> (1 2 4 8 16) + + However, note that each for clause is evaluated separately in the +order it appears. So in the previous loop, on the second iteration x is +set to the value of y before y changes (in other words, 1). But y is +then set to the sum of its old value (still 1) and the new value of x. +If the order of the for clauses is reversed, the results change. + + (loop repeat 5 + for y = 1 then (+ x y) + for x = 0 then y + collect y) ==> (1 1 2 4 8) + + Often, however, you'll want the step forms for multiple variables to +be evaluated before any of the variables is given its new value (similar +to how DO steps its variables). In that case, you can join multiple for +clauses by replacing all but the first for with and. You saw this +formulation already in the LOOP version of the Fibonacci computation in +Chapter 7. 
Here's another variant, based on the two previous examples: + + (loop repeat 5 + for x = 0 then y + and y = 1 then (+ x y) + collect y) ==> (1 1 2 3 5) + + +File: pcl.info, Node: 22-6, Next: 22-7, Prev: 22-5, Up: Chapter 22 + +Local Variables +=============== + +While the main variables needed within a loop are usually declared +implicitly in for clauses, sometimes you'll need auxiliary variables, +which you can declare with with clauses. + + with var [ = value-form ] + + The name var becomes the name of a local variable that will cease to +exist when the loop finishes. If the with clause contains an = +value-form part, the variable will be initialized, before the first +iteration of the loop, to the value of value-form. + + Multiple with clauses can appear in a loop; each clause is evaluated +independently in the order it appears and the value is assigned before +proceeding to the next clause, allowing later variables to depend on the +value of already declared variables. Mutually independent variables can +be declared in one with clause with an and between each declaration. + + +File: pcl.info, Node: 22-7, Next: 22-8, Prev: 22-6, Up: Chapter 22 + +Destructuring Variables +======================= + +One handy feature of LOOP I haven't mentioned yet is the ability to +destructure list values assigned to loop variables. This lets you take +apart the value of lists that would otherwise be assigned to a loop +variable, similar to the way DESTRUCTURING-BIND works but a bit less +elaborate. Basically, you can replace any loop variable in a for or +with clause with a tree of symbols, and the list value that would have +been assigned to the simple variable will instead be destructured into +variables named by the symbols in the tree. A simple example looks like +this: + + CL-USER> (loop for (a b) in '((1 2) (3 4) (5 6)) + do (format t "a: ~a; b: ~a~%" a b)) + a: 1; b: 2 + a: 3; b: 4 + a: 5; b: 6 + NIL + + The tree can also include dotted lists, in which case the name after +the dot acts like a &rest parameter, being bound to a list containing +any remaining elements of the list. This is particularly handy with +for/on loops since the value is always a list. For instance, this LOOP +(which I used in Chapter 18 to emit a comma-delimited list): + + (loop for cons on list + do (format t "~a" (car cons)) + when (cdr cons) do (format t ", ")) + + could also be written like this: + + (loop for (item . rest) on list + do (format t "~a" item) + when rest do (format t ", ")) + + If you want to ignore a value in the destructured list, you can use +NIL in place of a variable name. + + (loop for (a nil) in '((1 2) (3 4) (5 6)) collect a) ==> (1 3 5) + + If the destructuring list contains more variables than there are +values in the list, the extra variables are set to NIL, making all the +variables essentially like &optional parameters. There isn't, however, +any equivalent to &key parameters. + + +File: pcl.info, Node: 22-8, Next: 22-9, Prev: 22-7, Up: Chapter 22 + +Value Accumulation +================== + +The value accumulation clauses are perhaps the most powerful part of +LOOP. While the iteration control clauses provide a concise syntax for +expressing the basic mechanics of looping, they aren't dramatically +different from the equivalent mechanisms provided by DO, DOLIST, and +DOTIMES. + + The value accumulation clauses, on the other hand, provide a concise +notation for a handful of common loop idioms having to do with +accumulating values while looping. 
Each accumulation clause starts with +a verb and follows this pattern: + + verb form [ into var ] + + Each time through the loop, an accumulation clause evaluates form and +saves the value in a manner determined by the verb. With an into +subclause, the value is saved into the variable named by var. The +variable is local to the loop, as if it'd been declared in a with +clause. With no into subclause, the accumulation clause instead +accumulates a default value for the whole loop expression. + + The possible verbs are collect, append, nconc, count, sum, maximize, +and minimize. Also available as synonyms are the present participle +forms: collecting, appending, nconcing, counting, summing, maximizing, +and minimizing. + + A collect clause builds up a list containing all the values of form +in the order they're seen. This is a particularly useful construct +because the code you'd have to write to collect a list in order as +efficiently as LOOP does is more painful than you'd normally write by +hand. (1) Related to collect are the verbs append and nconc. These +verbs also accumulate values into a list, but they join the values, +which must be lists, into a single list as if by the functions APPEND or +NCONC. (2) + + The remaining accumulation clauses are used to accumulate numeric +values. The verb count counts the number of times form is true, sum +collects a running total of the values of form, maximize collects the +largest value seen for form, and minimize collects the smallest. For +instance, suppose you define a variable *random* that contains a list of +random numbers. + + (defparameter *random* (loop repeat 100 collect (random 10000))) + + Then the following loop will return a list containing various summary +information about the numbers: + + (loop for i in *random* + counting (evenp i) into evens + counting (oddp i) into odds + summing i into total + maximizing i into max + minimizing i into min + finally (return (list min max total evens odds))) + + ---------- Footnotes ---------- + + (1) The trick is to keep ahold of the tail of the list and add new +cons cells by SETFing the CDR of the tail. A handwritten equivalent of +the code generated by (loop for i upto 10 collect i) would look like +this: + + (do ((list nil) (tail nil) (i 0 (1+ i))) + ((> i 10) list) + (let ((new (cons i nil))) + (if (null list) + (setf list new) + (setf (cdr tail) new)) + (setf tail new))) + + Of course you'll rarely, if ever, write code like that. You'll use +either LOOP or (if, for some reason, you don't want to use LOOP) the +standard PUSH/NREVERSE idiom for collecting values. + + (2) Recall that NCONC is the destructive version of APPEND-it's safe +to use an nconc clause only if the values you're collecting are fresh +lists that don't share any structure with other lists. For instance, +this is safe: + + (loop for i upto 3 nconc (list i i)) ==> (0 0 1 1 2 2 3 3) + + But this will get you into trouble: + + (loop for i on (list 1 2 3) nconc i) ==> undefined + + The later will most likely get into an infinite loop as the various +parts of the list produced by (list 1 2 3) are destructively modified to +point to each other. But even that's not guaranteed-the behavior is +simply undefined. + + +File: pcl.info, Node: 22-9, Next: 22-10, Prev: 22-8, Up: Chapter 22 + +Unconditional Execution +======================= + +As useful as the value accumulation constructs are, LOOP wouldn't be a +very good general-purpose iteration facility if there wasn't a way to +execute arbitrary Lisp code in the loop body. 
+ + The simplest way to execute arbitrary code within a loop body is with +a do clause. Compared to the clauses I've described so far, with their +prepositions and subclauses, do is a model of Yodaesque simplicity. (1) +A do clause consists of the word do (or doing) followed by one or more +Lisp forms that are all evaluated when the do clause is. The do clause +ends at the closing parenthesis of the loop or the next loop keyword. + + For instance, to print the numbers from one to ten, you could write +this: + + (loop for i from 1 to 10 do (print i)) + + Another, more dramatic, form of immediate execution is a return +clause. This clause consists of the word return followed by a single +Lisp form, which is evaluated, with the resulting value immediately +returned as the value of the loop. + + You can also break out of a loop in a do clause using any of Lisp's +normal control flow operators, such as RETURN and RETURN-FROM. Note that +a return clause always returns from the immediately enclosing LOOP +expression, while a RETURN or RETURN-FROM in a do clause can return from +any enclosing expression. For instance, compare the following: + + (block outer + (loop for i from 0 return 100) ; 100 returned from LOOP + (print "This will print") + 200) ==> 200 + + to this: + + (block outer + (loop for i from 0 do (return-from outer 100)) ; 100 returned from BLOCK + (print "This won't print") + 200) ==> 100 + + The do and return clauses are collectively called the unconditional +execution clauses. + + ---------- Footnotes ---------- + + (1) "No! Try not. Do . . . or do not. There is no try." - +Yoda, The Empire Strikes Back + + +File: pcl.info, Node: 22-10, Next: 22-11, Prev: 22-9, Up: Chapter 22 + +Conditional Execution +===================== + +Because a do clause can contain arbitrary Lisp forms, you can use any +Lisp expressions you want, including control constructs such as IF and +WHEN. So, the following is one way to write a loop that prints only the +even numbers between one and ten: + + (loop for i from 1 to 10 do (when (evenp i) (print i))) + + However, sometimes you'll want conditional control at the level of +loop clauses. For instance, suppose you wanted to sum only the even +numbers between one and ten using a summing clause. You couldn't write +such a loop with a do clause because there'd be no way to "call" the sum +i in the middle of a regular Lisp form. In cases like this, you need to +use one of LOOP's own conditional expressions like this: + + (loop for i from 1 to 10 when (evenp i) sum i) ==> 30 + + LOOP provides three conditional constructs, and they all follow this +basic pattern: + + conditional test-form loop-clause + + The conditional can be if, when, or unless. The test-form is any +regular Lisp form, and loop-clause can be a value accumulation clause +(count, collect, and so on), an unconditional execution clause, or +another conditional execution clause. Multiple loop clauses can be +attached to a single conditional by joining them with and. + + As an extra bit of syntactic sugar, within the first loop clause, +after the test form, you can use the variable it to refer to the value +returned by the test form. For instance, the following loop collects +the non-NIL values found in some-hash when looking up the keys in +some-list: + + (loop for key in some-list when (gethash key some-hash) collect it) + + A conditional clause is executed each time through the loop. An if +or when clause executes its loop-clause if test-form evaluates to true. 
+An unless reverses the test, executing loop-clause only when test-form +is NIL. Unlike their Common Lisp namesakes, LOOP's if and when are +merely synonyms-there's no difference in their behavior. + + All three conditional clauses can also take an else branch, which is +followed by another loop clause or multiple clauses joined by and. When +conditional clauses are nested, the set of clauses connected to an inner +conditional clause can be closed with the word end. The end is optional +when not needed to disambiguate a nested conditional-the end of a +conditional clause will be inferred from the end of the loop or the +start of another clause not joined by and. + + The following rather silly loop demonstrates the various forms of +LOOP conditionals. The update-analysis function will be called each +time through the loop with the latest values of the various variables +accumulated by the clauses within the conditionals. + + (loop for i from 1 to 100 + if (evenp i) + minimize i into min-even and + maximize i into max-even and + unless (zerop (mod i 4)) + sum i into even-not-fours-total + end + and sum i into even-total + else + minimize i into min-odd and + maximize i into max-odd and + when (zerop (mod i 5)) + sum i into fives-total + end + and sum i into odd-total + do (update-analysis min-even + max-even + min-odd + max-odd + even-total + odd-total + fives-total + even-not-fours-total)) + + +File: pcl.info, Node: 22-11, Next: 22-12, Prev: 22-10, Up: Chapter 22 + +Setting Up and Tearing Down +=========================== + +One of the key insights the designers of the LOOP language had about +actual loops "in the wild" is that the loop proper is often preceded by +a bit of code to set things up and then followed by some more code that +does something with the values computed by the loop. A trivial example, +in Perl, (1) might look like this: + + my $evens_sum = 0; + my $odds_sum = 0; + foreach my $i (@list_of_numbers) { + if ($i % 2) { + $odds_sum += $i; + } else { + $evens_sum += $i; + } + } + if ($evens_sum > $odds_sum) { + print "Sum of evens greater\n"; + } else { + print "Sum of odds greater\n"; + } + + The loop proper in this code is the foreach statement. But the +foreach loop doesn't stand on its own: the code in the loop body refers +to variables declared in the two lines before the loop. (2) And the +work the loop does is all for naught without the if statement after the +loop that actually reports the results. In Common Lisp, of course, the +LOOP construct is an expression that returns a value, so there's even +more often a need to do something after the loop proper, namely, +generate the return value. + + So, said the LOOP designers, let's give a way to include the code +that's really part of the loop in the loop itself. Thus, LOOP provides +two keywords, initially and finally, that introduce code to be run +outside the loop's main body. + + After the initially or finally, these clauses consist of all the Lisp +forms up to the start of the next loop clause or the end of the loop. +All the initially forms are combined into a single prologue, which runs +once, immediately after all the local loop variables are initialized and +before the body of the loop. The finally forms are similarly combined +into a epilogue to be run after the last iteration of the loop body. +Both the prologue and epilogue code can refer to local loop variables. + + The prologue is always run, even if the loop body iterates zero +times. 
The loop can return without running the epilogue if any of the +following happens: + + * A return clause executes. + * RETURN , RETURN-FROM, or another transfer of control construct is + called from within a Lisp form within the body. (3) + * The loop is terminated by an always, never, or thereis clause, as + I'll discuss in the next section. + + Within the epilogue code, RETURN or RETURN-FROM can be used to +explicitly provide a return value for the loop. Such an explicit return +value will take precedence over any value that might otherwise be +provided by an accumulation or termination test clause. + + To allow RETURN-FROM to be used to return from a specific loop +(useful when nesting LOOP expressions), you can name a LOOP with the +loop keyword named. If a named clause appears in a loop, it must be the +first clause. For a simple example, assume lists is a list of lists and +you want to find an item that matches some criteria in one of those +nested lists. You could find it with a pair of nested loops like this: + + (loop named outer for list in lists do + (loop for item in list do + (if (what-i-am-looking-for-p item) + (return-from outer item)))) + + ---------- Footnotes ---------- + + (1) I'm not picking on Perl here-this example would look pretty much +the same in any language that bases its syntax on C's. + + (2) Perl would let you get away with not declaring those variables if +your program didn't use strict. But you should always use strict in +Perl. The equivalent code in Python, Java, or C would always require +the variables to be declared. + + (3) You can cause a loop to finish normally, running the epilogue, +from Lisp code executed as part of the loop body with the local macro +LOOP-FINISH. + + +File: pcl.info, Node: 22-12, Next: 22-13, Prev: 22-11, Up: Chapter 22 + +Termination Tests +================= + +While the for and repeat clauses provide the basic infrastructure for +controlling the number of iterations, sometimes you'll need to break out +of a loop early. You've already seen how a return clause or a RETURN or +RETURN-FROM within a do clause can immediately terminate the loop; but +just as there are common patterns for accumulating values, there are +also common patterns for deciding when it's time to bail on a loop. +These patterns are supported in LOOP by the termination clauses, while, +until, always, never, and thereis. They all follow the same pattern. + + loop-keyword test-form + + All five evaluate test-form each time through the iteration and +decide, based on the resulting value, whether to terminate the loop. +They differ in what happens after they terminate the loop-if they do-and +how they decide. + + The loop keywords while and until introduce the "mild" termination +clauses. When they decide to terminate the loop, control passes to the +epilogue, skipping the rest of the loop body. The epilogue can then +return a value or do whatever it wants to finish the loop. A while +clause terminates the loop the first time the test form is false; until, +conversely, stops it the first time the test form is true. + + Another form of mild termination is provided by the LOOP-FINISH +macro. This is a regular Lisp form, not a loop clause, so it can be +used anywhere within the Lisp forms of a do clause. It also causes an +immediate jump to the loop epilogue. It can be useful when the decision +to break out of the loop can't be easily condensed into a single form +that can be used with a while or until clause. 
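+
+   For instance, here are a few minimal sketches of while, until, and
+LOOP-FINISH; the return values shown are illustrative and assume the
+default list accumulation described earlier:
+
+     (loop for i from 0 while (< i 5) collect i) ==> (0 1 2 3 4)
+
+     (loop for i from 0 until (= i 5) collect i) ==> (0 1 2 3 4)
+
+     (loop for i from 0
+           do (when (> i 3) (loop-finish))
+           collect i) ==> (0 1 2 3)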
+ + The other three clauses-always, never, and thereis-terminate the loop +with extreme prejudice; they immediately return from the loop, skipping +not only any subsequent loop clauses but also the epilogue. They also +provide a default value for the loop even when they don't cause the loop +to terminate. However, if the loop is not terminated by one of these +termination tests, the epilogue is run and can return a value other than +the default provided by the termination clauses. + + Because these clauses provide their own return values, they can't be +combined with accumulation clauses unless the accumulation clause has an +into subclause. The compiler (or interpreter) should signal an error at +compile time if they are.The always and never clauses return only +boolean values, so they're most useful when you need to use a loop +expression as part of a predicate. You can use always to check that the +test form is true on every iteration of the loop. Conversely, never +tests that the test form evaluates to NIL on every iteration. If the +test form fails (returning NIL in an always clause or non-NIL in a never +clause), the loop is immediately terminated, returning NIL. If the loop +runs to completion, the default value of T is provided. + + For instance, if you want to test that all the numbers in a list, +numbers, are even, you can write this: + + (if (loop for n in numbers always (evenp n)) + (print "All numbers even.")) + + Equivalently you could write the following: + + (if (loop for n in numbers never (oddp n)) + (print "All numbers even.")) + + A thereis clause is used to test whether the test form is ever true. +As soon as the test form returns a non-NIL value, the loop is +terminated, returning that value. If the loop runs to completion, the +thereis clause provides a default return value of NIL. + + (loop for char across "abc123" thereis (digit-char-p char)) ==> 1 + + (loop for char across "abcdef" thereis (digit-char-p char)) ==> NIL + + +File: pcl.info, Node: 22-13, Next: Chapter 23, Prev: 22-12, Up: Chapter 22 + +Putting It All Together +======================= + +Now you've seen all the main features of the LOOP facility. You can +combine any of the clauses I've discussed as long as you abide by the +following rules: + + * The named clause, if any, must be the first clause. + * After the named clause come all the initially, with, for, and + repeat clauses. + * Then comes the body clauses: conditional and unconditional + execution, accumulation, and termination test. (1) + * End with any finally clauses. + + The LOOP macro will expand into code that performs the following +actions: + + * Initializes all local loop variables as declared with with or for + clauses as well as those implicitly created by accumulation + clauses. The initial value forms are evaluated in the order the + clauses appear in the loop. + * Execute the forms provided by any initially clauses-the prologue-in + the order they appear in the loop. + * Iterate, executing the body of the loop as described in the next + paragraph. + * Execute the forms provided by any finally clauses-the epilogue-in + the order they appear in the loop. + + While the loop is iterating, the body is executed by first stepping +any iteration control variables and then executing any conditional or +unconditional execution, accumulation, or termination test clauses in +the order they appear in the loop code. 
If any of the clauses in the +loop body terminate the loop, the rest of the body is skipped and the +loop returns, possibly after running the epilogue. + + And that's pretty much all there is to it. (2) You'll use LOOP +fairly often in the code later in this book, so it's worth having some +knowledge of it. Beyond that, it's up to you how much you use it. + + And with that, you're ready to dive into the practical chapters that +make up the rest of the book-up first, writing a spam filter. + + ---------- Footnotes ---------- + + (1) Some Common Lisp implementations will let you get away with +mixing body clauses and for clauses, but that's strictly undefined, and +some implementations will reject such loops. + + (2) The one aspect of LOOP I haven't touched on at all is the syntax +for declaring the types of loop variables. Of course, I haven't +discussed type declarations outside of LOOP either. I'll cover the +general topic a bit in Chapter 32. For information on how they work +with LOOP, consult your favorite Common Lisp reference. + + +File: pcl.info, Node: Chapter 23, Next: Chapter 24, Prev: Chapter 22, Up: Top + +23. Practical: A Spam Filter +============================ + +In 2002 Paul Graham, having some time on his hands after selling Viaweb +to Yahoo, wrote the essay "A Plan for Spam" (1) that launched a minor +revolution in spam-filtering technology. Prior to Graham's article, +most spam filters were written in terms of handcrafted rules: if a +message has XXX in the subject, it's probably a spam; if a message has a +more than three or more words in a row in ALL CAPITAL LETTERS, it's +probably a spam. Graham spent several months trying to write such a +rule-based filter before realizing it was fundamentally a soul-sucking +task. + + To recognize individual spam features you have to try to get into + the mind of the spammer, and frankly I want to spend as little time + inside the minds of spammers as possible. + + To avoid having to think like a spammer, Graham decided to try +distinguishing spam from nonspam, a.k.a. ham, based on statistics +gathered about which words occur in which kinds of e-mails. The filter +would keep track of how often specific words appear in both spam and ham +messages and then use the frequencies associated with the words in a new +message to compute a probability that it was either spam or ham. He +called his approach Bayesian filtering after the statistical technique +that he used to combine the individual word frequencies into an overall +probability. (2) + +* Menu: + +* 23-1:: The Heart of a Spam Filter +* 23-2:: Training the Filter +* 23-3:: Per-Word Statistics +* 23-4:: Combining Probabilities +* 23-5:: Inverse Chi Square +* 23-6:: Training the Filter +* 23-7:: Testing the Filter +* 23-8:: A Couple of Utility Functions +* 23-9:: Analyzing the Results +* 23-10:: What's Next + + ---------- Footnotes ---------- + + (1) Available at http://www.paulgraham.com/spam.html and also in +Hackers & Painters: Big Ideas from the Computer Age (O'Reilly, 2004) + + (2) There has since been some disagreement over whether the technique +Graham described was actually "Bayesian." However, the name has stuck +and is well on its way to becoming a synonym for "statistical" when +talking about spam filters. + + +File: pcl.info, Node: 23-1, Next: 23-2, Prev: Chapter 23, Up: Chapter 23 + +The Heart of a Spam Filter +========================== + +In this chapter, you'll implement the core of a spam-filtering engine. 
+You won't write a soup-to-nuts spam-filtering application; rather, +you'll focus on the functions for classifying new messages and training +the filter. + + This application is going to be large enough that it's worth defining +a new package to avoid name conflicts. For instance, in the source code +you can download from this book's Web site, I use the package name +COM.GIGAMONKEYS.SPAM, defining a package that uses both the standard +COMMON-LISP package and the COM.GIGAMONKEYS.PATHNAMES package from +Chapter 15, like this: + + (defpackage :com.gigamonkeys.spam + (:use :common-lisp :com.gigamonkeys.pathnames)) + + Any file containing code for this application should start with this +line: + + (in-package :com.gigamonkeys.spam) + + You can use the same package name or replace com.gigamonkeys with +some domain you control. (1) + + You can also type this same form at the REPL to switch to this +package to test the functions you write. In SLIME this will change the +prompt from CL-USER> to SPAM> like this: + + CL-USER> (in-package :com.gigamonkeys.spam) + # + SPAM> + + Once you have a package defined, you can start on the actual code. +The main function you'll need to implement has a simple job-take the +text of a message as an argument and classify the message as spam, ham, +or unsure. You can easily implement this basic function by defining it +in terms of other functions that you'll write in a moment. + + (defun classify (text) + (classification (score (extract-features text)))) + + Reading from the inside out, the first step in classifying a message +is to extract features to pass to the score function. In score you'll +compute a value that can then be translated into one of three +classifications-spam, ham, or unsure-by the function classification. Of +the three functions, classification is the simplest. You can assume +score will return a value near 1 if the message is a spam, near 0 if +it's a ham, and near .5 if it's unclear. + + Thus, you can implement classification like this: + + (defparameter *max-ham-score* .4) + (defparameter *min-spam-score* .6) + + (defun classification (score) + (cond + ((<= score *max-ham-score*) 'ham) + ((>= score *min-spam-score*) 'spam) + (t 'unsure))) + + The extract-features function is almost as straightforward, though it +requires a bit more code. For the moment, the features you'll extract +will be the words appearing in the text. For each word, you need to +keep track of the number of times it has been seen in a spam and the +number of times it has been seen in a ham. A convenient way to keep +those pieces of data together with the word itself is to define a class, +word-feature, with three slots. + + (defclass word-feature () + ((word + :initarg :word + :accessor word + :initform (error "Must supply :word") + :documentation "The word this feature represents.") + (spam-count + :initarg :spam-count + :accessor spam-count + :initform 0 + :documentation "Number of spams we have seen this feature in.") + (ham-count + :initarg :ham-count + :accessor ham-count + :initform 0 + :documentation "Number of hams we have seen this feature in."))) + + You'll keep the database of features in a hash table so you can +easily find the object representing a given feature. You can define a +special variable, *feature-database*, to hold a reference to this hash +table. 
+ + (defvar *feature-database* (make-hash-table :test #'equal)) + + You should use DEFVAR rather than DEFPARAMETER because you don't want +*feature-database* to be reset if you happen to reload the file +containing this definition during development-you might have data stored +in *feature-database* that you don't want to lose. Of course, that +means if you do want to clear out the feature database, you can't just +reevaluate the DEFVAR form. So you should define a function +clear-database. + + (defun clear-database () + (setf *feature-database* (make-hash-table :test #'equal))) + + To find the features present in a given message, the code will need +to extract the individual words and then look up the corresponding +word-feature object in *feature-database*. If *feature-database* +contains no such feature, it'll need to create a new word-feature to +represent the word. You can encapsulate that bit of logic in a +function, intern-feature, that takes a word and returns the appropriate +feature, creating it if necessary. + + (defun intern-feature (word) + (or (gethash word *feature-database*) + (setf (gethash word *feature-database*) + (make-instance 'word-feature :word word)))) + + You can extract the individual words from the message text using a +regular expression. For example, using the Common Lisp Portable +Perl-Compatible Regular Expression (CL-PPCRE) library written by Edi +Weitz, you can write extract-words like this: (2) + + (defun extract-words (text) + (delete-duplicates + (cl-ppcre:all-matches-as-strings "[a-zA-Z]{3,}" text) + :test #'string=)) + + Now all that remains to implement extract-features is to put +extract-features and intern-feature together. Since extract-words +returns a list of strings and you want a list with each string +translated to the corresponding word-feature, this is a perfect time to +use MAPCAR. + + (defun extract-features (text) + (mapcar #'intern-feature (extract-words text))) + + You can test these functions at the REPL like this: + + SPAM> (extract-words "foo bar baz") + ("foo" "bar" "baz") + + And you can make sure the DELETE-DUPLICATES is working like this: + + SPAM> (extract-words "foo bar baz foo bar") + ("baz" "foo" "bar") + + You can also test extract-features. + + SPAM> (extract-features "foo bar baz foo bar") + (# # + #) + + However, as you can see, the default method for printing arbitrary +objects isn't very informative. As you work on this program, it'll be +useful to be able to print word-feature objects in a less opaque way. +Luckily, as I mentioned in Chapter 17, the printing of all objects is +implemented in terms of a generic function PRINT-OBJECT, so to change +the way word-feature objects are printed, you just need to define a +method on PRINT-OBJECT that specializes on word-feature. To make +implementing such methods easier, Common Lisp provides the macro +PRINT-UNREADABLE-OBJECT. (3) + + The basic form of PRINT-UNREADABLE-OBJECT is as follows: + + (print-unreadable-object (object stream-variable &key type identity) + body-form*) + + The object argument is an expression that evaluates to the object to +be printed. Within the body of PRINT-UNREADABLE-OBJECT, stream-variable +is bound to a stream to which you can print anything you want. Whatever +you print to that stream will be output by PRINT-UNREADABLE-OBJECT and +enclosed in the standard syntax for unreadable objects, #<>. 
(4) + + PRINT-UNREADABLE-OBJECT also lets you include the type of the object +and an indication of the object's identity via the keyword parameters +type and identity. If they're non-NIL, the output will start with the +name of the object's class and end with an indication of the object's +identity similar to what's printed by the default PRINT-OBJECT method +for STANDARD-OBJECTs. For word-feature, you probably want to define a +PRINT-OBJECT method that includes the type but not the identity along +with the values of the word, ham-count, and spam-count slots. Such a +method would look like this: + + (defmethod print-object ((object word-feature) stream) + (print-unreadable-object (object stream :type t) + (with-slots (word ham-count spam-count) object + (format stream "~s :hams ~d :spams ~d" word ham-count spam-count)))) + + Now when you test extract-features at the REPL, you can see more +clearly what features are being extracted. + + SPAM> (extract-features "foo bar baz foo bar") + (# + # + #) + + ---------- Footnotes ---------- + + (1) It would, however, be poor form to distribute a version of this +application using a package starting with com.gigamonkeys since you +don't control that domain. + + (2) A version of CL-PPCRE is included with the book's source code +available from the book's Web site. Or you can download it from Weitz's +site at http://www.weitz.de/cl-ppcre/. + + (3) The main reason to use PRINT-UNREADABLE-OBJECT is that it takes +care of signaling the appropriate error if someone tries to print your +object readably, such as with the ~S FORMAT directive. + + (4) PRINT-UNREADABLE-OBJECT also signals an error if it's used when +the printer control variable *PRINT-READABLY* is true. Thus, a +PRINT-OBJECT method consisting solely of a PRINT-UNREADABLE-OBJECT form +will correctly implement the PRINT-OBJECT contract with regard to +*PRINT-READABLY*. + + +File: pcl.info, Node: 23-2, Next: 23-3, Prev: 23-1, Up: Chapter 23 + +Training the Filter +=================== + +Now that you have a way to keep track of individual features, you're +almost ready to implement score. But first you need to write the code +you'll use to train the spam filter so score will have some data to use. +You'll define a function, train, that takes some text and a symbol +indicating what kind of message it is-ham or spam-and that increments +either the ham count or the spam count of all the features present in +the text as well as a global count of hams or spams processed. Again, +you can take a top-down approach and implement it in terms of other +functions that don't yet exist. + + (defun train (text type) + (dolist (feature (extract-features text)) + (increment-count feature type)) + (increment-total-count type)) + + You've already written extract-features, so next up is +increment-count, which takes a word-feature and a message type and +increments the appropriate slot of the feature. Since there's no reason +to think that the logic of incrementing these counts is going to change +for different kinds of objects, you can write this as a regular +function. (1) Because you defined both ham-count and spam-count with an +:accessor option, you can use INCF and the accessor functions created by +DEFCLASS to increment the appropriate slot. 
+ + (defun increment-count (feature type) + (ecase type + (ham (incf (ham-count feature))) + (spam (incf (spam-count feature))))) + + The ECASE construct is a variant of CASE, both of which are similar +to case statements in Algol-derived languages (renamed switch in C and +its progeny). They both evaluate their first argument-the key form-and +then find the clause whose first element-the key-is the same value +according to EQL. In this case, that means the variable type is +evaluated, yielding whatever value was passed as the second argument to +increment-count. + + The keys aren't evaluated. In other words, the value of type will be +compared to the literal objects read by the Lisp reader as part of the +ECASE form. In this function, that means the keys are the symbols ham +and spam, not the values of any variables named ham and spam. So, if +increment-count is called like this: + + (increment-count some-feature 'ham) + + the value of type will be the symbol ham, and the first branch of the +ECASE will be evaluated and the feature's ham count incremented. On the +other hand, if it's called like this: + + (increment-count some-feature 'spam) + + then the second branch will run, incrementing the spam count. Note +that the symbols ham and spam are quoted when calling increment-count +since otherwise they'd be evaluated as the names of variables. But +they're not quoted when they appear in ECASE since ECASE doesn't +evaluate the keys. (2) + + The E in ECASE stands for "exhaustive" or "error," meaning ECASE +should signal an error if the key value is anything other than one of +the keys listed. The regular CASE is looser, returning NIL if no +matching clause is found. + + To implement increment-total-count, you need to decide where to store +the counts; for the moment, two more special variables, *total-spams* +and *total-hams*, will do fine. + + (defvar *total-spams* 0) + (defvar *total-hams* 0) + + (defun increment-total-count (type) + (ecase type + (ham (incf *total-hams*)) + (spam (incf *total-spams*)))) + + You should use DEFVAR to define these two variables for the same +reason you used it with *feature-database*-they'll hold data built up +while you run the program that you don't necessarily want to throw away +just because you happen to reload your code during development. But +you'll want to reset those variables if you ever reset +*feature-database*, so you should add a few lines to clear-database as +shown here: + + (defun clear-database () + (setf + *feature-database* (make-hash-table :test #'equal) + *total-spams* 0 + *total-hams* 0)) + + ---------- Footnotes ---------- + + (1) If you decide later that you do need to have different versions +of increment-feature for different classes, you can redefine +increment-count as a generic function and this function as a method +specialized on word-feature. + + (2) Technically, the key in each clause of a CASE or ECASE is +interpreted as a list designator, an object that designates a list of +objects. A single nonlist object, treated as a list designator, +designates a list containing just that one object, while a list +designates itself. Thus, each clause can have multiple keys; CASE and +ECASE will select the clause whose list of keys contains the value of +the key form. 
For example, if you wanted to make good a synonym for ham +and bad a synonym for spam, you could write increment-count like this: + + (defun increment-count (feature type) + (ecase type + ((ham good) (incf (ham-count feature))) + ((spam bad) (incf (spam-count feature))))) + + +File: pcl.info, Node: 23-3, Next: 23-4, Prev: 23-2, Up: Chapter 23 + +Per-Word Statistics +=================== + +The heart of a statistical spam filter is, of course, the functions that +compute statistics-based probabilities. The mathematical nuances (1) of +why exactly these computations work are beyond the scope of this +book-interested readers may want to refer to several papers by Gary +Robinson. (2) I'll focus rather on how they're implemented. + + The starting point for the statistical computations is the set of +measured values-the frequencies stored in *feature-database*, +*total-spams*, and *total-hams*. Assuming that the set of messages +trained on is statistically representative, you can treat the observed +frequencies as probabilities of the same features showing up in hams and +spams in future messages. + + The basic plan is to classify a message by extracting the features it +contains, computing the individual probability that a given message +containing the feature is a spam, and then combining all the individual +probabilities into a total score for the message. Messages with many +"spammy" features and few "hammy" features will receive a score near 1, +and messages with many hammy features and few spammy features will score +near 0. + + The first statistical function you need is one that computes the +basic probability that a message containing a given feature is a spam. +By one point of view, the probability that a given message containing +the feature is a spam is the ratio of spam messages containing the +feature to all messages containing the feature. Thus, you could compute +it this way: + + (defun spam-probability (feature) + (with-slots (spam-count ham-count) feature + (/ spam-count (+ spam-count ham-count)))) + + The problem with the value computed by this function is that it's +strongly affected by the overall probability that any message will be a +spam or a ham. For instance, suppose you get nine times as much ham as +spam in general. A completely neutral feature will then appear in one +spam for every nine hams, giving you a spam probability of 1/10 +according to this function. + + But you're more interested in the probability that a given feature +will appear in a spam message, independent of the overall probability of +getting a spam or ham. Thus, you need to divide the spam count by the +total number of spams trained on and the ham count by the total number +of hams. To avoid division-by-zero errors, if either of *total-spams* +or *total-hams* is zero, you should treat the corresponding frequency as +zero. (Obviously, if the total number of either spams or hams is zero, +then the corresponding per-feature count will also be zero, so you can +treat the resulting frequency as zero without ill effect.) + + (defun spam-probability (feature) + (with-slots (spam-count ham-count) feature + (let ((spam-frequency (/ spam-count (max 1 *total-spams*))) + (ham-frequency (/ ham-count (max 1 *total-hams*)))) + (/ spam-frequency (+ spam-frequency ham-frequency))))) + + This version suffers from another problem-it doesn't take into +account the number of messages analyzed to arrive at the per-word +probabilities. Suppose you've trained on 2,000 messages, half spam and +half ham. 
Now consider two features that have appeared only in spams. +One has appeared in all 1,000 spams, while the other appeared only once. +According to the current definition of spam-probability, the appearance +of either feature predicts that a message is spam with equal +probability, namely, 1. + + However, it's still quite possible that the feature that has appeared +only once is actually a neutral feature-it's obviously rare in either +spams or hams, appearing only once in 2,000 messages. If you trained on +another 2,000 messages, it might very well appear one more time, this +time in a ham, making it suddenly a neutral feature with a spam +probability of .5. + + So it seems you might like to compute a probability that somehow +factors in the number of data points that go into each feature's +probability. In his papers, Robinson suggested a function based on the +Bayesian notion of incorporating observed data into prior knowledge or +assumptions. Basically, you calculate a new probability by starting +with an assumed prior probability and a weight to give that assumed +probability before adding new information. Robinson's function is this: + + (defun bayesian-spam-probability (feature &optional + (assumed-probability 1/2) + (weight 1)) + (let ((basic-probability (spam-probability feature)) + (data-points (+ (spam-count feature) (ham-count feature)))) + (/ (+ (* weight assumed-probability) + (* data-points basic-probability)) + (+ weight data-points)))) + + Robinson suggests values of 1/2 for assumed-probability and 1 for +weight. Using those values, a feature that has appeared in one spam and +no hams has a bayesian-spam-probability of 0.75, a feature that has +appeared in 10 spams and no hams has a bayesian-spam-probability of +approximately 0.955, and one that has matched in 1,000 spams and no hams +has a spam probability of approximately 0.9995. + + ---------- Footnotes ---------- + + (1) Speaking of mathematical nuances, hard-core statisticians may be +offended by the sometimes loose use of the word probability in this +chapter. However, since even the pros, who are divided between the +Bayesians and the frequentists, can't agree on what a probability is, +I'm not going to worry about it. This is a book about programming, not +statistics. + + (2) Robinson's articles that directly informed this chapter are "A +Statistical Approach to the Spam Problem" (published in the Linux +Journal and available at http://www.linuxjournal.com/ +article.php?sid=6467 and in a shorter form on Robinson's blog at +http://radio.weblogs.com/ 0101454/stories/2002/09/16/spamDetection.html) +and "Why Chi? Motivations for the Use of Fisher's Inverse Chi-Square +Procedure in Spam Classification" (available at +http://garyrob.blogs.com/ whychi93.pdf). Another article that may be +useful is "Handling Redundancy in Email Token Probabilities" (available +at http://garyrob.blogs.com//handlingtokenredundancy94.pdf). The +archived mailing lists of the SpamBayes project +(http://spambayes.sourceforge.net/) also contain a lot of useful +information about different algorithms and approaches to testing spam +filters. + + +File: pcl.info, Node: 23-4, Next: 23-5, Prev: 23-3, Up: Chapter 23 + +Combining Probabilities +======================= + +Now that you can compute the bayesian-spam-probability of each +individual feature you find in a message, the last step in implementing +the score function is to find a way to combine a bunch of individual +probabilities into a single value between 0 and 1. 
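+
+   As a quick sanity check of the three figures quoted at the end of
+the previous section, you can evaluate Robinson's formula directly,
+using a weight of 1 and an assumed probability of 1/2. These are just
+the arithmetic from the definition of bayesian-spam-probability
+applied by hand-they don't depend on any training data:
+
+     (/ (+ (* 1 1/2) (* 1 1)) (+ 1 1))        ==> 3/4        ; one spam, no hams
+     (/ (+ (* 1 1/2) (* 10 1)) (+ 1 10))      ==> 21/22      ; roughly 0.955
+     (/ (+ (* 1 1/2) (* 1000 1)) (+ 1 1000))  ==> 2001/2002  ; roughly 0.9995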
+ + If the individual feature probabilities were independent, then it'd +be mathematically sound to multiply them together to get a combined +probability. But it's unlikely they actually are independent-certain +features are likely to appear together, while others never do. (1) + + Robinson proposed using a method for combining probabilities invented +by the statistician R. A. Fisher. Without going into the details of +exactly why his technique works, it's this: First you combine the +probabilities by multiplying them together. This gives you a number +nearer to 0 the more low probabilities there were in the original set. +Then take the log of that number and multiply by -2. Fisher showed in +1950 that if the individual probabilities were independent and drawn +from a uniform distribution between 0 and 1, then the resulting value +would be on a chi-square distribution. This value and twice the number +of probabilities can be fed into an inverse chi-square function, and +it'll return the probability that reflects the likelihood of obtaining a +value that large or larger by combining the same number of randomly +selected probabilities. When the inverse chi-square function returns a +low probability, it means there was a disproportionate number of low +probabilities (either a lot of relatively low probabilities or a few +very low probabilities) in the individual probabilities. + + To use this probability in determining whether a given message is a +spam, you start with a null hypothesis, a straw man you hope to knock +down. The null hypothesis is that the message being classified is in +fact just a random collection of features. If it were, then the +individual probabilities-the likelihood that each feature would appear +in a spam-would also be random. That is, a random selection of features +would usually contain some features with a high probability of appearing +in spam and other features with a low probability of appearing in spam. +If you were to combine these randomly selected probabilities according +to Fisher's method, you should get a middling combined value, which the +inverse chi-square function will tell you is quite likely to arise just +by chance, as, in fact, it would have. But if the inverse chi-square +function returns a very low probability, it means it's unlikely the +probabilities that went into the combined value were selected at random; +there were too many low probabilities for that to be likely. So you can +reject the null hypothesis and instead adopt the alternative hypothesis +that the features involved were drawn from a biased sample-one with few +high spam probability features and many low spam probability features. +In other words, it must be a ham message. + + However, the Fisher method isn't symmetrical since the inverse +chi-square function returns the probability that a given number of +randomly selected probabilities would combine to a value as large or +larger than the one you got by combining the actual probabilities. This +asymmetry works to your advantage because when you reject the null +hypothesis, you know what the more likely hypothesis is. When you +combine the individual spam probabilities via the Fisher method, and it +tells you there's a high probability that the null hypothesis is +wrong-that the message isn't a random collection of words-then it means +it's likely the message is a ham. The number returned is, if not +literally the probability that the message is a ham, at least a good +measure of its "hamminess." 
Conversely, the Fisher combination of the +individual ham probabilities gives you a measure of the message's +"spamminess." + + To get a final score, you need to combine those two measures into a +single number that gives you a combined hamminess-spamminess score +ranging from 0 to 1. The method recommended by Robinson is to add half +the difference between the hamminess and spamminess scores to 1/2, in +other words, to average the spamminess and 1 minus the hamminess. This +has the nice effect that when the two scores agree (high spamminess and +low hamminess, or vice versa) you'll end up with a strong indicator near +either 0 or 1. But when the spamminess and hamminess scores are both +high or both low, then you'll end up with a final value near 1/2, which +you can treat as an "uncertain" classification. + + The score function that implements this scheme looks like this: + + (defun score (features) + (let ((spam-probs ()) (ham-probs ()) (number-of-probs 0)) + (dolist (feature features) + (unless (untrained-p feature) + (let ((spam-prob (float (bayesian-spam-probability feature) 0.0d0))) + (push spam-prob spam-probs) + (push (- 1.0d0 spam-prob) ham-probs) + (incf number-of-probs)))) + (let ((h (- 1 (fisher spam-probs number-of-probs))) + (s (- 1 (fisher ham-probs number-of-probs)))) + (/ (+ (- 1 h) s) 2.0d0)))) + + You take a list of features and loop over them, building up two lists +of probabilities, one listing the probabilities that a message +containing each feature is a spam and the other that a message +containing each feature is a ham. As an optimization, you can also +count the number of probabilities while looping over them and pass the +count to fisher to avoid having to count them again in fisher itself. +The value returned by fisher will be low if the individual probabilities +contained too many low probabilities to have come from random text. +Thus, a low fisher score for the spam probabilities means there were +many hammy features; subtracting that score from 1 gives you a +probability that the message is a ham. Conversely, subtracting the +fisher score for the ham probabilities gives you the probability that +the message was a spam. Combining those two probabilities gives you an +overall spamminess score between 0 and 1. + + Within the loop, you can use the function untrained-p to skip +features extracted from the message that were never seen during +training. These features will have spam counts and ham counts of zero. +The untrained-p function is trivial. + + (defun untrained-p (feature) + (with-slots (spam-count ham-count) feature + (and (zerop spam-count) (zerop ham-count)))) + + The only other new function is fisher itself. Assuming you already +had an inverse-chi-square function, fisher is conceptually simple. + + (defun fisher (probs number-of-probs) + "The Fisher computation described by Robinson." + (inverse-chi-square + (* -2 (log (reduce #'* probs))) + (* 2 number-of-probs))) + + Unfortunately, there's a small problem with this straightforward +implementation. While using REDUCE is a concise and idiomatic way of +multiplying a list of numbers, in this particular application there's a +danger the product will be too small a number to be represented as a +floating-point number. In that case, the result will underflow to zero. 
+And if the product of the probabilities underflows, all bets are off +because taking the LOG of zero will either signal an error or, in some +implementation, result in a special negative-infinity value, which will +render all subsequent calculations essentially meaningless. This is +particularly unfortunate in this function because the Fisher method is +most sensitive when the input probabilities are low-near zero-and +therefore in the most danger of causing the multiplication to underflow. + + Luckily, you can use a bit of high-school math to avoid this problem. +Recall that the log of a product is the same as the sum of the logs of +the factors. So instead of multiplying all the probabilities and then +taking the log, you can sum the logs of each probability. And since +REDUCE takes a :key keyword parameter, you can use it to perform the +whole calculation. Instead of this: + + (log (reduce #'* probs)) + + write this: + + (reduce #'+ probs :key #'log) + + ---------- Footnotes ---------- + + (1) Techniques that combine nonindependent probabilities as though +they were, in fact, independent, are called naive Bayesian. Graham's +original proposal was essentially a naive Bayesian classifier with some +"empirically derived" constant factors thrown in. + + +File: pcl.info, Node: 23-5, Next: 23-6, Prev: 23-4, Up: Chapter 23 + +Inverse Chi Square +================== + +The implementation of inverse-chi-square in this section is a fairly +straightforward translation of a version written in Python by Robinson. +The exact mathematical meaning of this function is beyond the scope of +this book, but you can get an intuitive sense of what it does by +thinking about how the values you pass to fisher will affect the result: +the more low probabilities you pass to fisher, the smaller the product +of the probabilities will be. The log of a small product will be a +negative number with a large absolute value, which is then multiplied by +-2, making it an even larger positive number. Thus, the more low +probabilities were passed to fisher, the larger the value it'll pass to +inverse-chi-square. Of course, the number of probabilities involved +also affects the value passed to inverse-chi-square. Since +probabilities are, by definition, less than or equal to 1, the more +probabilities that go into a product, the smaller it'll be and the +larger the value passed to inverse-chi-square. Thus, inverse-chi-square +should return a low probability when the Fisher combined value is +abnormally large for the number of probabilities that went into it. The +following function does exactly that: + + (defun inverse-chi-square (value degrees-of-freedom) + (assert (evenp degrees-of-freedom)) + (min + (loop with m = (/ value 2) + for i below (/ degrees-of-freedom 2) + for prob = (exp (- m)) then (* prob (/ m i)) + summing prob) + 1.0)) + + Recall from Chapter 10 that EXP raises e to the argument given. +Thus, the larger value is, the smaller the initial value of prob will +be. But that initial value will then be adjusted upward slightly for +each degree of freedom as long as m is greater than the number of +degrees of freedom. Since the value returned by inverse-chi-square is +supposed to be another probability, it's important to clamp the value +returned with MIN since rounding errors in the multiplication and +exponentiation may cause the LOOP to return a sum just a shade over 1. 
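+
+   Putting the underflow fix from the previous section together with
+inverse-chi-square, a sketch of the revised fisher might look like
+this (the docstring is illustrative; the only change from the earlier
+definition is the substitution described above):
+
+     (defun fisher (probs number-of-probs)
+       "The Fisher computation, using a log sum to avoid underflow."
+       (inverse-chi-square
+        (* -2 (reduce #'+ probs :key #'log))
+        (* 2 number-of-probs)))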
+ + +File: pcl.info, Node: 23-6, Next: 23-7, Prev: 23-5, Up: Chapter 23 + +Training the Filter +=================== + +Since you wrote classify and train to take a string argument, you can +test them easily at the REPL. If you haven't yet, you should switch to +the package in which you've been writing this code by evaluating an +IN-PACKAGE form at the REPL or using the SLIME shortcut change-package. +To use the SLIME shortcut, type a comma at the REPL and then type the +name at the prompt. Pressing Tab while typing the package name will +autocomplete based on the packages your Lisp knows about. Now you can +invoke any of the functions that are part of the spam application. You +should first make sure the database is empty. + + SPAM> (clear-database) + + Now you can train the filter with some text. + + SPAM> (train "Make money fast" 'spam) + + And then see what the classifier thinks. + + SPAM> (classify "Make money fast") + SPAM + SPAM> (classify "Want to go to the movies?") + UNSURE + + While ultimately all you care about is the classification, it'd be +nice to be able to see the raw score too. The easiest way to get both +values without disturbing any other code is to change classification to +return multiple values. + + (defun classification (score) + (values + (cond + ((<= score *max-ham-score*) 'ham) + ((>= score *min-spam-score*) 'spam) + (t 'unsure)) + score)) + + You can make this change and then recompile just this one function. +Because classify returns whatever classification returns, it'll also now +return two values. But since the primary return value is the same, +callers of either function who expect only one value won't be affected. +Now when you test classify, you can see exactly what score went into the +classification. + + SPAM> (classify "Make money fast") + SPAM + 0.863677101854273D0 + SPAM> (classify "Want to go to the movies?") + UNSURE + 0.5D0 + + And now you can see what happens if you train the filter with some +more ham text. + + SPAM> (train "Do you have any money for the movies?" 'ham) + 1 + SPAM> (classify "Make money fast") + SPAM + 0.7685351219857626D0 + + It's still spam but a bit less certain since money was seen in ham +text. + + SPAM> (classify "Want to go to the movies?") + HAM + 0.17482223132078922D0 + + And now this is clearly recognizable ham thanks to the presence of +the word movies, now a hammy feature. + + However, you don't really want to train the filter by hand. What +you'd really like is an easy way to point it at a bunch of files and +train it on them. And if you want to test how well the filter actually +works, you'd like to then use it to classify another set of files of +known types and see how it does. So the last bit of code you'll write +in this chapter will be a test harness that tests the filter on a corpus +of messages of known types, using a certain fraction for training and +then measuring how accurate the filter is when classifying the +remainder. + + +File: pcl.info, Node: 23-7, Next: 23-8, Prev: 23-6, Up: Chapter 23 + +Testing the Filter +================== + +To test the filter, you need a corpus of messages of known types. You +can use messages lying around in your inbox, or you can grab one of the +corpora available on the Web. For instance, the SpamAssassin corpus (1) +contains several thousand messages hand classified as spam, easy ham, +and hard ham. To make it easy to use whatever files you have, you can +define a test rig that's driven off an array of file/type pairs. 
You +can define a function that takes a filename and a type and adds it to +the corpus like this: + + (defun add-file-to-corpus (filename type corpus) + (vector-push-extend (list filename type) corpus)) + + The value of corpus should be an adjustable vector with a fill +pointer. For instance, you can make a new corpus like this: + + (defparameter *corpus* (make-array 1000 :adjustable t :fill-pointer 0)) + + If you have the hams and spams already segregated into separate +directories, you might want to add all the files in a directory as the +same type. This function, which uses the list-directory function from +Chapter 15, will do the trick: + + (defun add-directory-to-corpus (dir type corpus) + (dolist (filename (list-directory dir)) + (add-file-to-corpus filename type corpus))) + + For instance, suppose you have a directory mail containing two +subdirectories, spam and ham, each containing messages of the indicated +type; you can add all the files in those two directories to *corpus* +like this: + + SPAM> (add-directory-to-corpus "mail/spam/" 'spam *corpus*) + NIL + SPAM> (add-directory-to-corpus "mail/ham/" 'ham *corpus*) + NIL + + Now you need a function to test the classifier. The basic strategy +will be to select a random chunk of the corpus to train on and then test +the corpus by classifying the remainder of the corpus, comparing the +classification returned by the classify function to the known +classification. The main thing you want to know is how accurate the +classifier is-what percentage of the messages are classified correctly? +But you'll probably also be interested in what messages were +misclassified and in what direction-were there more false positives or +more false negatives? To make it easy to perform different analyses of +the classifier's behavior, you should define the testing functions to +build a list of raw results, which you can then analyze however you +like. + + The main testing function might look like this: + + (defun test-classifier (corpus testing-fraction) + (clear-database) + (let* ((shuffled (shuffle-vector corpus)) + (size (length corpus)) + (train-on (floor (* size (- 1 testing-fraction))))) + (train-from-corpus shuffled :start 0 :end train-on) + (test-from-corpus shuffled :start train-on))) + + This function starts by clearing out the feature database. (2) Then +it shuffles the corpus, using a function you'll implement in a moment, +and figures out, based on the testing-fraction parameter, how many +messages it'll train on and how many it'll reserve for testing. The two +helper functions train-from-corpus and test-from-corpus will both take +:start and :end keyword parameters, allowing them to operate on a +subsequence of the given corpus. + + The train-from-corpus function is quite simple-simply loop over the +appropriate part of the corpus, use DESTRUCTURING-BIND to extract the +filename and type from the list found in each element, and then pass the +text of the named file and the type to train. Since some mail messages, +such as those with attachments, are quite large, you should limit the +number of characters it'll take from the message. It'll obtain the text +with a function start-of-file, which you'll implement in a moment, that +takes a filename and a maximum number of characters to return. 
+train-from-corpus looks like this: + + (defparameter *max-chars* (* 10 1024)) + + (defun train-from-corpus (corpus &key (start 0) end) + (loop for idx from start below (or end (length corpus)) do + (destructuring-bind (file type) (aref corpus idx) + (train (start-of-file file *max-chars*) type)))) + + The test-from-corpus function is similar except you want to return a +list containing the results of each classification so you can analyze +them after the fact. Thus, you should capture both the classification +and score returned by classify and then collect a list of the filename, +the actual type, the type returned by classify, and the score. To make +the results more human readable, you can include keywords in the list to +indicate which values are which. + + (defun test-from-corpus (corpus &key (start 0) end) + (loop for idx from start below (or end (length corpus)) collect + (destructuring-bind (file type) (aref corpus idx) + (multiple-value-bind (classification score) + (classify (start-of-file file *max-chars*)) + (list + :file file + :type type + :classification classification + :score score))))) + + ---------- Footnotes ---------- + + (1) Several spam corpora including the SpamAssassin corpus are linked +to from http://nexp.cs.pdx.edu/~psam/cgi-bin/view/PSAM/CorpusSets. + + (2) If you wanted to conduct a test without disturbing the existing +database, you could bind *feature-database*, *total-spams*, and +*total-hams* with a LET, but then you'd have no way of looking at the +database after the fact-unless you returned the values you used within +the function. + + +File: pcl.info, Node: 23-8, Next: 23-9, Prev: 23-7, Up: Chapter 23 + +A Couple of Utility Functions +============================= + +To finish the implementation of test-classifier, you need to write the +two utility functions that don't really have anything particularly to do +with spam filtering, shuffle-vector and start-of-file. + + An easy and efficient way to implement shuffle-vector is using the +Fisher-Yates algorithm. (1) You can start by implementing a function, +nshuffle-vector, that shuffles a vector in place. This name follows the +same naming convention of other destructive functions such as NCONC and +NREVERSE. It looks like this: + + (defun nshuffle-vector (vector) + (loop for idx downfrom (1- (length vector)) to 1 + for other = (random (1+ idx)) + do (unless (= idx other) + (rotatef (aref vector idx) (aref vector other)))) + vector) + + The nondestructive version simply makes a copy of the original vector +and passes it to the destructive version. + + (defun shuffle-vector (vector) + (nshuffle-vector (copy-seq vector))) + + The other utility function, start-of-file, is almost as +straightforward with just one wrinkle. The most efficient way to read +the contents of a file into memory is to create an array of the +appropriate size and use READ-SEQUENCE to fill it in. So it might seem +you could make a character array that's either the size of the file or +the maximum number of characters you want to read, whichever is smaller. +Unfortunately, as I mentioned in Chapter 14, the function FILE-LENGTH +isn't entirely well defined when dealing with character streams since +the number of characters encoded in a file can depend on both the +character encoding used and the particular text in the file. In the +worst case, the only way to get an accurate measure of the number of +characters in a file is to actually read the whole file. 
Thus, it's +ambiguous what FILE-LENGTH should do when passed a character stream; in +most implementations, FILE-LENGTH always returns the number of octets in +the file, which may be greater than the number of characters that can be +read from the file. + + However, READ-SEQUENCE returns the number of characters actually +read. So, you can attempt to read the number of characters reported by +FILE-LENGTH and return a substring if the actual number of characters +read was smaller. + + (defun start-of-file (file max-chars) + (with-open-file (in file) + (let* ((length (min (file-length in) max-chars)) + (text (make-string length)) + (read (read-sequence text in))) + (if (< read length) + (subseq text 0 read) + text)))) + + ---------- Footnotes ---------- + + (1) This algorithm is named for the same Fisher who invented the +method used for combining probabilities and for Frank Yates, his +coauthor of the book Statistical Tables for Biological, Agricultural and +Medical Research (Oliver & Boyd, 1938) in which, according to Knuth, +they provided the first published description of the algorithm. + + +File: pcl.info, Node: 23-9, Next: 23-10, Prev: 23-8, Up: Chapter 23 + +Analyzing the Results +===================== + +Now you're ready to write some code to analyze the results generated by +test-classifier. Recall that test-classifier returns the list returned +by test-from-corpus in which each element is a plist representing the +result of classifying one file. This plist contains the name of the +file, the actual type of the file, the classification, and the score +returned by classify. The first bit of analytical code you should write +is a function that returns a symbol indicating whether a given result +was correct, a false positive, a false negative, a missed ham, or a +missed spam. You can use DESTRUCTURING-BIND to pull out the :type and +:classification elements of an individual result list (using +&allow-other-keys to tell DESTRUCTURING-BIND to ignore any other +key/value pairs it sees) and then use nested ECASE to translate the +different pairings into a single symbol. + + (defun result-type (result) + (destructuring-bind (&key type classification &allow-other-keys) result + (ecase type + (ham + (ecase classification + (ham 'correct) + (spam 'false-positive) + (unsure 'missed-ham))) + (spam + (ecase classification + (ham 'false-negative) + (spam 'correct) + (unsure 'missed-spam)))))) + + You can test out this function at the REPL. + + SPAM> (result-type '(:FILE #p"foo" :type ham :classification ham :score 0)) + CORRECT + SPAM> (result-type '(:FILE #p"foo" :type spam :classification spam :score 0)) + CORRECT + SPAM> (result-type '(:FILE #p"foo" :type ham :classification spam :score 0)) + FALSE-POSITIVE + SPAM> (result-type '(:FILE #p"foo" :type spam :classification ham :score 0)) + FALSE-NEGATIVE + SPAM> (result-type '(:FILE #p"foo" :type ham :classification unsure :score 0)) + MISSED-HAM + SPAM> (result-type '(:FILE #p"foo" :type spam :classification unsure :score 0)) + MISSED-SPAM + + Having this function makes it easy to slice and dice the results of +test-classifier in a variety of ways. For instance, you can start by +defining predicate functions for each type of result. 
+ + (defun false-positive-p (result) + (eql (result-type result) 'false-positive)) + + (defun false-negative-p (result) + (eql (result-type result) 'false-negative)) + + (defun missed-ham-p (result) + (eql (result-type result) 'missed-ham)) + + (defun missed-spam-p (result) + (eql (result-type result) 'missed-spam)) + + (defun correct-p (result) + (eql (result-type result) 'correct)) + + With those functions, you can easily use the list and sequence +manipulation functions I discussed in Chapter 11 to extract and count +particular kinds of results. + + SPAM> (count-if #'false-positive-p *results*) + 6 + SPAM> (remove-if-not #'false-positive-p *results*) + ((:FILE #p"ham/5349" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.9999983107355541d0) + (:FILE #p"ham/2746" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.6286468956619795d0) + (:FILE #p"ham/3427" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.9833753501352983d0) + (:FILE #p"ham/7785" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.9542788587998488d0) + (:FILE #p"ham/1728" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.684339162891261d0) + (:FILE #p"ham/10581" :TYPE HAM :CLASSIFICATION SPAM :SCORE 0.9999924537959615d0)) + + You can also use the symbols returned by result-type as keys into a +hash table or an alist. For instance, you can write a function to print +a summary of the counts and percentages of each type of result using an +alist that maps each type plus the extra symbol total to a count. + + (defun analyze-results (results) + (let* ((keys '(total correct false-positive + false-negative missed-ham missed-spam)) + (counts (loop for x in keys collect (cons x 0)))) + (dolist (item results) + (incf (cdr (assoc 'total counts))) + (incf (cdr (assoc (result-type item) counts)))) + (loop with total = (cdr (assoc 'total counts)) + for (label . count) in counts + do (format t "~&~@(~a~):~20t~5d~,5t: ~6,2f%~%" + label count (* 100 (/ count total)))))) + + This function will give output like this when passed a list of +results generated by test-classifier: + + SPAM> (analyze-results *results*) + Total: 3761 : 100.00% + Correct: 3689 : 98.09% + False-positive: 4 : 0.11% + False-negative: 9 : 0.24% + Missed-ham: 19 : 0.51% + Missed-spam: 40 : 1.06% + NIL + + And as a last bit of analysis you might want to look at why an +individual message was classified the way it was. The following +functions will show you: + + (defun explain-classification (file) + (let* ((text (start-of-file file *max-chars*)) + (features (extract-features text)) + (score (score features)) + (classification (classification score))) + (show-summary file text classification score) + (dolist (feature (sorted-interesting features)) + (show-feature feature)))) + + (defun show-summary (file text classification score) + (format t "~&~a" file) + (format t "~2%~a~2%" text) + (format t "Classified as ~a with score of ~,5f~%" classification score)) + + (defun show-feature (feature) + (with-slots (word ham-count spam-count) feature + (format + t "~&~2t~a~30thams: ~5d; spams: ~5d;~,10tprob: ~,f~%" + word ham-count spam-count (bayesian-spam-probability feature)))) + + (defun sorted-interesting (features) + (sort (remove-if #'untrained-p features) #'< :key #'bayesian-spam-probability)) + + +File: pcl.info, Node: 23-10, Next: Chapter 24, Prev: 23-9, Up: Chapter 23 + +What's Next +=========== + +Obviously, you could do a lot more with this code. To turn it into a +real spam-filtering application, you'd need to find a way to integrate +it into your normal e-mail infrastructure. 
One approach that would make +it easy to integrate with almost any e-mail client is to write a bit of +code to act as a POP3 proxy-that's the protocol most e-mail clients use +to fetch mail from mail servers. Such a proxy would fetch mail from +your real POP3 server and serve it to your mail client after either +tagging spam with a header that your e-mail client's filters can easily +recognize or simply putting it aside. Of course, you'd also need a way +to communicate with the filter about misclassifications-as long as +you're setting it up as a server, you could also provide a Web +interface. I'll talk about how to write Web interfaces in Chapter 26, +and you'll build one, for a different application, in Chapter 29. + + Or you might want to work on improving the basic classification-a +likely place to start is to make extract-features more sophisticated. +In particular, you could make the tokenizer smarter about the internal +structure of e-mail-you could extract different kinds of features for +words appearing in the body versus the message headers. And you could +decode various kinds of message encoding such as base 64 and quoted +printable since spammers often try to obfuscate their message with those +encodings. + + But I'll leave those improvements to you. Now you're ready to head +down the path of building a streaming MP3 server, starting by writing a +general-purpose library for parsing binary files. + + +File: pcl.info, Node: Chapter 24, Next: Chapter 25, Prev: Chapter 23, Up: Top + +24. Practical: Parsing Binary Files +=================================== + +In this chapter I'll show you how to build a library that you can use to +write code for reading and writing binary files. You'll use this +library in Chapter 25 to write a parser for ID3 tags, the mechanism used +to store metadata such as artist and album names in MP3 files. This +library is also an example of how to use macros to extend the language +with new constructs, turning it into a special-purpose language for +solving a particular problem, in this case reading and writing binary +data. Because you'll develop the library a bit at a time, including +several partial versions, it may seem you're writing a lot of code. But +when all is said and done, the whole library is fewer than 150 lines of +code, and the longest macro is only 20 lines long. + +* Menu: + +* 24-1:: Binary Files +* 24-2:: Binary Format Basics +* 24-3:: Strings in Binary Files +* 24-4:: Composite Structures +* 24-5:: Designing the Macros +* 24-6:: Making the Dream a Reality +* 24-7:: Reading Binary Objects +* 24-8:: Writing Binary Objects +* 24-9:: Adding Inheritance and Tagged Structures +* 24-10:: Keeping Track of Inherited Slots +* 24-11:: Tagged Structures +* 24-12:: Primitive Binary Types +* 24-13:: The Current Object Stack + + +File: pcl.info, Node: 24-1, Next: 24-2, Prev: Chapter 24, Up: Chapter 24 + +Binary Files +============ + +At a sufficiently low level of abstraction, all files are "binary" in +the sense that they just contain a bunch of numbers encoded in binary +form. However, it's customary to distinguish between text files, where +all the numbers can be interpreted as characters representing +human-readable text, and binary files, which contain data that, if +interpreted as characters, yields nonprintable characters. (1) + + Binary file formats are usually designed to be both compact and +efficient to parse-that's their main advantage over text-based formats. 
+To meet both those criteria, they're usually composed of on-disk +structures that are easily mapped to data structures that a program +might use to represent the same data in memory. (2) + + The library will give you an easy way to define the mapping between +the on-disk structures defined by a binary file format and in-memory +Lisp objects. Using the library, it should be easy to write a program +that can read a binary file, translating it into Lisp objects that you +can manipulate, and then write back out to another properly formatted +binary file. + + ---------- Footnotes ---------- + + (1) In ASCII, the first 32 characters are nonprinting control +characters originally used to control the behavior of a Teletype +machine, causing it to do such things as sound the bell, back up one +character, move to a new line, and move the carriage to the beginning of +the line. Of these 32 control characters, only three, the newline, +carriage return, and horizontal tab, are typically found in text files. + + (2) Some binary file formats are in-memory data structures-on many +operating systems it's possible to map a file into memory, and low-level +languages such as C can then treat the region of memory containing the +contents of the file just like any other memory; data written to that +area of memory is saved to the underlying file when it's unmapped. +However, these formats are platform-dependent since the in-memory +representation of even such simple data types as integers depends on the +hardware on which the program is running. Thus, any file format that's +intended to be portable must define a canonical representation for all +the data types it uses that can be mapped to the actual in-memory data +representation on a particular kind of machine or in a particular +language. + + +File: pcl.info, Node: 24-2, Next: 24-3, Prev: 24-1, Up: Chapter 24 + +Binary Format Basics +==================== + +The starting point for reading and writing binary files is to open the +file for reading or writing individual bytes. As I discussed in Chapter +14, both OPEN and WITH-OPEN-FILE accept a keyword argument, +:element-type, that controls the basic unit of transfer for the stream. +When you're dealing with binary files, you'll specify (unsigned-byte 8). +An input stream opened with such an :element-type will return an integer +between 0 and 255 each time it's passed to READ-BYTE. Conversely, you +can write bytes to an (unsigned-byte 8) output stream by passing numbers +between 0 and 255 to WRITE-BYTE. + + Above the level of individual bytes, most binary formats use a +smallish number of primitive data types-numbers encoded in various ways, +textual strings, bit fields, and so on-which are then composed into more +complex structures. So your first task is to define a framework for +writing code to read and write the primitive data types used by a given +binary format. + + To take a simple example, suppose you're dealing with a binary format +that uses an unsigned 16-bit integer as a primitive data type. To read +such an integer, you need to read the two bytes and then combine them +into a single number by multiplying one byte by 256, a.k.a. 2^8, and +adding it to the other byte. 
For instance, assuming the binary format +specifies that such 16-bit quantities are stored in big-endian (1) form, +with the most significant byte first, you can read such a number with +this function: + + (defun read-u2 (in) + (+ (* (read-byte in) 256) (read-byte in))) + + However, Common Lisp provides a more convenient way to perform this +kind of bit twiddling. The function LDB, whose name stands for load +byte, can be used to extract and set (with SETF) any number of +contiguous bits from an integer. (2) The number of bits and their +position within the integer is specified with a byte specifier created +with the BYTE function. BYTE takes two arguments, the number of bits to +extract (or set) and the position of the rightmost bit where the least +significant bit is at position zero. LDB takes a byte specifier and the +integer from which to extract the bits and returns the positive integer +represented by the extracted bits. Thus, you can extract the least +significant octet of an integer like this: + + (ldb (byte 8 0) #xabcd) ==> 205 ; 205 is #xcd + + To get the next octet, you'd use a byte specifier of (byte 8 8) like +this: + + (ldb (byte 8 8) #xabcd) ==> 171 ; 171 is #xab + + You can use LDB with SETF to set the specified bits of an integer +stored in a SETFable place. + + CL-USER> (defvar *num* 0) + *NUM* + CL-USER> (setf (ldb (byte 8 0) *num*) 128) + 128 + CL-USER> *num* + 128 + CL-USER> (setf (ldb (byte 8 8) *num*) 255) + 255 + CL-USER> *num* + 65408 + + Thus, you can also write read-u2 like this: (3) + + (defun read-u2 (in) + (let ((u2 0)) + (setf (ldb (byte 8 8) u2) (read-byte in)) + (setf (ldb (byte 8 0) u2) (read-byte in)) + u2)) + + To write a number out as a 16-bit integer, you need to extract the +individual 8-bit bytes and write them one at a time. To extract the +individual bytes, you just need to use LDB with the same byte +specifiers. + + (defun write-u2 (out value) + (write-byte (ldb (byte 8 8) value) out) + (write-byte (ldb (byte 8 0) value) out)) + + Of course, you can also encode integers in many other ways-with +different numbers of bytes, with different endianness, and in signed and +unsigned format. + + ---------- Footnotes ---------- + + (1) The term big-endian and its opposite, little-endian, borrowed +from Jonathan Swift's Gulliver's Travels, refer to the way a multibyte +number is represented in an ordered sequence of bytes such as in memory +or in a file. For instance, the number 43981, or abcd in hex, +represented as a 16-bit quantity, consists of two bytes, ab and cd. It +doesn't matter to a computer in what order these two bytes are stored as +long as everybody agrees. Of course, whenever there's an arbitrary +choice to be made between two equally good options, the one thing you +can be sure of is that everybody is not going to agree. For more than +you ever wanted to know about it, and to see where the terms big-endian +and little-endian were first applied in this fashion, read "On Holy Wars +and a Plea for Peace" by Danny Cohen, available at +http://khavrinen.lcs.mit.edu/wollman/ien-137.txt. + + (2) LDB and DPB, a related function, were named after the DEC PDP-10 +assembly functions that did essentially the same thing. Both functions +operate on integers as if they were represented using twos-complement +format, regardless of the internal representation used by a particular +Common Lisp implementation. 
+ + (3) Common Lisp also provides functions for shifting and masking the +bits of integers in a way that may be more familiar to C and Java +programmers. For instance, you could write read-u2 yet a third way, +using those functions, like this: + + (defun read-u2 (in) + (logior (ash (read-byte in) 8) (read-byte in))) + + which would be roughly equivalent to this Java method: + + public int readU2 (InputStream in) throws IOException { + return (in.read() << 8) | (in.read()); + } + + The names LOGIOR and ASH are short for LOGical Inclusive OR and +Arithmetic SHift. ASH shifts an integer a given number of bits to the +left when its second argument is positive or to the right if the second +argument is negative. LOGIOR combines integers by logically oring each +bit. Another function, LOGAND, performs a bitwise and, which can be +used to mask off certain bits. However, for the kinds of bit twiddling +you'll need to do in this chapter and the next, LDB and BYTE will be +both more convenient and more idiomatic Common Lisp style. + + +File: pcl.info, Node: 24-3, Next: 24-4, Prev: 24-2, Up: Chapter 24 + +Strings in Binary Files +======================= + +Textual strings are another kind of primitive data type you'll find in +many binary formats. When you read files one byte at a time, you can't +read and write strings directly-you need to decode and encode them one +byte at a time, just as you do with binary-encoded numbers. And just as +you can encode an integer in several ways, you can encode a string in +many ways. To start with, the binary format must specify how individual +characters are encoded. + + To translate bytes to characters, you need to know both what +character code and what character encoding you're using. A character +code defines a mapping from positive integers to characters. Each +number in the mapping is called a code point. For instance, ASCII is a +character code that maps the numbers from 0-127 to particular characters +used in the Latin alphabet. A character encoding, on the other hand, +defines how the code points are represented as a sequence of bytes in a +byte-oriented medium such as a file. For codes that use eight or fewer +bits, such as ASCII and ISO-8859-1, the encoding is trivial-each numeric +value is encoded as a single byte. + + Nearly as straightforward are pure double-byte encodings, such as +UCS-2, which map between 16-bit values and characters. The only reason +double-byte encodings can be more complex than single-byte encodings is +that you may also need to know whether the 16-bit values are supposed to +be encoded in big-endian or little-endian format. + + Variable-width encodings use different numbers of octets for +different numeric values, making them more complex but allowing them to +be more compact in many cases. For instance, UTF-8, an encoding +designed for use with the Unicode character code, uses a single octet to +encode the values 0-127 while using up to four octets to encode values +up to 1,114,111. (1) + + Since the code points from 0-127 map to the same characters in +Unicode as they do in ASCII, a UTF-8 encoding of text consisting only of +characters also in ASCII is the same as the ASCII encoding. On the +other hand, texts consisting mostly of characters requiring four bytes +in UTF-8 could be more compactly encoded in a straight double-byte +encoding. 
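+
+   To make the idea of a variable-width encoding concrete, here is a
+rough sketch of how you might read a single UTF-8 encoded code point a
+byte at a time, using the same READ-BYTE, LDB, LOGIOR, and ASH
+operators described in the previous section. It assumes well-formed
+input, does no validation, and isn't part of the library developed in
+this chapter; it's only meant to show how the lead byte determines how
+many continuation bytes follow.
+
+     (defun read-utf-8-code-point (in)
+       (let ((b (read-byte in)))
+         (cond
+           ;; 0xxxxxxx -- one byte, the code point itself
+           ((< b #x80) b)
+           ;; 110xxxxx 10xxxxxx -- two bytes
+           ((< b #xe0)
+            (logior (ash (ldb (byte 5 0) b) 6)
+                    (ldb (byte 6 0) (read-byte in))))
+           ;; 1110xxxx 10xxxxxx 10xxxxxx -- three bytes
+           ((< b #xf0)
+            (logior (ash (ldb (byte 4 0) b) 12)
+                    (ash (ldb (byte 6 0) (read-byte in)) 6)
+                    (ldb (byte 6 0) (read-byte in))))
+           ;; 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -- four bytes
+           (t
+            (logior (ash (ldb (byte 3 0) b) 18)
+                    (ash (ldb (byte 6 0) (read-byte in)) 12)
+                    (ash (ldb (byte 6 0) (read-byte in)) 6)
+                    (ldb (byte 6 0) (read-byte in)))))))
+
+   On a Unicode Lisp, passing the resulting code point to CODE-CHAR,
+discussed next, would give you the corresponding character object.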
+
+   Common Lisp provides two functions for translating between numeric
+character codes and character objects: CODE-CHAR, which takes a numeric
+code and returns the corresponding character, and CHAR-CODE, which takes
+a character and returns its numeric code. The language standard doesn't
+specify what character encoding an implementation must use, so there's no
+guarantee you can represent every character that can possibly be encoded
+in a given file format as a Lisp character. However, almost all
+contemporary Common Lisp implementations use ASCII, ISO-8859-1, or
+Unicode as their native character code. Because Unicode is a superset
+of ISO-8859-1, which is in turn a superset of ASCII, if you're using a
+Unicode Lisp, CODE-CHAR and CHAR-CODE can be used directly for
+translating any of those three character codes. (2)
+
+   In addition to specifying a character encoding, a string encoding
+must also specify how to encode the length of the string. Three
+techniques are typically used in binary file formats.
+
+   The simplest is to not encode it but to let it be implicit in the
+position of the string in some larger structure: a particular element of
+a file may always be a string of a certain length, or a string may be
+the last element of a variable-length data structure whose overall size
+determines how many bytes are left to read as string data. Both these
+techniques are used in ID3 tags, as you'll see in the next chapter.
+
+   The other two techniques can be used to encode variable-length
+strings without relying on context. One is to encode the length of the
+string followed by the character data-the parser reads an integer value
+(in some specified integer format) and then reads that number of
+characters. Another is to write the character data followed by a
+delimiter that can't appear in the string such as a null character.
+
+   The different representations have different advantages and
+disadvantages, but when you're dealing with already specified binary
+formats, you won't have any control over which encoding is used.
+However, none of the encodings is particularly more difficult to read
+and write than any other. Here, as an example, is a function that reads
+a null-terminated ASCII string, assuming your Lisp implementation uses
+ASCII or one of its supersets such as ISO-8859-1 or full Unicode as its
+native character encoding:
+
+     (defconstant +null+ (code-char 0))
+
+     (defun read-null-terminated-ascii (in)
+       (with-output-to-string (s)
+         (loop for char = (code-char (read-byte in))
+               until (char= char +null+) do (write-char char s))))
+
+   The WITH-OUTPUT-TO-STRING macro, which I mentioned in Chapter 14, is
+an easy way to build up a string when you don't know how long it'll be.
+It creates a STRING-STREAM and binds it to the variable name specified,
+s in this case. All characters written to the stream are collected into
+a string, which is then returned as the value of the
+WITH-OUTPUT-TO-STRING form.
+
+   To write a string back out, you just need to translate the characters
+back to numeric values that can be written with WRITE-BYTE and then
+write the null terminator after the string contents.
+ + (defun write-null-terminated-ascii (string out) + (loop for char across string + do (write-byte (char-code char) out)) + (write-byte (char-code +null+) out)) + + As these examples show, the main intellectual challenge-such as it +is-of reading and writing primitive elements of binary files is +understanding how exactly to interpret the bytes that appear in a file +and to map them to Lisp data types. If a binary file format is well +specified, this should be a straightforward proposition. Actually +writing functions to read and write a particular encoding is, as they +say, a simple matter of programming. + + Now you can turn to the issue of reading and writing more complex +on-disk structures and how to map them to Lisp objects. + + ---------- Footnotes ---------- + + (1) Originally, UTF-8 was designed to represent a 31-bit character +code and used up to six bytes per code point. However, the maximum +Unicode code point is #x10ffff, so a UTF-8 encoding of Unicode requires +at most four bytes per code point. + + (2) If you need to parse a file format that uses other character +codes, or if you need to parse files containing arbitrary Unicode +strings using a non-Unicode-Common-Lisp implementation, you can always +represent such strings in memory as vectors of integer code points. +They won't be Lisp strings, so you won't be able to manipulate or +compare them with the string functions, but you'll still be able to do +anything with them that you can with arbitrary vectors. + + +File: pcl.info, Node: 24-4, Next: 24-5, Prev: 24-3, Up: Chapter 24 + +Composite Structures +==================== + +Since binary formats are usually used to represent data in a way that +makes it easy to map to in-memory data structures, it should come as no +surprise that composite on-disk structures are usually defined in ways +similar to the way programming languages define in-memory structures. +Usually a composite on-disk structure will consist of a number of named +parts, each of which is itself either a primitive type such as a number +or a string, another composite structure, or possibly a collection of +such values. + + For instance, an ID3 tag defined in the 2.2 version of the +specification consists of a header made up of a three-character +ISO-8859-1 string, which is always "ID3"; two one-byte unsigned integers +that specify the major version and revision of the specification; eight +bits worth of boolean flags; and four bytes that encode the size of the +tag in an encoding particular to the ID3 specification. Following the +header is a list of frames, each of which has its own internal +structure. After the frames are as many null bytes as are necessary to +pad the tag out to the size specified in the header. + + If you look at the world through the lens of object orientation, +composite structures look a lot like classes. For instance, you could +write a class to represent an ID3 tag. + + (defclass id3-tag () + ((identifier :initarg :identifier :accessor identifier) + (major-version :initarg :major-version :accessor major-version) + (revision :initarg :revision :accessor revision) + (flags :initarg :flags :accessor flags) + (size :initarg :size :accessor size) + (frames :initarg :frames :accessor frames))) + + An instance of this class would make a perfect repository to hold the +data needed to represent an ID3 tag. You could then write functions to +read and write instances of this class. 
For example, assuming the +existence of certain other functions for reading the appropriate +primitive data types, a read-id3-tag function might look like this: + + (defun read-id3-tag (in) + (let ((tag (make-instance 'id3-tag))) + (with-slots (identifier major-version revision flags size frames) tag + (setf identifier (read-iso-8859-1-string in :length 3)) + (setf major-version (read-u1 in)) + (setf revision (read-u1 in)) + (setf flags (read-u1 in)) + (setf size (read-id3-encoded-size in)) + (setf frames (read-id3-frames in :tag-size size))) + tag)) + + The write-id3-tag function would be structured similarly-you'd use +the appropriate write-* functions to write out the values stored in the +slots of the id3-tag object. + + It's not hard to see how you could write the appropriate classes to +represent all the composite data structures in a specification along +with read-foo and write-foo functions for each class and for necessary +primitive types. But it's also easy to tell that all the reading and +writing functions are going to be pretty similar, differing only in the +specifics of what types they read and the names of the slots they store +them in. It's particularly irksome when you consider that in the ID3 +specification it takes about four lines of text to specify the structure +of an ID3 tag, while you've already written eighteen lines of code and +haven't even written write-id3-tag yet. + + What you'd really like is a way to describe the structure of +something like an ID3 tag in a form that's as compressed as the +specification's pseudocode yet that can also be expanded into code that +defines the id3-tag class and the functions that translate between bytes +on disk and instances of the class. Sounds like a job for a macro. + + +File: pcl.info, Node: 24-5, Next: 24-6, Prev: 24-4, Up: Chapter 24 + +Designing the Macros +==================== + +Since you already have a rough idea what code your macros will need to +generate, the next step, according to the process for writing a macro I +outlined in Chapter 8, is to switch perspectives and think about what a +call to the macro should look like. Since the goal is to be able to +write something as compressed as the pseudocode in the ID3 +specification, you can start there. The header of an ID3 tag is +specified like this: + + ID3/file identifier "ID3" + ID3 version $02 00 + ID3 flags %xx000000 + ID3 size 4 * %0xxxxxxx + + In the notation of the specification, this means the "file +identifier" slot of an ID3 tag is the string "ID3" in ISO-8859-1 +encoding. The version consists of two bytes, the first of which-for +this version of the specification-has the value 2 and the second of +which-again for this version of the specification-is 0. The flags slot +is eight bits, of which all but the first two are 0, and the size +consists of four bytes, each of which has a 0 in the most significant +bit. + + Some information isn't captured by this pseudocode. For instance, +exactly how the four bytes that encode the size are to be interpreted is +described in a few lines of prose. Likewise, the spec describes in +prose how the frame and subsequent padding is stored after this header. +But most of what you need to know to be able to write code to read and +write an ID3 tag is specified by this pseudocode. 
Thus, you ought to be +able to write an s-expression version of this pseudocode and have it +expanded into the class and function definitions you'd otherwise have to +write by hand-something, perhaps, like this: + + (define-binary-class id3-tag + ((file-identifier (iso-8859-1-string :length 3)) + (major-version u1) + (revision u1) + (flags u1) + (size id3-tag-size) + (frames (id3-frames :tag-size size)))) + + The basic idea is that this form defines a class id3-tag similar to +the way you could with DEFCLASS, but instead of specifying things such +as :initarg and :accessors, each slot specification consists of the name +of the slot-file-identifier, major-version, and so on-and information +about how that slot is represented on disk. Since this is just a bit of +fantasizing, you don't have to worry about exactly how the macro +define-binary-class will know what to do with expressions such as +(iso-8859-1-string :length 3), u1, id3-tag-size, and (id3-frames +:tag-size size); as long as each expression contains the information +necessary to know how to read and write a particular data encoding, you +should be okay. + + +File: pcl.info, Node: 24-6, Next: 24-7, Prev: 24-5, Up: Chapter 24 + +Making the Dream a Reality +========================== + +Okay, enough fantasizing about good-looking code; now you need to get to +work writing define-binary-class-writing the code that will turn that +concise expression of what an ID3 tag looks like into code that can +represent one in memory, read one off disk, and write it back out. + + To start with, you should define a package for this library. Here's +the package file that comes with the version you can download from the +book's Web site: + + (in-package :cl-user) + + (defpackage :com.gigamonkeys.binary-data + (:use :common-lisp :com.gigamonkeys.macro-utilities) + (:export :define-binary-class + :define-tagged-binary-class + :define-binary-type + :read-value + :write-value + :*in-progress-objects* + :parent-of-type + :current-binary-object + :+null+)) + + The COM.GIGAMONKEYS.MACRO-UTILITIES package contains the with-gensyms +and once-only macros from Chapter 8. + + Since you already have a handwritten version of the code you want to +generate, it shouldn't be too hard to write such a macro. Just take it +in small pieces, starting with a version of define-binary-class that +generates just the DEFCLASS form. + + If you look back at the define-binary-class form, you'll see that it +takes two arguments, the name id3-tag and a list of slot specifiers, +each of which is itself a two-item list. From those pieces you need to +build the appropriate DEFCLASS form. Clearly, the biggest difference +between the define-binary-class form and a proper DEFCLASS form is in +the slot specifiers. A single slot specifier from define-binary-class +looks something like this: + + (major-version u1) + + But that's not a legal slot specifier for a DEFCLASS. Instead, you +need something like this: + + (major-version :initarg :major-version :accessor major-version) + + Easy enough. First define a simple function to translate a symbol to +the corresponding keyword symbol. + + (defun as-keyword (sym) (intern (string sym) :keyword)) + + Now define a function that takes a define-binary-class slot specifier +and returns a DEFCLASS slot specifier. + + (defun slot->defclass-slot (spec) + (let ((name (first spec))) + `(,name :initarg ,(as-keyword name) :accessor ,name))) + + You can test this function at the REPL after switching to your new +package with a call to IN-PACKAGE. 
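+
+   For instance, the switch might look something like this (the exact
+printed value, and the abbreviated BINARY-DATA prompt used in the
+transcripts below, will vary with your Lisp implementation and
+development environment):
+
+     CL-USER> (in-package :com.gigamonkeys.binary-data)
+     #<PACKAGE "COM.GIGAMONKEYS.BINARY-DATA">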
+
+     BINARY-DATA> (slot->defclass-slot '(major-version u1))
+     (MAJOR-VERSION :INITARG :MAJOR-VERSION :ACCESSOR MAJOR-VERSION)
+
+   Looks good. Now the first version of define-binary-class is trivial.
+
+     (defmacro define-binary-class (name slots)
+       `(defclass ,name ()
+          ,(mapcar #'slot->defclass-slot slots)))
+
+   This is a simple template-style macro-define-binary-class generates a
+DEFCLASS form by interpolating the name of the class and a list of slot
+specifiers constructed by applying slot->defclass-slot to each element
+of the list of slot specifiers from the define-binary-class form.
+
+   To see exactly what code this macro generates, you can evaluate this
+expression at the REPL.
+
+     (macroexpand-1 '(define-binary-class id3-tag
+                      ((identifier (iso-8859-1-string :length 3))
+                       (major-version u1)
+                       (revision u1)
+                       (flags u1)
+                       (size id3-tag-size)
+                       (frames (id3-frames :tag-size size)))))
+
+   The result, slightly reformatted here for better readability, should
+look familiar since it's exactly the class definition you wrote by hand
+earlier:
+
+     (defclass id3-tag ()
+       ((identifier :initarg :identifier :accessor identifier)
+        (major-version :initarg :major-version :accessor major-version)
+        (revision :initarg :revision :accessor revision)
+        (flags :initarg :flags :accessor flags)
+        (size :initarg :size :accessor size)
+        (frames :initarg :frames :accessor frames)))
+
+
+File: pcl.info, Node: 24-7, Next: 24-8, Prev: 24-6, Up: Chapter 24
+
+Reading Binary Objects
+======================
+
+Next you need to make define-binary-class also generate a function that
+can read an instance of the new class. Looking back at the read-id3-tag
+function you wrote before, this seems a bit trickier, as the
+read-id3-tag wasn't quite so regular-to read each slot's value, you had
+to call a different function. Not to mention, the name of the function,
+read-id3-tag, while derived from the name of the class you're defining,
+isn't one of the arguments to define-binary-class and thus isn't
+available to be interpolated into a template the way the class name was.
+
+   You could deal with both of those problems by devising and following
+a naming convention so the macro can figure out the name of the function
+to call based on the name of the type in the slot specifier. However,
+this would require define-binary-class to generate the name
+read-id3-tag, which is possible but a bad idea. Macros that create
+global definitions should generally use only names passed to them by
+their callers; macros that generate names under the covers can cause
+hard-to-predict-and hard-to-debug-name conflicts when the generated
+names happen to be the same as names used elsewhere. (1)
+
+   You can avoid both these inconveniences by noticing that all the
+functions that read a particular type of value have the same fundamental
+purpose, to read a value of a specific type from a stream. Speaking
+colloquially, you might say they're all instances of a single generic
+operation. And the colloquial use of the word generic should lead you
+directly to the solution to your problem: instead of defining a bunch of
+independent functions, all with different names, you can define a single
+generic function, read-value, with methods specialized to read different
+types of values.
+
+   That is, instead of defining functions read-iso-8859-1-string and
+read-u1, you can define read-value as a generic function taking two
+required arguments, a type and a stream, and possibly some keyword
+arguments.
+ + (defgeneric read-value (type stream &key) + (:documentation "Read a value of the given type from the stream.")) + + By specifying &key without any actual keyword parameters, you allow +different methods to define their own &key parameters without requiring +them to do so. This does mean every method specialized on read-value +will have to include either &key or an &rest parameter in its parameter +list to be compatible with the generic function. + + Then you'll define methods that use EQL specializers to specialize +the type argument on the name of the type you want to read. + + (defmethod read-value ((type (eql 'iso-8859-1-string)) in &key length) ...) + + (defmethod read-value ((type (eql 'u1)) in &key) ...) + + Then you can make define-binary-class generate a read-value method +specialized on the type name id3-tag, and that method can be implemented +in terms of calls to read-value with the appropriate slot types as the +first argument. The code you want to generate is going to look like +this: + + (defmethod read-value ((type (eql 'id3-tag)) in &key) + (let ((object (make-instance 'id3-tag))) + (with-slots (identifier major-version revision flags size frames) object + (setf identifier (read-value 'iso-8859-1-string in :length 3)) + (setf major-version (read-value 'u1 in)) + (setf revision (read-value 'u1 in)) + (setf flags (read-value 'u1 in)) + (setf size (read-value 'id3-encoded-size in)) + (setf frames (read-value 'id3-frames in :tag-size size))) + object)) + + So, just as you needed a function to translate a define-binary-class +slot specifier to a DEFCLASS slot specifier in order to generate the +DEFCLASS form, now you need a function that takes a define-binary-class +slot specifier and generates the appropriate SETF form, that is, +something that takes this: + + (identifier (iso-8859-1-string :length 3)) + + and returns this: + + (setf identifier (read-value 'iso-8859-1-string in :length 3)) + + However, there's a difference between this code and the DEFCLASS slot +specifier: it includes a reference to a variable in-the method parameter +from the read-value method-that wasn't derived from the slot specifier. +It doesn't have to be called in, but whatever name you use has to be the +same as the one used in the method's parameter list and in the other +calls to read-value. For now you can dodge the issue of where that name +comes from by defining slot->read-value to take a second argument of the +name of the stream variable. + + (defun slot->read-value (spec stream) + (destructuring-bind (name (type &rest args)) (normalize-slot-spec spec) + `(setf ,name (read-value ',type ,stream ,@args)))) + + The function normalize-slot-spec normalizes the second element of the +slot specifier, converting a symbol like u1 to the list (u1) so the +DESTRUCTURING-BIND can parse it. It looks like this: + + (defun normalize-slot-spec (spec) + (list (first spec) (mklist (second spec)))) + + (defun mklist (x) (if (listp x) x (list x))) + + You can test slot->read-value with each type of slot specifier. + + BINARY-DATA> (slot->read-value '(major-version u1) 'stream) + (SETF MAJOR-VERSION (READ-VALUE 'U1 STREAM)) + BINARY-DATA> (slot->read-value '(identifier (iso-8859-1-string :length 3)) 'stream) + (SETF IDENTIFIER (READ-VALUE 'ISO-8859-1-STRING STREAM :LENGTH 3)) + + With these functions you're ready to add read-value to +define-binary-class. 
If you take the handwritten read-value method and +strip out anything that's tied to a particular class, you're left with +this skeleton: + + (defmethod read-value ((type (eql ...)) stream &key) + (let ((object (make-instance ...))) + (with-slots (...) object + ... + object))) + + All you need to do is add this skeleton to the define-binary-class +template, replacing ellipses with code that fills in the skeleton with +the appropriate names and code. You'll also want to replace the +variables type, stream, and object with gensymed names to avoid +potential conflicts with slot names, (2) which you can do with the +with-gensyms macro from Chapter 8. + + Also, because a macro must expand into a single form, you need to +wrap some form around the DEFCLASS and DEFMETHOD. PROGN is the customary +form to use for macros that expand into multiple definitions because of +the special treatment it gets from the file compiler when appearing at +the top level of a file, as I discussed in Chapter 20. + + So, you can change define-binary-class as follows: + + (defmacro define-binary-class (name slots) + (with-gensyms (typevar objectvar streamvar) + `(progn + (defclass ,name () + ,(mapcar #'slot->defclass-slot slots)) + + (defmethod read-value ((,typevar (eql ',name)) ,streamvar &key) + (let ((,objectvar (make-instance ',name))) + (with-slots ,(mapcar #'first slots) ,objectvar + ,@(mapcar #'(lambda (x) (slot->read-value x streamvar)) slots)) + ,objectvar))))) + + ---------- Footnotes ---------- + + (1) Unfortunately, the language itself doesn't always provide a good +model in this respect: the macro DEFSTRUCT, which I don't discuss since +it has largely been superseded by DEFCLASS, generates functions with +names that it generates based on the name of the structure it's given. +DEFSTRUCT's bad example leads many new macro writers astray. + + (2) Technically there's no possibility of type or object conflicting +with slot names-at worst they'd be shadowed within the WITH-SLOTS form. +But it doesn't hurt anything to simply GENSYM all local variable names +used within a macro template. + + +File: pcl.info, Node: 24-8, Next: 24-9, Prev: 24-7, Up: Chapter 24 + +Writing Binary Objects +====================== + +Generating code to write out an instance of a binary class will proceed +similarly. First you can define a write-value generic function. + + (defgeneric write-value (type stream value &key) + (:documentation "Write a value as the given type to the stream.")) + + Then you define a helper function that translates a +define-binary-class slot specifier into code that writes out the slot +using write-value. As with the slot->read-value function, this helper +function needs to take the name of the stream variable as an argument. + + (defun slot->write-value (spec stream) + (destructuring-bind (name (type &rest args)) (normalize-slot-spec spec) + `(write-value ',type ,stream ,name ,@args))) + + Now you can add a write-value template to the define-binary-class +macro. 
+ + (defmacro define-binary-class (name slots) + (with-gensyms (typevar objectvar streamvar) + `(progn + (defclass ,name () + ,(mapcar #'slot->defclass-slot slots)) + + (defmethod read-value ((,typevar (eql ',name)) ,streamvar &key) + (let ((,objectvar (make-instance ',name))) + (with-slots ,(mapcar #'first slots) ,objectvar + ,@(mapcar #'(lambda (x) (slot->read-value x streamvar)) slots)) + ,objectvar)) + + (defmethod write-value ((,typevar (eql ',name)) ,streamvar ,objectvar &key) + (with-slots ,(mapcar #'first slots) ,objectvar + ,@(mapcar #'(lambda (x) (slot->write-value x streamvar)) slots)))))) + + +File: pcl.info, Node: 24-9, Next: 24-10, Prev: 24-8, Up: Chapter 24 + +Adding Inheritance and Tagged Structures +======================================== + +While this version of define-binary-class will handle stand-alone +structures, binary file formats often define on-disk structures that +would be natural to model with subclasses and superclasses. So you +might want to extend define-binary-class to support inheritance. + + A related technique used in many binary formats is to have several +on-disk structures whose exact type can be determined only by reading +some data that indicates how to parse the following bytes. For +instance, the frames that make up the bulk of an ID3 tag all share a +common header structure consisting of a string identifier and a length. +To read a frame, you need to read the identifier and use its value to +determine what kind of frame you're looking at and thus how to parse the +body of the frame. + + The current define-binary-class macro has no way to handle this kind +of reading-you could use define-binary-class to define a class to +represent each kind of frame, but you'd have no way to know what type of +frame to read without reading at least the identifier. And if other +code reads the identifier in order to determine what type to pass to +read-value, then that will break read-value since it's expecting to read +all the data that makes up the instance of the class it instantiates. + + You can solve this problem by adding inheritance to +define-binary-class and then writing another macro, +define-tagged-binary-class, for defining "abstract" classes that aren't +instantiated directly but that can be specialized on by read-value +methods that know how to read enough data to determine what kind of +class to create. + + The first step to adding inheritance to define-binary-class is to add +a parameter to the macro to accept a list of superclasses. + + (defmacro define-binary-class (name (&rest superclasses) slots) ... + + Then, in the DEFCLASS template, interpolate that value instead of the +empty list. + + (defclass ,name ,superclasses + ...) + + However, there's a bit more to it than that. You also need to change +the read-value and write-value methods so the methods generated when +defining a superclass can be used by the methods generated as part of a +subclass to read and write inherited slots. + + The current way read-value works is particularly problematic since it +instantiates the object before filling it in-obviously, you can't have +the method responsible for reading the superclass's fields instantiate +one object while the subclass's method instantiates and fills in a +different object. + + You can fix that problem by splitting read-value into two parts-one +responsible for instantiating the correct kind of object and another +responsible for filling slots in an existing object. 
On the writing +side it's a bit simpler, but you can use the same technique. + + So you'll define two new generic functions, read-object and +write-object, that will both take an existing object and a stream. +Methods on these generic functions will be responsible for reading or +writing the slots specific to the class of the object on which they're +specialized. + + (defgeneric read-object (object stream) + (:method-combination progn :most-specific-last) + (:documentation "Fill in the slots of object from stream.")) + + (defgeneric write-object (object stream) + (:method-combination progn :most-specific-last) + (:documentation "Write out the slots of object to the stream.")) + + Defining these generic functions to use the PROGN method combination +with the option :most-specific-last allows you to define methods that +specialize object on each binary class and have them deal only with the +slots actually defined in that class; the PROGN method combination will +combine all the applicable methods so the method specialized on the +least specific class in the hierarchy runs first, reading or writing the +slots defined in that class, then the method specialized on next least +specific subclass, and so on. And since all the heavy lifting for a +specific class is now going to be done by read-object and write-object, +you don't even need to define specialized read-value and write-value +methods; you can define default methods that assume the type argument is +the name of a binary class. + + (defmethod read-value ((type symbol) stream &key) + (let ((object (make-instance type))) + (read-object object stream) + object)) + + (defmethod write-value ((type symbol) stream value &key) + (assert (typep value type)) + (write-object value stream)) + + Note how you can use MAKE-INSTANCE as a generic object factory-while +you normally call MAKE-INSTANCE with a quoted symbol as the first +argument because you normally know exactly what class you want to +instantiate, you can use any expression that evaluates to a class name +such as, in this case, the type parameter in the read-value method. + + The actual changes to define-binary-class to define methods on +read-object and write-object rather than read-value and write-value are +fairly minor. + + (defmacro define-binary-class (name superclasses slots) + (with-gensyms (objectvar streamvar) + `(progn + (defclass ,name ,superclasses + ,(mapcar #'slot->defclass-slot slots)) + + (defmethod read-object progn ((,objectvar ,name) ,streamvar) + (with-slots ,(mapcar #'first slots) ,objectvar + ,@(mapcar #'(lambda (x) (slot->read-value x streamvar)) slots))) + + (defmethod write-object progn ((,objectvar ,name) ,streamvar) + (with-slots ,(mapcar #'first slots) ,objectvar + ,@(mapcar #'(lambda (x) (slot->write-value x streamvar)) slots)))))) + + +File: pcl.info, Node: 24-10, Next: 24-11, Prev: 24-9, Up: Chapter 24 + +Keeping Track of Inherited Slots +================================ + +This definition will work for many purposes. However, it doesn't handle +one fairly common situation, namely, when you have a subclass that needs +to refer to inherited slots in its own slot specifications. 
For +instance, with the current definition of define-binary-class, you can +define a single class like this: + + (define-binary-class generic-frame () + ((id (iso-8859-1-string :length 3)) + (size u3) + (data (raw-bytes :bytes size)))) + + The reference to size in the specification of data works the way +you'd expect because the expressions that read and write the data slot +are wrapped in a WITH-SLOTS that lists all the object's slots. However, +if you try to split that class into two classes like this: + + (define-binary-class frame () + ((id (iso-8859-1-string :length 3)) + (size u3))) + + (define-binary-class generic-frame (frame) + ((data (raw-bytes :bytes size)))) + + you'll get a compile-time warning when you compile the generic-frame +definition and a runtime error when you try to use it because there will +be no lexically apparent variable size in the read-object and +write-object methods specialized on generic-frame. + + What you need to do is keep track of the slots defined by each binary +class and then include inherited slots in the WITH-SLOTS forms in the +read-object and write-object methods. + + The easiest way to keep track of information like this is to hang it +off the symbol that names the class. As I discussed in Chapter 21, +every symbol object has an associated property list, which can be +accessed via the functions SYMBOL-PLIST and GET. You can associate +arbitrary key/value pairs with a symbol by adding them to its property +list with SETF of GET. For instance, if the binary class foo defines +three slots-x, y, and z-you can keep track of that fact by adding a +slots key to the symbol foo's property list with the value (x y z) with +this expression: + + (setf (get 'foo 'slots) '(x y z)) + + You want this bookkeeping to happen as part of evaluating the +define-binary-class of foo. However, it's not clear where to put the +expression. If you evaluate it when you compute the macro's expansion, +it'll get evaluated when you compile the define-binary-class form but +not if you later load a file that contains the resulting compiled code. +On the other hand, if you include the expression in the expansion, then +it won't be evaluated during compilation, which means if you compile a +file with several define-binary-class forms, none of the information +about what classes define what slots will be available until the whole +file is loaded, which is too late. + + This is what the special operator EVAL-WHEN I discussed in Chapter 20 +is for. By wrapping a form in an EVAL-WHEN, you can control whether +it's evaluated at compile time, when the compiled code is loaded, or +both. For cases like this where you want to squirrel away some +information during the compilation of a macro form that you also want to +be available after the compiled form is loaded, you should wrap it in an +EVAL-WHEN like this: + + (eval-when (:compile-toplevel :load-toplevel :execute) + (setf (get 'foo 'slots) '(x y z))) + + and include the EVAL-WHEN in the expansion generated by the macro. +Thus, you can save both the slots and the direct superclasses of a +binary class by adding this form to the expansion generated by +define-binary-class: + + (eval-when (:compile-toplevel :load-toplevel :execute) + (setf (get ',name 'slots) ',(mapcar #'first slots)) + (setf (get ',name 'superclasses) ',superclasses)) + + Now you can define three helper functions for accessing this +information. The first simply returns the slots directly defined by a +binary class. 
It's a good idea to return a copy of the list since you +don't want other code to modify the list of slots after the binary class +has been defined. + + (defun direct-slots (name) + (copy-list (get name 'slots))) + + The next function returns the slots inherited from other binary +classes. + + (defun inherited-slots (name) + (loop for super in (get name 'superclasses) + nconc (direct-slots super) + nconc (inherited-slots super))) + + Finally, you can define a function that returns a list containing the +names of all directly defined and inherited slots. + + (defun all-slots (name) + (nconc (direct-slots name) (inherited-slots name))) + + When you're computing the expansion of a define-generic-binary-class +form, you want to generate a WITH-SLOTS form that contains the names of +all the slots defined in the new class and all its superclasses. +However, you can't use all-slots while you're generating the expansion +since the information won't be available until after the expansion is +compiled. Instead, you should use the following function, which takes +the list of slot specifiers and superclasses passed to +define-generic-binary-class and uses them to compute the list of all the +new class's slots: + + (defun new-class-all-slots (slots superclasses) + (nconc (mapcan #'all-slots superclasses) (mapcar #'first slots))) + + With these functions defined, you can change define-binary-class to +store the information about the class currently being defined and to use +the already stored information about the superclasses' slots to generate +the WITH-SLOTS forms you want like this: + + (defmacro define-binary-class (name (&rest superclasses) slots) + (with-gensyms (objectvar streamvar) + `(progn + (eval-when (:compile-toplevel :load-toplevel :execute) + (setf (get ',name 'slots) ',(mapcar #'first slots)) + (setf (get ',name 'superclasses) ',superclasses)) + + (defclass ,name ,superclasses + ,(mapcar #'slot->defclass-slot slots)) + + (defmethod read-object progn ((,objectvar ,name) ,streamvar) + (with-slots ,(new-class-all-slots slots superclasses) ,objectvar + ,@(mapcar #'(lambda (x) (slot->read-value x streamvar)) slots))) + + (defmethod write-object progn ((,objectvar ,name) ,streamvar) + (with-slots ,(new-class-all-slots slots superclasses) ,objectvar + ,@(mapcar #'(lambda (x) (slot->write-value x streamvar)) slots)))))) + + +File: pcl.info, Node: 24-11, Next: 24-12, Prev: 24-10, Up: Chapter 24 + +Tagged Structures +================= + +With the ability to define binary classes that extend other binary +classes, you're ready to define a new macro for defining classes to +represent "tagged" structures. The strategy for reading tagged +structures will be to define a specialized read-value method that knows +how to read the values that make up the start of the structure and then +use those values to determine what subclass to instantiate. It'll then +make an instance of that class with MAKE-INSTANCE, passing the already +read values as initargs, and pass the object to read-object, allowing +the actual class of the object to determine how the rest of the +structure is read. + + The new macro, define-tagged-binary-class, will look like +define-binary-class with the addition of a :dispatch option used to +specify a form that should evaluate to the name of a binary class. The +:dispatch form will be evaluated in a context where the names of the +slots defined by the tagged class are bound to variables that hold the +values read from the file. 
The class whose name it returns must accept +initargs corresponding to the slot names defined by the tagged class. +This is easily ensured if the :dispatch form always evaluates to the +name of a class that subclasses the tagged class. + + For instance, supposing you have a function, find-frame-class, that +will map a string identifier to a binary class representing a particular +kind of ID3 frame, you might define a tagged binary class, id3-frame, +like this: + + (define-tagged-binary-class id3-frame () + ((id (iso-8859-1-string :length 3)) + (size u3)) + (:dispatch (find-frame-class id))) + + The expansion of a define-tagged-binary-class will contain a DEFCLASS +and a write-object method just like the expansion of +define-binary-class, but instead of a read-object method it'll contain a +read-value method that looks like this: + + (defmethod read-value ((type (eql 'id3-frame)) stream &key) + (let ((id (read-value 'iso-8859-1-string stream :length 3)) + (size (read-value 'u3 stream))) + (let ((object (make-instance (find-frame-class id) :id id :size size))) + (read-object object stream) + object))) + + Since the expansions of define-tagged-binary-class and +define-binary-class are going to be identical except for the read +method, you can factor out the common bits into a helper macro, +define-generic-binary-class, that accepts the read method as a parameter +and interpolates it. + + (defmacro define-generic-binary-class (name (&rest superclasses) slots read-method) + (with-gensyms (objectvar streamvar) + `(progn + (eval-when (:compile-toplevel :load-toplevel :execute) + (setf (get ',name 'slots) ',(mapcar #'first slots)) + (setf (get ',name 'superclasses) ',superclasses)) + + (defclass ,name ,superclasses + ,(mapcar #'slot->defclass-slot slots)) + + ,read-method + + (defmethod write-object progn ((,objectvar ,name) ,streamvar) + (declare (ignorable ,streamvar)) + (with-slots ,(new-class-all-slots slots superclasses) ,objectvar + ,@(mapcar #'(lambda (x) (slot->write-value x streamvar)) slots)))))) + + Now you can define both define-binary-class and +define-tagged-binary-class to expand into a call to +define-generic-binary-class. 
Here's a new version of +define-binary-class that generates the same code as the earlier version +when it's fully expanded: + + (defmacro define-binary-class (name (&rest superclasses) slots) + (with-gensyms (objectvar streamvar) + `(define-generic-binary-class ,name ,superclasses ,slots + (defmethod read-object progn ((,objectvar ,name) ,streamvar) + (declare (ignorable ,streamvar)) + (with-slots ,(new-class-all-slots slots superclasses) ,objectvar + ,@(mapcar #'(lambda (x) (slot->read-value x streamvar)) slots)))))) + + And here's define-tagged-binary-class along with two new helper +functions it uses: + + (defmacro define-tagged-binary-class (name (&rest superclasses) slots &rest options) + (with-gensyms (typevar objectvar streamvar) + `(define-generic-binary-class ,name ,superclasses ,slots + (defmethod read-value ((,typevar (eql ',name)) ,streamvar &key) + (let* ,(mapcar #'(lambda (x) (slot->binding x streamvar)) slots) + (let ((,objectvar + (make-instance + ,@(or (cdr (assoc :dispatch options)) + (error "Must supply :dispatch form.")) + ,@(mapcan #'slot->keyword-arg slots)))) + (read-object ,objectvar ,streamvar) + ,objectvar)))))) + + (defun slot->binding (spec stream) + (destructuring-bind (name (type &rest args)) (normalize-slot-spec spec) + `(,name (read-value ',type ,stream ,@args)))) + + (defun slot->keyword-arg (spec) + (let ((name (first spec))) + `(,(as-keyword name) ,name))) + + +File: pcl.info, Node: 24-12, Next: 24-13, Prev: 24-11, Up: Chapter 24 + +Primitive Binary Types +====================== + +While define-binary-class and define-tagged-binary-class make it easy to +define composite structures, you still have to write read-value and +write-value methods for primitive data types by hand. You could decide +to live with that, specifying that users of the library need to write +appropriate methods on read-value and write-value to support the +primitive types used by their binary classes. + + However, rather than having to document how to write a suitable +read-value/write-value pair, you can provide a macro to do it +automatically. This also has the advantage of making the abstraction +created by define-binary-class less leaky. Currently, +define-binary-class depends on having methods on read-value and +write-value defined in a particular way, but that's really just an +implementation detail. By defining a macro that generates the +read-value and write-value methods for primitive types, you hide those +details behind an abstraction you control. If you decide later to +change the implementation of define-binary-class, you can change your +primitive-type-defining macro to meet the new requirements without +requiring any changes to code that uses the binary data library. + + So you should define one last macro, define-binary-type, that will +generate read-value and write-value methods for reading values +represented by instances of existing classes, rather than by classes +defined with define-binary-class. + + For a concrete example, consider a type used in the id3-tag class, a +fixed-length string encoded in ISO-8859-1 characters. I'll assume, as I +did earlier, that the native character encoding of your Lisp is +ISO-8859-1 or a superset, so you can use CODE-CHAR and CHAR-CODE to +translate bytes to characters and back. + + As always, your goal is to write a macro that allows you to express +only the essential information needed to generate the required code. 
In +this case, there are four pieces of essential information: the name of +the type, iso-8859-1-string; the &key parameters that should be accepted +by the read-value and write-value methods, length in this case; the code +for reading from a stream; and the code for writing to a stream. Here's +an expression that contains those four pieces of information: + + (define-binary-type iso-8859-1-string (length) + (:reader (in) + (let ((string (make-string length))) + (dotimes (i length) + (setf (char string i) (code-char (read-byte in)))) + string)) + (:writer (out string) + (dotimes (i length) + (write-byte (char-code (char string i)) out)))) + + Now you just need a macro that can take apart this form and put it +back together in the form of two DEFMETHODs wrapped in a PROGN. If you +define the parameter list to define-binary-type like this: + + (defmacro define-binary-type (name (&rest args) &body spec) ... + + then within the macro the parameter spec will be a list containing +the reader and writer definitions. You can then use ASSOC to extract +the elements of spec using the tags :reader and :writer and then use +DESTRUCTURING-BIND to take apart the REST of each element. (1) + + From there it's just a matter of interpolating the extracted values +into the backquoted templates of the read-value and write-value methods. + + (defmacro define-binary-type (name (&rest args) &body spec) + (with-gensyms (type) + `(progn + ,(destructuring-bind ((in) &body body) (rest (assoc :reader spec)) + `(defmethod read-value ((,type (eql ',name)) ,in &key ,@args) + ,@body)) + ,(destructuring-bind ((out value) &body body) (rest (assoc :writer spec)) + `(defmethod write-value ((,type (eql ',name)) ,out ,value &key ,@args) + ,@body))))) + + Note how the backquoted templates are nested: the outermost template +starts with the backquoted PROGN form. That template consists of the +symbol PROGN and two comma-unquoted DESTRUCTURING-BIND expressions. +Thus, the outer template is filled in by evaluating the +DESTRUCTURING-BIND expressions and interpolating their values. Each +DESTRUCTURING-BIND expression in turn contains another backquoted +template, which is used to generate one of the method definitions to be +interpolated in the outer template. + + With this macro defined, the define-binary-type form given previously +expands to this code: + + (progn + (defmethod read-value ((#:g1618 (eql 'iso-8859-1-string)) in &key length) + (let ((string (make-string length))) + (dotimes (i length) + (setf (char string i) (code-char (read-byte in)))) + string)) + (defmethod write-value ((#:g1618 (eql 'iso-8859-1-string)) out string &key length) + (dotimes (i length) + (write-byte (char-code (char string i)) out)))) + + Of course, now that you've got this nice macro for defining binary +types, it's tempting to make it do a bit more work. For now you should +just make one small enhancement that will turn out to be pretty handy +when you start using this library to deal with actual formats such as +ID3 tags. + + ID3 tags, like many other binary formats, use lots of primitive types +that are minor variations on a theme, such as unsigned integers in one-, +two-, three-, and four-byte varieties. You could certainly define each +of those types with define-binary-type as it stands. Or you could +factor out the common algorithm for reading and writing n-byte unsigned +integers into helper functions. 
+ + But suppose you had already defined a binary type, unsigned-integer, +that accepts a :bytes parameter to specify how many bytes to read and +write. Using that type, you could specify a slot representing a +one-byte unsigned integer with a type specifier of (unsigned-integer +:bytes 1). But if a particular binary format specifies lots of slots of +that type, it'd be nice to be able to easily define a new type-say, +u1-that means the same thing. As it turns out, it's easy to change +define-binary-type to support two forms, a long form consisting of a +:reader and :writer pair and a short form that defines a new binary type +in terms of an existing type. Using a short form define-binary-type, +you can define u1 like this: + + (define-binary-type u1 () (unsigned-integer :bytes 1)) + + which will expand to this: + + (progn + (defmethod read-value ((#:g161887 (eql 'u1)) #:g161888 &key) + (read-value 'unsigned-integer #:g161888 :bytes 1)) + (defmethod write-value ((#:g161887 (eql 'u1)) #:g161888 #:g161889 &key) + (write-value 'unsigned-integer #:g161888 #:g161889 :bytes 1))) + + To support both long- and short-form define-binary-type calls, you +need to differentiate based on the value of the spec argument. If spec +is two items long, it represents a long-form call, and the two items +should be the :reader and :writer specifications, which you extract as +before. On the other hand, if it's only one item long, the one item +should be a type specifier, which needs to be parsed differently. You +can use ECASE to switch on the LENGTH of spec and then parse spec and +generate an appropriate expansion for either the long form or the short +form. + + (defmacro define-binary-type (name (&rest args) &body spec) + (ecase (length spec) + (1 + (with-gensyms (type stream value) + (destructuring-bind (derived-from &rest derived-args) (mklist (first spec)) + `(progn + (defmethod read-value ((,type (eql ',name)) ,stream &key ,@args) + (read-value ',derived-from ,stream ,@derived-args)) + (defmethod write-value ((,type (eql ',name)) ,stream ,value &key ,@args) + (write-value ',derived-from ,stream ,value ,@derived-args)))))) + (2 + (with-gensyms (type) + `(progn + ,(destructuring-bind ((in) &body body) (rest (assoc :reader spec)) + `(defmethod read-value ((,type (eql ',name)) ,in &key ,@args) + ,@body)) + ,(destructuring-bind ((out value) &body body) (rest (assoc :writer spec)) + `(defmethod write-value ((,type (eql ',name)) ,out ,value &key ,@args) + ,@body))))))) + + ---------- Footnotes ---------- + + (1) Using ASSOC to extract the :reader and :writer elements of spec +allows users of define-binary-type to include the elements in either +order; if you required the :reader element to be always be first, you +could then have used (rest (first spec)) to extract the reader and (rest +(second spec)) to extract the writer. However, as long as you require +the :reader and :writer keywords to improve the readability of +define-binary-type forms, you might as well use them to extract the +correct data. + + +File: pcl.info, Node: 24-13, Next: Chapter 25, Prev: 24-12, Up: Chapter 24 + +The Current Object Stack +======================== + +One last bit of functionality you'll need in the next chapter is a way +to get at the binary object being read or written while reading and +writing. More generally, when reading or writing nested composite +objects, it's useful to be able to get at any of the objects currently +being read or written. 
Thanks to dynamic variables and :around methods, +you can add this enhancement with about a dozen lines of code. To +start, you should define a dynamic variable that will hold a stack of +objects currently being read or written. + + (defvar *in-progress-objects* nil) + + Then you can define :around methods on read-object and write-object +that push the object being read or written onto this variable before +invoking CALL-NEXT-METHOD. + + (defmethod read-object :around (object stream) + (declare (ignore stream)) + (let ((*in-progress-objects* (cons object *in-progress-objects*))) + (call-next-method))) + + (defmethod write-object :around (object stream) + (declare (ignore stream)) + (let ((*in-progress-objects* (cons object *in-progress-objects*))) + (call-next-method))) + + Note how you rebind *in-progress-objects* to a list with a new item +on the front rather than assigning it a new value. This way, at the end +of the LET, after CALL-NEXT-METHOD returns, the old value of +*in-progress-objects* will be restored, effectively popping the object +of the stack. + + With those two methods defined, you can provide two convenience +functions for getting at specific objects in the in-progress stack. The +function current-binary-object will return the head of the stack, the +object whose read-object or write-object method was invoked most +recently. The other, parent-of-type, takes an argument that should be +the name of a binary object class and returns the most recently pushed +object of that type, using the TYPEP function that tests whether a given +object is an instance of a particular type. + + (defun current-binary-object () (first *in-progress-objects*)) + + (defun parent-of-type (type) + (find-if #'(lambda (x) (typep x type)) *in-progress-objects*)) + + These two functions can be used in any code that will be called +within the dynamic extent of a read-object or write-object call. You'll +see one example of how current-binary-object can be used in the next +chapter. (1) + + Now you have all the tools you need to tackle an ID3 parsing library, +so you're ready to move onto the next chapter where you'll do just that. + + ---------- Footnotes ---------- + + (1) The ID3 format doesn't require the parent-of-type function since +it's a relatively flat structure. This function comes into its own when +you need to parse a format made up of many deeply nested structures +whose parsing depends on information stored in higher-level structures. +For example, in the Java class file format, the top-level class file +structure contains a constant pool that maps numeric values used in +other substructures within the class file to constant values that are +needed while parsing those substructures. If you were writing a class +file parser, you could use parent-of-type in the code that reads and +writes those substructures to get at the top-level class file object and +from there to the constant pool. + + +File: pcl.info, Node: Chapter 25, Next: Chapter 26, Prev: Chapter 24, Up: Top + +25. Practical: An ID3 Parser +============================ + +With a library for parsing binary data, you're ready to write some code +for reading and writing an actual binary format, that of ID3 tags. ID3 +tags are used to embed metadata in MP3 audio files. Dealing with ID3 +tags will be a good test of the binary data library because the ID3 +format is a true real-world format-a mix of engineering trade-offs and +idiosyncratic design choices that does, whatever else might be said +about it, get the job done. 
In case you missed the file-sharing +revolution, here's a quick overview of what ID3 tags are and how they +relate to MP3 files. + + MP3, also known as MPEG Audio Layer 3, is a format for storing +compressed audio data, designed by researchers at Fraunhofer IIS and +standardized by the Moving Picture Experts Group, a joint committee of +the International Organization for Standardization (ISO) and the +International Electrotechnical Commission (IEC). However, the MP3 +format, by itself, defines only how to store audio data. That's fine as +long as all your MP3 files are managed by a single application that can +store metadata externally and keep track of which metadata goes with +which files. However, when people started passing around individual MP3 +files on the Internet, via file-sharing systems such as Napster, they +soon discovered they needed a way to embed metadata in the MP3 files +themselves. + + Because the MP3 standard was already codified and a fair bit of +software and hardware had already been written that knew how to decode +the existing MP3 format, any scheme for embedding information in an MP3 +file would have to be invisible to MP3 decoders. Enter ID3. + + The original ID3 format, invented by programmer Eric Kemp, consisted +of 128 bytes stuck on the end of an MP3 file where it'd be ignored by +most MP3 software. It consisted of four 30-character fields, one each +for the song title, the album title, the artist name, and a comment; a +four-byte year field; and a one-byte genre code. Kemp provided standard +meanings for the first 80 genre codes. Nullsoft, the makers of Winamp, +a popular MP3 player, later supplemented this list with another 60 or so +genres. + + This format was easy to parse but obviously quite limited. It had no +way to encode names longer than 30 characters; it was limited to 256 +genres, and the meaning of the genre codes had to be agreed upon by all +users of ID3-aware software. There wasn't even a way to encode the CD +track number of a particular MP3 file until another programmer, Michael +Mutschler, proposed embedding the track number in the comment field, +separated from the rest of the comment by a null byte, so existing ID3 +software, which tended to read up to the first null in each of the text +fields, would ignore it. Kemp's version is now called ID3v1, and +Mutschler's is ID3v1.1. + + Limited as they were, the version 1 proposals were at least a partial +solution to the metadata problem, so they were adopted by many MP3 +ripping programs (which had to put the ID3 tag into the MP3 files) and +MP3 players (which would extract the information in the ID3 tag to +display to the user). (1) + + By 1998, however, the limitations were really becoming annoying, and +a new group, led by Martin Nilsson, started work on a completely new +tagging scheme, which came to be called ID3v2. The ID3v2 format is +extremely flexible, allowing for many kinds of information to be +included, with almost no length limitations. It also takes advantage of +certain details of the MP3 format to allow ID3v2 tags to be placed at +the beginning of an MP3 file. + + ID3v2 tags are, however, more of a challenge to parse than version 1 +tags. In this chapter, you'll use the binary data parsing library from +the previous chapter to develop code that can read and write ID3v2 tags. +Or at least you'll make a reasonable start-where ID3v1 was too simple, +ID3v2 is baroque to the point of being completely overengineered. 
+Implementing every nook and cranny of the specification, especially if +you want to support all three versions that have been specified, would +be a fair bit of work. However, you can ignore many of the features in +those specifications since they're rarely used "in the wild." For +starters, you can ignore, for now, a whole version, 2.4, since it has +not been widely adopted and mostly just adds more needless flexibility +compared to version 2.3. I'll focus on versions 2.2 and 2.3 because +they're both widely used and are different enough from each other to +keep things interesting. + +* Menu: + +* 25-1:: Structure of an ID3v2 Tag +* 25-2:: Defining a Package +* 25-3:: Integer Types +* 25-4:: String Types +* 25-5:: ID3 Tag Header +* 25-6:: ID3 Frames +* 25-7:: Detecting Tag Padding +* 25-8:: Supporting Multiple Versions of ID3 +* 25-9:: Versioned Frame Base Classes +* 25-10:: Versioned Concrete Frame Classes +* 25-11:: What Frames Do You Actually Need? +* 25-12:: Text Information Frames +* 25-13:: Comment Frames +* 25-14:: Extracting Information from an ID3 Tag + + ---------- Footnotes ---------- + + (1) Ripping is the process by which a song on an audio CD is +converted to an MP3 file on your hard drive. These days most ripping +software also automatically retrieves information about the songs being +ripped from online databases such as Gracenote (née the Compact Disc +Database [CDDB]) or FreeDB, which it then embeds in the MP3 files as ID3 +tags. + + +File: pcl.info, Node: 25-1, Next: 25-2, Prev: Chapter 25, Up: Chapter 25 + +Structure of an ID3v2 Tag +========================= + +Before you can start cutting code, you'll need to be familiar with the +overall structure of an ID3v2 tag. A tag starts with a header +containing information about the tag as a whole. The first three bytes +of the header encode the string "ID3" in ISO-8859-1 characters. In +other words, they're the bytes 73, 68, and 51. Then comes two bytes +that encode the major version and revision of the ID3 specification to +which the tag purports to conform. They're followed by a single byte +whose individual bits are treated as flags. The meanings of the +individual flags depend on the version of the spec. Some of the flags +can affect the way the rest of the tag is parsed. The "major version" +is actually used to record the minor version of the spec, while the +"revision" is the subminor version of the spec. Thus, the "major +version" field for a tag conforming to the 2.3.0 spec is 3. The +revision field is always zero since each new ID3v2 spec has bumped the +minor version, leaving the subminor version at zero. The value stored +in the major version field of the tag has, as you'll see, a dramatic +effect on how you'll parse the rest of the tag. + + The last field in the tag header is an integer, encoded in four bytes +but using only seven bits from each byte, that gives the total size of +the tag, not counting the header. In version 2.3 tags, the header may +be followed by several extended header fields; otherwise, the remainder +of the tag data is divided into frames. Different types of frames store +different kinds of information, from simple textual information, such as +the song name, to embedded images. Each frame starts with a header +containing a string identifier and a size. In version 2.3, the frame +header also contains two bytes worth of flags and, depending on the +value of one the flags, an optional one-byte code indicating how the +rest of the frame is encrypted. 
+ + Frames are a perfect example of a tagged data structure-to know how +to parse the body of a frame, you need to read the header and use the +identifier to determine what kind of frame you're reading. + + The ID3 tag header contains no direct indication of how many frames +are in a tag-the tag header tells you how big the tag is, but since many +frames are variable length, the only way to find out how many frames the +tag contains is to read the frame data. Also, the size given in the tag +header may be larger than the actual number of bytes of frame data; the +frames may be followed with enough null bytes to pad the tag out to the +specified size. This makes it possible for tag editors to modify a tag +without having to rewrite the whole MP3 file. (1) + + So, the main issues you have to deal with are reading the ID3 header; +determining whether you're reading a version 2.2 or 2.3 tag; and reading +the frame data, stopping either when you've read the complete tag or +when you've hit the padding bytes. + + ---------- Footnotes ---------- + + (1) Almost all file systems provide the ability to overwrite existing +bytes of a file, but few, if any, provide a way to add or remove data at +the beginning or middle of a file without having to rewrite the rest of +the file. Since ID3 tags are typically stored at the beginning of a +file, to rewrite an ID3 tag without disturbing the rest of the file you +must replace the old tag with a new tag of exactly the same length. By +writing ID3 tags with a certain amount of padding, you have a better +chance of being able to do so-if the new tag has more data than the +original tag, you use less padding, and if it's shorter, you use more. + + +File: pcl.info, Node: 25-2, Next: 25-3, Prev: 25-1, Up: Chapter 25 + +Defining a Package +================== + +Like the other libraries you've developed so far, the code you'll write +in this chapter is worth putting in its own package. You'll need to +refer to functions from both the binary data and pathname libraries +developed in Chapters 24 and 15 and will also want to export the names +of the functions that make up the public API to this package. The +following package definition does all that: + + (defpackage :com.gigamonkeys.id3v2 + (:use :common-lisp + :com.gigamonkeys.binary-data + :com.gigamonkeys.pathnames) + (:export + :read-id3 + :mp3-p + :id3-p + :album + :composer + :genre + :encoding-program + :artist + :part-of-set + :track + :song + :year + :size + :translated-genre)) + + As usual, you can, and probably should, change the com.gigamonkeys +part of the package name to your own domain. + + +File: pcl.info, Node: 25-3, Next: 25-4, Prev: 25-2, Up: Chapter 25 + +Integer Types +============= + +You can start by defining binary types for reading and writing several +of the primitive types used by the ID3 format, various sizes of unsigned +integers, and four kinds of strings. + + ID3 uses unsigned integers encoded in one, two, three, and four +bytes. If you first write a general unsigned-integer binary type that +takes the number of bytes to read as an argument, you can then use the +short form of define-binary-type to define the specific types. 
The +general unsigned-integer type looks like this: + + (define-binary-type unsigned-integer (bytes) + (:reader (in) + (loop with value = 0 + for low-bit downfrom (* 8 (1- bytes)) to 0 by 8 do + (setf (ldb (byte 8 low-bit) value) (read-byte in)) + finally (return value))) + (:writer (out value) + (loop for low-bit downfrom (* 8 (1- bytes)) to 0 by 8 + do (write-byte (ldb (byte 8 low-bit) value) out)))) + + Now you can use the short form of define-binary-type to define one +type for each size of integer used in the ID3 format like this: + + (define-binary-type u1 () (unsigned-integer :bytes 1)) + (define-binary-type u2 () (unsigned-integer :bytes 2)) + (define-binary-type u3 () (unsigned-integer :bytes 3)) + (define-binary-type u4 () (unsigned-integer :bytes 4)) + + Another type you'll need to be able to read and write is the 28-bit +value used in the header. This size is encoded using 28 bits rather +than a multiple of 8, such as 32 bits, because an ID3 tag can't contain +the byte #xff followed by a byte with the top 3 bits on because that +pattern has a special meaning to MP3 decoders. None of the other fields +in the ID3 header could possibly contain such a byte sequence, but if +you encoded the tag size as a regular unsigned-integer, it might. To +avoid that possibility, the size is encoded using only the bottom seven +bits of each byte, with the top bit always zero. (1) + + Thus, it can be read and written a lot like an unsigned-integer +except the size of the byte specifier you pass to LDB should be seven +rather than eight. This similarity suggests that if you add a +parameter, bits-per-byte, to the existing unsigned-integer binary type, +you could then define a new type, id3-tag-size, using a short-form +define-binary-type. The new version of unsigned-integer is just like +the old version except with bits-per-byte used everywhere the old +version hardwired the number eight. It looks like this: + + (define-binary-type unsigned-integer (bytes bits-per-byte) + (:reader (in) + (loop with value = 0 + for low-bit downfrom (* bits-per-byte (1- bytes)) to 0 by bits-per-byte do + (setf (ldb (byte bits-per-byte low-bit) value) (read-byte in)) + finally (return value))) + (:writer (out value) + (loop for low-bit downfrom (* bits-per-byte (1- bytes)) to 0 by bits-per-byte + do (write-byte (ldb (byte bits-per-byte low-bit) value) out)))) + + The definition of id3-tag-size is then trivial. + + (define-binary-type id3-tag-size () (unsigned-integer :bytes 4 :bits-per-byte 7)) + + You'll also have to change the definitions of u1 through u4 to +specify eight bits per byte like this: + + (define-binary-type u1 () (unsigned-integer :bytes 1 :bits-per-byte 8)) + (define-binary-type u2 () (unsigned-integer :bytes 2 :bits-per-byte 8)) + (define-binary-type u3 () (unsigned-integer :bytes 3 :bits-per-byte 8)) + (define-binary-type u4 () (unsigned-integer :bytes 4 :bits-per-byte 8)) + + ---------- Footnotes ---------- + + (1) The frame data following the ID3 header could also potentially +contain the illegal sequence. That's prevented using a different scheme +that's turned on via one of the flags in the tag header. The code in +this chapter doesn't account for the possibility that this flag might be +set; in practice it's rarely used. + + +File: pcl.info, Node: 25-4, Next: 25-5, Prev: 25-3, Up: Chapter 25 + +String Types +============ + +The other kinds of primitive types that are ubiquitous in the ID3 format +are strings. 
In the previous chapter I discussed some of the issues you +have to consider when dealing with strings in binary files, such as the +difference between character codes and character encodings. + + ID3 uses two different character codes, ISO 8859-1 and Unicode. ISO +8859-1, also known as Latin-1, is an eight-bit character code that +extends ASCII with characters used by the languages of Western Europe. +In other words, the code points from 0-127 map to the same characters in +ASCII and ISO 8859-1, but ISO 8859-1 also provides mappings for code +points up to 255. Unicode is a character code designed to provide a +code point for virtually every character of all the world's languages. +Unicode is a superset of ISO 8859-1 in the same way that ISO 8859-1 is a +superset of ASCII-the code points from 0-255 map to the same characters +in both ISO 8859-1 and Unicode. (Thus, Unicode is also a superset of +ASCII.) + + Since ISO 8859-1 is an eight-bit character code, it's encoded using +one byte per character. For Unicode strings, ID3 uses the UCS-2 +encoding with a leading byte order mark. (1) I'll discuss what a byte +order mark is in a moment. + + Reading and writing these two encodings isn't a problem-it's just a +question of reading and writing unsigned integers in various formats, +and you just finished writing the code to do that. The trick is how you +translate those numeric values to Lisp character objects. + + The Lisp implementation you're using probably uses either Unicode or +ISO 8859-1 as its internal character code. And since all the values +from 0-255 map to the same characters in both ISO 8859-1 and Unicode, +you can use Lisp's CODE-CHAR and CHAR-CODE functions to translate those +values in both character codes. However, if your Lisp supports only ISO +8859-1, then you'll be able to represent only the first 255 Unicode +characters as Lisp characters. In other words, in such a Lisp +implementation, if you try to process an ID3 tag that uses Unicode +strings and if any of those strings contain characters with code points +higher than 255, you'll get an error when you try to translate the code +point to a Lisp character. For now I'll assume either you're using a +Unicode-based Lisp or you won't process any files containing characters +outside the ISO 8859-1 range. + + The other issue with encoding strings is how to know how many bytes +to interpret as character data. ID3 uses two strategies I mentioned in +the previous chapter-some strings are terminated with a null character, +while other strings occur in positions where you can determine the +number of bytes to read, either because the string at that position is +always the same length or because the string is at the end of a +composite structure whose overall size you know. Note, however, that +the number of bytes isn't necessarily the same as the number of +characters in the string. + + Putting all these variations together, the ID3 format uses four ways +to read and write strings-two characters crossed with two ways of +delimiting the string data. + + Obviously, much of the logic of reading and writing strings will be +quite similar. So, you can start by defining two binary types, one for +reading strings of a specific length (in characters) and another for +reading terminated strings. Both types take advantage of that the type +argument to read-value and write-value is just another piece of data; +you can make the type of character to read a parameter of these types. +This is a technique you'll use quite a few times in this chapter. 
+ + (define-binary-type generic-string (length character-type) + (:reader (in) + (let ((string (make-string length))) + (dotimes (i length) + (setf (char string i) (read-value character-type in))) + string)) + (:writer (out string) + (dotimes (i length) + (write-value character-type out (char string i))))) + + (define-binary-type generic-terminated-string (terminator character-type) + (:reader (in) + (with-output-to-string (s) + (loop for char = (read-value character-type in) + until (char= char terminator) do (write-char char s)))) + (:writer (out string) + (loop for char across string + do (write-value character-type out char) + finally (write-value character-type out terminator)))) + + With these types available, there's not much to reading ISO 8859-1 +strings. Because the character-type argument you pass to read-value and +write-value of a generic-string must be the name of a binary type, you +need to define an iso-8859-1-char binary type. This also gives you a +good place to put a bit of sanity checking on the code points of +characters you read and write. + + (define-binary-type iso-8859-1-char () + (:reader (in) + (let ((code (read-byte in))) + (or (code-char code) + (error "Character code ~d not supported" code)))) + (:writer (out char) + (let ((code (char-code char))) + (if (<= 0 code #xff) + (write-byte code out) + (error "Illegal character for iso-8859-1 encoding: character: ~c with code: ~d" char code))))) + + Now defining the ISO 8859-1 string types is trivial using the short +form of define-binary-type as follows: + + (define-binary-type iso-8859-1-string (length) + (generic-string :length length :character-type 'iso-8859-1-char)) + + (define-binary-type iso-8859-1-terminated-string (terminator) + (generic-terminated-string :terminator terminator :character-type 'iso-8859-1-char)) + + Reading UCS-2 strings is only slightly more complex. The complexity +arises because you can encode a UCS-2 code point in two ways: most +significant byte first (big-endian) or least significant byte first +(little-endian). UCS-2 strings therefore start with two extra bytes, +called the byte order mark, made up of the numeric value #xfeff encoded +in either big-endian form or little-endian form. When reading a UCS-2 +string, you read the byte order mark and then, depending on its value, +read either big-endian or little-endian characters. Thus, you'll need +two different UCS-2 character types. But you need only one version of +the sanity-checking code, so you can define a parameterized binary type +like this: + + (define-binary-type ucs-2-char (swap) + (:reader (in) + (let ((code (read-value 'u2 in))) + (when swap (setf code (swap-bytes code))) + (or (code-char code) (error "Character code ~d not supported" code)))) + (:writer (out char) + (let ((code (char-code char))) + (unless (<= 0 code #xffff) + (error "Illegal character for ucs-2 encoding: ~c with char-code: ~d" char code)) + (when swap (setf code (swap-bytes code))) + (write-value 'u2 out code)))) + + where the swap-bytes function can be defined as follows, taking +advantage of LDB being SETFable and thus ROTATEFable: + + (defun swap-bytes (code) + (assert (<= code #xffff)) + (rotatef (ldb (byte 8 0) code) (ldb (byte 8 8) code)) + code) + + Using ucs-2-char, you can define two character types that will be +used as the character-type arguments to the generic string functions. 
+ + (define-binary-type ucs-2-char-big-endian () (ucs-2-char :swap nil)) + + (define-binary-type ucs-2-char-little-endian () (ucs-2-char :swap t)) + + Then you need a function that returns the name of the character type +to use based on the value of the byte order mark. + + (defun ucs-2-char-type (byte-order-mark) + (ecase byte-order-mark + (#xfeff 'ucs-2-char-big-endian) + (#xfffe 'ucs-2-char-little-endian))) + + Now you can define length- and terminator-delimited string types for +UCS-2-encoded strings that read the byte order mark and use it to +determine which variant of UCS-2 character to pass as the character-type +argument to read-value and write-value. The only other wrinkle is that +you need to translate the length argument, which is a number of bytes, +to the number of characters to read, accounting for the byte order mark. + + (define-binary-type ucs-2-string (length) + (:reader (in) + (let ((byte-order-mark (read-value 'u2 in)) + (characters (1- (/ length 2)))) + (read-value + 'generic-string in + :length characters + :character-type (ucs-2-char-type byte-order-mark)))) + (:writer (out string) + (write-value 'u2 out #xfeff) + (write-value + 'generic-string out string + :length (length string) + :character-type (ucs-2-char-type #xfeff)))) + + (define-binary-type ucs-2-terminated-string (terminator) + (:reader (in) + (let ((byte-order-mark (read-value 'u2 in))) + (read-value + 'generic-terminated-string in + :terminator terminator + :character-type (ucs-2-char-type byte-order-mark)))) + (:writer (out string) + (write-value 'u2 out #xfeff) + (write-value + 'generic-terminated-string out string + :terminator terminator + :character-type (ucs-2-char-type #xfeff)))) + + ---------- Footnotes ---------- + + (1) In ID3v2.4, UCS-2 is replaced by the virtually identical UTF-16, +and UTF-16BE and UTF-8 are added as additional encodings. + + +File: pcl.info, Node: 25-5, Next: 25-6, Prev: 25-4, Up: Chapter 25 + +ID3 Tag Header +============== + +With the basic primitive types done, you're ready to switch to a +high-level view and start defining binary classes to represent first the +ID3 tag as a whole and then the individual frames. + + If you turn first to the ID3v2.2 specification, you'll see that the +basic structure of the tag is this header: + + ID3/file identifier "ID3" + ID3 version $02 00 + ID3 flags %xx000000 + ID3 size 4 * %0xxxxxxx + + followed by frame data and padding. Since you've already defined +binary types to read and write all the fields in the header, defining a +class that can read the header of an ID3 tag is just a matter of putting +them together. + + (define-binary-class id3-tag () + ((identifier (iso-8859-1-string :length 3)) + (major-version u1) + (revision u1) + (flags u1) + (size id3-tag-size))) + + If you have some MP3 files lying around, you can test this much of +the code and also see what version of ID3 tags your MP3s contain. First +you can write a function that reads an id3-tag, as just defined, from +the beginning of a file. Be aware, however, that ID3 tags aren't +required to appear at the beginning of a file, though these days they +almost always do. To find an ID3 tag elsewhere in a file, you can scan +the file looking for the sequence of bytes 73, 68, 51 (in other words, +the string "ID3"). (1) For now you can probably get away with assuming +the tags are the first thing in the file. 
+ + (defun read-id3 (file) + (with-open-file (in file :element-type '(unsigned-byte 8)) + (read-value 'id3-tag in))) + + On top of this function you can build a function that takes a +filename and prints the information in the tag header along with the +name of the file. + + (defun show-tag-header (file) + (with-slots (identifier major-version revision flags size) (read-id3 file) + (format t "~a ~d.~d ~8,'0b ~d bytes -- ~a~%" + identifier major-version revision flags size (enough-namestring file)))) + + It prints output that looks like this: + + ID3V2> (show-tag-header "/usr2/mp3/Kitka/Wintersongs/02 Byla Cesta.mp3") + ID3 2.0 00000000 2165 bytes -- Kitka/Wintersongs/02 Byla Cesta.mp3 + NIL + + Of course, to determine what versions of ID3 are most common in your +MP3 library, it'd be handier to have a function that returns a summary +of all the MP3 files under a given directory. You can write one easily +enough using the walk-directory function defined in Chapter 15. First +define a helper function that tests whether a given filename has an mp3 +extension. + + (defun mp3-p (file) + (and + (not (directory-pathname-p file)) + (string-equal "mp3" (pathname-type file)))) + + Then you can combine show-tag-header and mp3-p with walk-directory to +print a summary of the ID3 header in each file under a given directory. + + (defun show-tag-headers (dir) + (walk-directory dir #'show-tag-header :test #'mp3-p)) + + However, if you have a lot of MP3s, you may just want a count of how +many ID3 tags of each version you have in your MP3 collection. To get +that information, you might write a function like this: + + (defun count-versions (dir) + (let ((versions (mapcar #'(lambda (x) (cons x 0)) '(2 3 4)))) + (flet ((count-version (file) + (incf (cdr (assoc (major-version (read-id3 file)) versions))))) + (walk-directory dir #'count-version :test #'mp3-p)) + versions)) + + Another function you'll need in Chapter 29 is one that tests whether +a file actually starts with an ID3 tag, which you can define like this: + + (defun id3-p (file) + (with-open-file (in file :element-type '(unsigned-byte 8)) + (string= "ID3" (read-value 'iso-8859-1-string in :length 3)))) + + ---------- Footnotes ---------- + + (1) The 2.4 version of the ID3 format also supports placing a footer +at the end of a tag, which makes it easier to find a tag appended to the +end of a file. + + +File: pcl.info, Node: 25-6, Next: 25-7, Prev: 25-5, Up: Chapter 25 + +ID3 Frames +========== + +As I discussed earlier, the bulk of an ID3 tag is divided into frames. +Each frame has a structure similar to that of the tag as a whole. Each +frame starts with a header indicating what kind of frame it is and the +size of the frame in bytes. The structure of the frame header changed +slightly between version 2.2 and version 2.3 of the ID3 format, and +eventually you'll have to deal with both forms. To start, you can focus +on parsing version 2.2 frames. + + The header of a 2.2 frame consists of three bytes that encode a +three-character ISO 8859-1 string followed by a three-byte unsigned +integer, which specifies the size of the frame in bytes, excluding the +six-byte header. The string identifies what type of frame it is, which +determines how you parse the data following the size. This is exactly +the kind of situation for which you defined the +define-tagged-binary-class macro. You can define a tagged class that +reads the frame header and then dispatches to the appropriate concrete +class using a function that maps IDs to a class names. 
+ + (define-tagged-binary-class id3-frame () + ((id (iso-8859-1-string :length 3)) + (size u3)) + (:dispatch (find-frame-class id))) + + Now you're ready to start implementing concrete frame classes. +However, the specification defines quite a few-63 in version 2.2 and +even more in later specs. Even considering frame types that share a +common structure to be equivalent, you'll still find 24 unique frame +types in version 2.2. But only a few of these are used "in the wild." +So rather than immediately setting to work defining classes for each of +the frame types, you can start by writing a generic frame class that +lets you read the frames in a tag without parsing the data within the +frames themselves. This will give you a way to find out what frames are +actually present in the MP3s you want to process. You'll need this +class eventually anyway because the specification allows for +experimental frames that you'll need to be able to read without parsing. + + Since the size field of the frame header tells you exactly how many +bytes long the frame is, you can define a generic-frame class that +extends id3-frame and adds a single field, data, that will hold an array +of bytes. + + (define-binary-class generic-frame (id3-frame) + ((data (raw-bytes :size size)))) + + The type of the data field, raw-bytes, just needs to hold an array of +bytes. You can define it like this: + + (define-binary-type raw-bytes (size) + (:reader (in) + (let ((buf (make-array size :element-type '(unsigned-byte 8)))) + (read-sequence buf in) + buf)) + (:writer (out buf) + (write-sequence buf out))) + + For the time being, you'll want all frames to be read as +generic-frames, so you can define the find-frame-class function used in +id3-frame's :dispatch expression to always return generic-frame, +regardless of the frame's id. + + (defun find-frame-class (id) + (declare (ignore id)) + 'generic-frame) + + Now you need to modify id3-tag so it'll read frames after the header +fields. There's only one tricky bit to reading the frame data: although +the tag header tells you how many bytes long the tag is, that number +includes the padding that can follow the frame data. Since the tag +header doesn't tell you how many frames the tag contains, the only way +to tell when you've hit the padding is to look for a null byte where +you'd expect a frame identifier. + + To handle this, you can define a binary type, id3-frames, that will +be responsible for reading the remainder of a tag, creating frame +objects to represent all the frames it finds, and then skipping over any +padding. This type will take as a parameter the tag size, which it can +use to avoid reading past the end of the tag. But the reading code will +also need to detect the beginning of the padding that can follow the +tag's frame data. Rather than calling read-value directly in id3-frames +:reader, you should use a function read-frame, which you'll define to +return NIL when it detects padding, otherwise returning an id3-frame +object read using read-value. 
Assuming you define read-frame so it +reads only one byte past the end of the last frame in order to detect +the start of the padding, you can define the id3-frames binary type like +this: + + (define-binary-type id3-frames (tag-size) + (:reader (in) + (loop with to-read = tag-size + while (plusp to-read) + for frame = (read-frame in) + while frame + do (decf to-read (+ 6 (size frame))) + collect frame + finally (loop repeat (1- to-read) do (read-byte in)))) + (:writer (out frames) + (loop with to-write = tag-size + for frame in frames + do (write-value 'id3-frame out frame) + (decf to-write (+ 6 (size frame))) + finally (loop repeat to-write do (write-byte 0 out))))) + + You can use this type to add a frames slot to id3-tag. + + (define-binary-class id3-tag () + ((identifier (iso-8859-1-string :length 3)) + (major-version u1) + (revision u1) + (flags u1) + (size id3-tag-size) + (frames (id3-frames :tag-size size)))) + + +File: pcl.info, Node: 25-7, Next: 25-8, Prev: 25-6, Up: Chapter 25 + +Detecting Tag Padding +===================== + +Now all that remains is to implement read-frame. This is a bit tricky +since the code that actually reads bytes from the stream is several +layers down from read-frame. + + What you'd really like to do in read-frame is read one byte and +return NIL if it's a null and otherwise read a frame with read-value. +Unfortunately, if you read the byte in read-frame, then it won't be +available to be read by read-value. (1) + + It turns out this is a perfect opportunity to use the condition +system-you can check for null bytes in the low-level code that reads +from the stream and signal a condition when you read a null; read-frame +can then handle the condition by unwinding the stack before more bytes +are read. In addition to turning out to be a tidy solution to the +problem of detecting the start of the tag's padding, this is also an +example of how you can use conditions for purposes other than handling +errors. + + You can start by defining a condition type to be signaled by the +low-level code and handled by the high-level code. This condition +doesn't need any slots-you just need a distinct class of condition so +you know no other code will be signaling or handling it. + + (define-condition in-padding () ()) + + Next you need to define a binary type whose :reader reads a given +number of bytes, first reading a single byte and signaling an in-padding +condition if the byte is null and otherwise reading the remaining bytes +as an iso-8859-1-string and combining it with the first byte read. + + (define-binary-type frame-id (length) + (:reader (in) + (let ((first-byte (read-byte in))) + (when (= first-byte 0) (signal 'in-padding)) + (let ((rest (read-value 'iso-8859-1-string in :length (1- length)))) + (concatenate + 'string (string (code-char first-byte)) rest)))) + (:writer (out id) + (write-value 'iso-8859-1-string out id :length length))) + + If you redefine id3-frame to make the type of its id slot frame-id +instead of iso-8859-1-string, the condition will be signaled whenever +id3-frame's read-value method reads a null byte instead of the beginning +of a frame. + + (define-tagged-binary-class id3-frame () + ((id (frame-id :length 3)) + (size u3)) + (:dispatch (find-frame-class id))) + + Now all read-frame has to do is wrap a call to read-value in a +HANDLER-CASE that handles the in-padding condition by returning NIL. 
+ + (defun read-frame (in) + (handler-case (read-value 'id3-frame in) + (in-padding () nil))) + + With read-frame defined, you can now read a complete version 2.2 ID3 +tag, representing frames with instances of generic-frame. In the "What +Frames Do You Actually Need?" section, you'll do some experiments at +the REPL to determine what frame classes you need to implement. But +first let's add support for version 2.3 ID3 tags. + + ---------- Footnotes ---------- + + (1) Character streams support two functions, PEEK-CHAR and +UNREAD-CHAR, either of which would be a perfect solution to this +problem, but binary streams support no equivalent functions. + + +File: pcl.info, Node: 25-8, Next: 25-9, Prev: 25-7, Up: Chapter 25 + +Supporting Multiple Versions of ID3 +=================================== + +Currently, id3-tag is defined using define-binary-class, but if you want +to support multiple versions of ID3, it makes more sense to use a +define-tagged-binary-class that dispatches on the major-version value. +As it turns out, all versions of ID3v2 have the same structure up to the +size field. So, you can define a tagged binary class like the following +that defines this basic structure and then dispatches to the appropriate +version-specific subclass: + + (define-tagged-binary-class id3-tag () + ((identifier (iso-8859-1-string :length 3)) + (major-version u1) + (revision u1) + (flags u1) + (size id3-tag-size)) + (:dispatch + (ecase major-version + (2 'id3v2.2-tag) + (3 'id3v2.3-tag)))) + + Version 2.2 and version 2.3 tags differ in two ways. First, the +header of a version 2.3 tag may be extended with up to four optional +extended header fields, as determined by values in the flags field. +Second, the frame format changed between version 2.2 and version 2.3, +which means you'll have to use different classes to represent version +2.2 frames and the corresponding version 2.3 frames. + + Since the new id3-tag class is based on the one you originally wrote +to represent version 2.2 tags, it's not surprising that the new +id3v2.2-tag class is trivial, inheriting most of its slots from the new +id3-tag class and adding the one missing slot, frames. Because version +2.2 and version 2.3 tags use different frame formats, you'll have to +change the id3-frames type to be parameterized with the type of frame to +read. For now, assume you'll do that and add a :frame-type argument to +the id3-frames type descriptor like this: + + (define-binary-class id3v2.2-tag (id3-tag) + ((frames (id3-frames :tag-size size :frame-type 'id3v2.2-frame)))) + + The id3v2.3-tag class is slightly more complex because of the +optional fields. The first three of the four optional fields are +included when the sixth bit in flags is set. They're a four- byte +integer specifying the size of the extended header, two bytes worth of +flags, and another four-byte integer specifying how many bytes of +padding are included in the tag. (1) The fourth optional field, +included when the fifteenth bit of the extended header flags is set, is +a four-byte cyclic redundancy check (CRC) of the rest of the tag. + + The binary data library doesn't provide any special support for +optional fields in a binary class, but it turns out that regular +parameterized binary types are sufficient. You can define a type +parameterized with the name of a type and a value that indicates whether +a value of that type should actually be read or written. 
+ + (define-binary-type optional (type if) + (:reader (in) + (when if (read-value type in))) + (:writer (out value) + (when if (write-value type out value)))) + + Using if as the parameter name looks a bit strange in that code, but +it makes the optional type descriptors quite readable. For instance, +here's the definition of id3v2.3-tag using optional slots: + + (define-binary-class id3v2.3-tag (id3-tag) + ((extended-header-size (optional :type 'u4 :if (extended-p flags))) + (extra-flags (optional :type 'u2 :if (extended-p flags))) + (padding-size (optional :type 'u4 :if (extended-p flags))) + (crc (optional :type 'u4 :if (crc-p flags extra-flags))) + (frames (id3-frames :tag-size size :frame-type 'id3v2.3-frame)))) + + where extended-p and crc-p are helper functions that test the +appropriate bit of the flags value they're passed. To test whether an +individual bit of an integer is set, you can use LOGBITP, another +bit-twiddling function. It takes an index and an integer and returns +true if the specified bit is set in the integer. + + (defun extended-p (flags) (logbitp 6 flags)) + + (defun crc-p (flags extra-flags) + (and (extended-p flags) (logbitp 15 extra-flags))) + + As in the version 2.2 tag class, the frames slot is defined to be of +type id3-frames, passing the name of the frame type as a parameter. You +do, however, need to make a few small changes to id3-frames and +read-frame to support the extra frame-type parameter. + + (define-binary-type id3-frames (tag-size frame-type) + (:reader (in) + (loop with to-read = tag-size + while (plusp to-read) + for frame = (read-frame frame-type in) + while frame + do (decf to-read (+ (frame-header-size frame) (size frame))) + collect frame + finally (loop repeat (1- to-read) do (read-byte in)))) + (:writer (out frames) + (loop with to-write = tag-size + for frame in frames + do (write-value frame-type out frame) + (decf to-write (+ (frame-header-size frame) (size frame))) + finally (loop repeat to-write do (write-byte 0 out))))) + + (defun read-frame (frame-type in) + (handler-case (read-value frame-type in) + (in-padding () nil))) + + The changes are in the calls to read-frame and write-value, where you +need to pass the frame-type argument and, in computing the size of the +frame, where you need to use a function frame-header-size instead of the +literal value 6 since the frame header changed size between version 2.2 +and version 2.3. Since the difference in the result of this function is +based on the class of the frame, it makes sense to define it as a +generic function like this: + + (defgeneric frame-header-size (frame)) + + You'll define the necessary methods on that generic function in the +next section after you define the new frame classes. + + ---------- Footnotes ---------- + + (1) If a tag had an extended header, you could use this value to +determine where the frame data should end. However, if the extended +header isn't used, you'd have to use the old algorithm anyway, so it's +not worth adding code to do it another way. + + +File: pcl.info, Node: 25-9, Next: 25-10, Prev: 25-8, Up: Chapter 25 + +Versioned Frame Base Classes +============================ + +Where before you defined a single base class for all frames, you'll now +have two classes, id3v2.2-frame and id3v2.3-frame. The id3v2.2-frame +class will be essentially the same as the original id3-frame class. 
+ + (define-tagged-binary-class id3v2.2-frame () + ((id (frame-id :length 3)) + (size u3)) + (:dispatch (find-frame-class id))) + + The id3v2.3-frame, on the other hand, requires more changes. The +frame identifier and size fields were extended in version 2.3 from three +to four bytes each, and two bytes worth of flags were added. +Additionally, the frame, like the version 2.3 tag, can contain optional +fields, controlled by the values of three of the frame's flags. (1) +With those changes in mind, you can define the version 2.3 frame base +class, along with some helper functions, like this: + + (define-tagged-binary-class id3v2.3-frame () + ((id (frame-id :length 4)) + (size u4) + (flags u2) + (decompressed-size (optional :type 'u4 :if (frame-compressed-p flags))) + (encryption-scheme (optional :type 'u1 :if (frame-encrypted-p flags))) + (grouping-identity (optional :type 'u1 :if (frame-grouped-p flags)))) + (:dispatch (find-frame-class id))) + + (defun frame-compressed-p (flags) (logbitp 7 flags)) + + (defun frame-encrypted-p (flags) (logbitp 6 flags)) + + (defun frame-grouped-p (flags) (logbitp 5 flags)) + + With these two classes defined, you can now implement the methods on +the generic function frame-header-size. + + (defmethod frame-header-size ((frame id3v2.2-frame)) 6) + + (defmethod frame-header-size ((frame id3v2.3-frame)) 10) + + The optional fields in a version 2.3 frame aren't counted as part of +the header for this computation since they're already included in the +value of the frame's size. + + ---------- Footnotes ---------- + + (1) These flags, in addition to controlling whether the optional +fields are included, can affect the parsing of the rest of the tag. In +particular, if the seventh bit of the flags is set, then the actual +frame data is compressed using the zlib algorithm, and if the sixth bit +is set, the data is encrypted. In practice these options are rarely, if +ever, used, so you can get away with ignoring them for now. But that +would be an area you'd have to address to make this a production-quality +ID3 library. One simple half solution would be to change +find-frame-class to accept a second argument and pass it the flags; if +the frame is compressed or encrypted, you could instantiate a generic +frame to hold the data. + + +File: pcl.info, Node: 25-10, Next: 25-11, Prev: 25-9, Up: Chapter 25 + +Versioned Concrete Frame Classes +================================ + +In the original definition, generic-frame subclassed id3-frame. But now +id3-frame has been replaced with the two version-specific base classes, +id3v2.2-frame and id3v2.3-frame. So, you need to define two new +versions of generic-frame, one for each base class. One way to define +this classes would be like this: + + (define-binary-class generic-frame-v2.2 (id3v2.2-frame) + ((data (raw-bytes :size size)))) + + (define-binary-class generic-frame-v2.3 (id3v2.3-frame) + ((data (raw-bytes :size size)))) + + However, it's a bit annoying that these two classes are the same +except for their superclass. It's not too bad in this case since +there's only one additional field. But if you take this approach for +other concrete frame classes, ones that have a more complex internal +structure that's identical between the two ID3 versions, the duplication +will be more irksome. 
+ + Another approach, and the one you should actually use, is to define a +class generic-frame as a mixin: a class intended to be used as a +superclass along with one of the version-specific base classes to +produce a concrete, version-specific frame class. The only tricky bit +about this approach is that if generic-frame doesn't extend either of +the frame base classes, then you can't refer to the size slot in its +definition. Instead, you must use the current-binary-object function I +discussed at the end of the previous chapter to access the object you're +in the midst of reading or writing and pass it to size. And you need to +account for the difference in the number of bytes of the total frame +size that will be left over, in the case of a version 2.3 frame, if any +of the optional fields are included in the frame. So, you should define +a generic function data-bytes with methods that do the right thing for +both version 2.2 and version 2.3 frames. + + (define-binary-class generic-frame () + ((data (raw-bytes :size (data-bytes (current-binary-object)))))) + + (defgeneric data-bytes (frame)) + + (defmethod data-bytes ((frame id3v2.2-frame)) + (size frame)) + + (defmethod data-bytes ((frame id3v2.3-frame)) + (let ((flags (flags frame))) + (- (size frame) + (if (frame-compressed-p flags) 4 0) + (if (frame-encrypted-p flags) 1 0) + (if (frame-grouped-p flags) 1 0)))) + + Then you can define concrete classes that extend one of the +version-specific base classes and generic-frame to define +version-specific generic frame classes. + + (define-binary-class generic-frame-v2.2 (id3v2.2-frame generic-frame) ()) + + (define-binary-class generic-frame-v2.3 (id3v2.3-frame generic-frame) ()) + + With these classes defined, you can redefine the find-frame-class +function to return the right versioned class based on the length of the +identifier. + + (defun find-frame-class (id) + (ecase (length id) + (3 'generic-frame-v2.2) + (4 'generic-frame-v2.3))) + + +File: pcl.info, Node: 25-11, Next: 25-12, Prev: 25-10, Up: Chapter 25 + +What Frames Do You Actually Need? +================================= + +With the ability to read both version 2.2 and version 2.3 tags using +generic frames, you're ready to start implementing classes to represent +the specific frames you care about. However, before you dive in, you +should take a breather and figure out what frames you actually care +about since, as I mentioned earlier, the ID3 spec specifies many frames +that are almost never used. Of course, what frames you care about +depends on what kinds of applications you're interested in writing. If +you're mostly interested in extracting information from existing ID3 +tags, then you need implement only the classes representing the frames +containing the information you care about. On the other hand, if you +want to write an ID3 tag editor, you may need to support all the frames. + + Rather than guessing which frames will be most useful, you can use +the code you've already written to poke around a bit at the REPL and see +what frames are actually used in your own MP3s. To start, you need an +instance of id3-tag, which you can get with the read-id3 function. + + ID3V2> (read-id3 "/usr2/mp3/Kitka/Wintersongs/02 Byla Cesta.mp3") + # + + Since you'll want to play with this object a bit, you should save it +in a variable. + + ID3V2> (defparameter *id3* (read-id3 "/usr2/mp3/Kitka/Wintersongs/02 Byla Cesta.mp3")) + *ID3* + + Now you can see, for example, how many frames it has. 
+ + ID3V2> (length (frames *id3*)) + 11 + + Not too many-let's take a look at what they are. + + ID3V2> (frames *id3*) + (# # + # # + # # + # # + # # + #) + + Okay, that's not too informative. What you really want to know are +what kinds of frames are in there. In other words, you want to know the +ids of those frames, which you can get with a simple MAPCAR like this: + + ID3V2> (mapcar #'id (frames *id3*)) + ("TT2" "TP1" "TAL" "TRK" "TPA" "TYE" "TCO" "TEN" "COM" "COM" "COM") + + If you look up these identifiers in the ID3v2.2 spec, you'll discover +that all the frames with identifiers starting with T are text +information frames and have a similar structure. And COM is the +identifier for comment frames, which have a structure similar to that of +text information frames. The particular text information frames +identified here turn out to be the frames for representing the song +title, artist, album, track, part of set, year, genre, and encoding +program. + + Of course, this is just one MP3 file. Maybe other frames are used in +other files. It's easy enough to discover. First define a function +that combines the previous MAPCAR expression with a call to read-id3 and +wraps the whole thing in a DELETE-DUPLICATES to keep things tidy. +You'll have to use a :test argument of #'string= to DELETE-DUPLICATES to +specify that you want two elements considered the same if they're the +same string. + + (defun frame-types (file) + (delete-duplicates (mapcar #'id (frames (read-id3 file))) :test #'string=)) + + This should give the same answer except with only one of each +identifier when passed the same filename. + + ID3V2> (frame-types "/usr2/mp3/Kitka/Wintersongs/02 Byla Cesta.mp3") + ("TT2" "TP1" "TAL" "TRK" "TPA" "TYE" "TCO" "TEN" "COM") + + Then you can use Chapter 15's walk-directory function along with +mp3-p to find every MP3 file under a directory and combine the results +of calling frame-types on each file. Recall that NUNION is the +recycling version of the UNION function; since frame-types makes a new +list for each file, this is safe. + + (defun frame-types-in-dir (dir) + (let ((ids ())) + (flet ((collect (file) + (setf ids (nunion ids (frame-types file) :test #'string=)))) + (walk-directory dir #'collect :test #'mp3-p)) + ids)) + + Now pass it the name of a directory, and it'll tell you the set of +identifiers used in all the MP3 files under that directory. It may take +a few seconds depending how many MP3 files you have, but you'll probably +get something similar to this: + + ID3V2> (frame-types-in-dir "/usr2/mp3/") + ("TCON" "COMM" "TRCK" "TIT2" "TPE1" "TALB" "TCP" "TT2" "TP1" "TCM" + "TAL" "TRK" "TPA" "TYE" "TCO" "TEN" "COM") + + The four-letter identifiers are the version 2.3 equivalents of the +version 2.2 identifiers I discussed previously. Since the information +stored in those frames is exactly the information you'll need in Chapter +27, it makes sense to implement classes only for the frames actually +used, namely, text information and comment frames, which you'll do in +the next two sections. If you decide later that you want to support +other frame types, it's mostly a matter of translating the ID3 +specifications into the appropriate binary class definitions. + + +File: pcl.info, Node: 25-12, Next: 25-13, Prev: 25-11, Up: Chapter 25 + +Text Information Frames +======================= + +All text information frames consist of two fields: a single byte +indicating which string encoding is used in the frame and a string +encoded in the remaining bytes of the frame. 
If the encoding byte is +zero, the string is encoded in ISO 8859-1; if the encoding is one, the +string is a UCS-2 string. + + You've already defined binary types representing the four different +kinds of strings-two different encodings each with two different methods +of delimiting the string. However, define-binary-class provides no +direct facility for determining the type of value to read based on other +values in the object. Instead, you can define a binary type that you +pass the value of the encoding byte and that then reads or writes the +appropriate kind of string. + + As long as you're defining such a type, you can also define it to +take two parameters, :length and :terminator, and pick the right type of +string based on which argument is supplied. To implement this new type, +you must first define some helper functions. The first two return the +name of the appropriate string type based on the encoding byte. + + (defun non-terminated-type (encoding) + (ecase encoding + (0 'iso-8859-1-string) + (1 'ucs-2-string))) + + (defun terminated-type (encoding) + (ecase encoding + (0 'iso-8859-1-terminated-string) + (1 'ucs-2-terminated-string))) + + Then string-args uses the encoding byte, the length, and the +terminator to determine several of the arguments to be passed to +read-value and write-value by the :reader and :writer of +id3-encoded-string. One of the length and terminator arguments to +string-args should always be NIL. + + (defun string-args (encoding length terminator) + (cond + (length + (values (non-terminated-type encoding) :length length)) + (terminator + (values (terminated-type encoding) :terminator terminator)))) + + With those helpers, the definition of id3-encoded-string is simple. +One detail to note is that the keyword-either :length or +:terminator-used in the call to read-value and write-value is just +another piece of data returned by string-args. Although keywords in +arguments lists are almost always literal keywords, they don't have to +be. + + (define-binary-type id3-encoded-string (encoding length terminator) + (:reader (in) + (multiple-value-bind (type keyword arg) + (string-args encoding length terminator) + (read-value type in keyword arg))) + (:writer (out string) + (multiple-value-bind (type keyword arg) + (string-args encoding length terminator) + (write-value type out string keyword arg)))) + + Now you can define a text-info mixin class, much the way you defined +generic-frame earlier. + + (define-binary-class text-info-frame () + ((encoding u1) + (information (id3-encoded-string :encoding encoding :length (bytes-left 1))))) + + As when you defined generic-frame, you need access to the size of the +frame, in this case to compute the :length argument to pass to +id3-encoded-string. Because you'll need to do a similar computation in +the next class you define, you can go ahead and define a helper +function, bytes-left, that uses current-binary-object to get at the size +of the frame. + + (defun bytes-left (bytes-read) + (- (size (current-binary-object)) bytes-read)) + + Now, as you did with the generic-frame mixin, you can define two +version-specific concrete classes with a minimum of duplicated code. 
+ + (define-binary-class text-info-frame-v2.2 (id3v2.2-frame text-info-frame) ()) + + (define-binary-class text-info-frame-v2.3 (id3v2.3-frame text-info-frame) ()) + + To wire these classes in, you need to modify find-frame-class to +return the appropriate class name when the ID indicates the frame is a +text information frame, namely, whenever the ID starts with T and isn't +TXX or TXXX. + + (defun find-frame-class (name) + (cond + ((and (char= (char name 0) #\T) + (not (member name '("TXX" "TXXX") :test #'string=))) + (ecase (length name) + (3 'text-info-frame-v2.2) + (4 'text-info-frame-v2.3))) + (t + (ecase (length name) + (3 'generic-frame-v2.2) + (4 'generic-frame-v2.3))))) + + +File: pcl.info, Node: 25-13, Next: 25-14, Prev: 25-12, Up: Chapter 25 + +Comment Frames +============== + +Another commonly used frame type is the comment frame, which is like a +text information frame with a few extra fields. Like a text information +frame, it starts with a single byte indicating the string encoding used +in the frame. That byte is followed by a three-character ISO 8859-1 +string (regardless of the value of the string encoding byte), which +indicates what language the comment is in using an ISO-639-2 code, for +example, "eng" for English or "jpn" for Japanese. That field is +followed by two strings encoded as indicated by the first byte. The +first is a null-terminated string containing a description of the +comment. The second, which takes up the remainder of the frame, is the +comment text itself. + + (define-binary-class comment-frame () + ((encoding u1) + (language (iso-8859-1-string :length 3)) + (description (id3-encoded-string :encoding encoding :terminator +null+)) + (text (id3-encoded-string + :encoding encoding + :length (bytes-left + (+ 1 ; encoding + 3 ; language + (encoded-string-length description encoding t))))))) + + As in the definition of the text-info mixin, you can use bytes-left +to compute the size of the final string. However, since the description +field is a variable-length string, the number of bytes read prior to the +start of text isn't a constant. To make matters worse, the number of +bytes used to encode description is dependent on the encoding. So, you +should define a helper function that returns the number of bytes used to +encode a string given the string, the encoding code, and a boolean +indicating whether the string is terminated with an extra character. + + (defun encoded-string-length (string encoding terminated) + (let ((characters (+ (length string) (if terminated 1 0)))) + (* characters (ecase encoding (0 1) (1 2))))) + + And, as before, you can define the concrete version-specific comment +frame classes and wire them into find-frame-class. 
+ + (define-binary-class comment-frame-v2.2 (id3v2.2-frame comment-frame) ()) + + (define-binary-class comment-frame-v2.3 (id3v2.3-frame comment-frame) ()) + + (defun find-frame-class (name) + (cond + ((and (char= (char name 0) #\T) + (not (member name '("TXX" "TXXX") :test #'string=))) + (ecase (length name) + (3 'text-info-frame-v2.2) + (4 'text-info-frame-v2.3))) + ((string= name "COM") 'comment-frame-v2.2) + ((string= name "COMM") 'comment-frame-v2.3) + (t + (ecase (length name) + (3 'generic-frame-v2.2) + (4 'generic-frame-v2.3))))) + + +File: pcl.info, Node: 25-14, Next: Chapter 26, Prev: 25-13, Up: Chapter 25 + +Extracting Information from an ID3 Tag +====================================== + +Now that you have the basic ability to read and write ID3 tags, you have +a lot of directions you could take this code. If you want to develop a +complete ID3 tag editor, you'll need to implement specific classes for +all the frame types. You'd also need to define methods for manipulating +the tag and frame objects in a consistent way (for instance, if you +change the value of a string in a text-info-frame, you'll likely need to +adjust the size); as the code stands, there's nothing to make sure that +happens. (1) + + Or, if you just need to extract certain pieces of information about +an MP3 file from its ID3 tag-as you will when you develop a streaming +MP3 server in Chapters 27, 28, and 29-you'll need to write functions +that find the appropriate frames and extract the information you want. + + Finally, to make this production-quality code, you'd have to pore +over the ID3 specs and deal with the details I skipped over in the +interest of space. In particular, some of the flags in both the tag and +the frame can affect the way the contents of the tag or frame is read; +unless you write some code that does the right thing when those flags +are set, there may be ID3 tags that this code won't be able to parse +correctly. But the code from this chapter should be capable of parsing +nearly all the MP3s you actually encounter. + + For now you can finish with a few functions to extract individual +pieces of information from an id3-tag. You'll need these functions in +Chapter 27 and probably in other code that uses this library. They +belong in this library because they depend on details of the ID3 format +that the users of this library shouldn't have to worry about. + + To get, say, the name of the song of the MP3 from which an id3-tag +was extracted, you need to find the ID3 frame with a specific identifier +and then extract the information field. And some pieces of information, +such as the genre, can require further decoding. Luckily, all the +frames that contain the information you'll care about are text +information frames, so extracting a particular piece of information +mostly boils down to using the right identifier to look up the +appropriate frame. Of course, the ID3 authors decided to change all the +identifiers between ID3v2.2 and ID3v2.3, so you'll have to account for +that. + + Nothing too complex-you just need to figure out the right path to get +to the various pieces of information. This is a perfect bit of code to +develop interactively, much the way you figured out what frame classes +you needed to implement. To start, you need an id3-tag object to play +with. 
Assuming you have an MP3 laying around, you can use read-id3 like +this: + + ID3V2> (defparameter *id3* (read-id3 "Kitka/Wintersongs/02 Byla Cesta.mp3")) + *ID3* + ID3V2> *id3* + # + + replacing Kitka/Wintersongs/02 Byla Cesta.mp3 with the filename of +your MP3. Once you have your id3-tag object, you can start poking +around. For instance, you can check out the list of frame objects with +the frames function. + + ID3V2> (frames *id3*) + (# + # + # + # + # + # + # + # + # + # + #) + + Now suppose you want to extract the song title. It's probably in one +of those frames, but to find it, you need to find the frame with the +"TT2" identifier. Well, you can check easily enough to see if the tag +contains such a frame by extracting all the identifiers like this: + + ID3V2> (mapcar #'id (frames *id3*)) + ("TT2" "TP1" "TAL" "TRK" "TPA" "TYE" "TCO" "TEN" "COM" "COM" "COM") + + There it is, the first frame. However, there's no guarantee it'll +always be the first frame, so you should probably look it up by +identifier rather than position. That's also straightforward using the +FIND function. + + ID3V2> (find "TT2" (frames *id3*) :test #'string= :key #'id) + # + + Now, to get at the actual information in the frame, do this: + + ID3V2> (information (find "TT2" (frames *id3*) :test #'string= :key #'id)) + "Byla Cesta^@" + + Whoops. That ^ is how Emacs prints a null character. In a maneuver +reminiscent of the kludge that turned ID3v1 into ID3v1.1, the +information slot of a text information frame, though not officially a +null-terminated string, can contain a null, and ID3 readers are supposed +to ignore any characters after the null. So, you need a function that +takes a string and returns the contents up to the first null character, +if any. That's easy enough using the +null+ constant from the binary +data library. + + (defun upto-null (string) + (subseq string 0 (position +null+ string))) + + Now you can get just the title. + + ID3V2> (upto-null (information (find "TT2" (frames *id3*) :test #'string= :key #'id))) + "Byla Cesta" + + You could just wrap that code in a function named song that takes an +id3-tag as an argument, and you'd be done. However, the only difference +between this code and the code you'll use to extract the other pieces of +information you'll need (such as the album name, the artist, and the +genre) is the identifier. So, it's better to split up the code a bit. +For starters, you can write a function that just finds a frame given an +id3-tag and an identifier like this: + + (defun find-frame (id3 id) + (find id (frames id3) :test #'string= :key #'id)) + + ID3V2> (find-frame *id3* "TT2") + # + + Then the other bit of code, the part that extracts the information +from a text-info-frame, can go in another function. + + (defun get-text-info (id3 id) + (let ((frame (find-frame id3 id))) + (when frame (upto-null (information frame))))) + + ID3V2> (get-text-info *id3* "TT2") + "Byla Cesta" + + Now the definition of song is just a matter of passing the right +identifier. + + (defun song (id3) (get-text-info id3 "TT2")) + + ID3V2> (song *id3*) + "Byla Cesta" + + However, this definition of song works only with version 2.2 tags +since the identifier changed from "TT2" to "TIT2" between version 2.2 +and version 2.3. And all the other tags changed too. Since the user of +this library shouldn't have to know about different versions of the ID3 +format to do something as simple as get the song title, you should +probably handle those details for them. 
A simple way is to change +find-frame to take not just a single identifier but a list of +identifiers like this: + + (defun find-frame (id3 ids) + (find-if #'(lambda (x) (find (id x) ids :test #'string=)) (frames id3))) + + Then change get-text-info slightly so it can take one or more +identifiers using a &rest parameter. + + (defun get-text-info (id3 &rest ids) + (let ((frame (find-frame id3 ids))) + (when frame (upto-null (information frame))))) + + Then the change needed to allow song to support both version 2.2 and +version 2.3 tags is just a matter of adding the version 2.3 identifier. + + (defun song (id3) (get-text-info id3 "TT2" "TIT2")) + + Then you just need to look up the appropriate version 2.2 and version +2.3 frame identifiers for any fields for which you want to provide an +accessor function. Here are the ones you'll need in Chapter 27: + + (defun album (id3) (get-text-info id3 "TAL" "TALB")) + + (defun artist (id3) (get-text-info id3 "TP1" "TPE1")) + + (defun track (id3) (get-text-info id3 "TRK" "TRCK")) + + (defun year (id3) (get-text-info id3 "TYE" "TYER" "TDRC")) + + (defun genre (id3) (get-text-info id3 "TCO" "TCON")) + + The last wrinkle is that the way the genre is stored in the TCO or +TCON frames isn't always human readable. Recall that in ID3v1, genres +were stored as a single byte that encoded a particular genre from a +fixed list. Unfortunately, those codes live on in ID3v2-if the text of +the genre frame is a number in parentheses, the number is supposed to be +interpreted as an ID3v1 genre code. But, again, users of this library +probably won't care about that ancient history. So, you should provide +a function that automatically translates the genre. The following +function uses the genre function just defined to extract the actual +genre text and then checks whether it starts with a left parenthesis, +decoding the version 1 genre code with a function you'll define in a +moment if it does: + + (defun translated-genre (id3) + (let ((genre (genre id3))) + (if (and genre (char= #\( (char genre 0))) + (translate-v1-genre genre) + genre))) + + Since a version 1 genre code is effectively just an index into an +array of standard names, the easiest way to implement translate-v1-genre +is to extract the number from the genre string and use it as an index +into an actual array. + + (defun translate-v1-genre (genre) + (aref *id3-v1-genres* (parse-integer genre :start 1 :junk-allowed t))) + + Then all you need to do is to define the array of names. The +following array of names includes the 80 official version 1 genres plus +the genres created by the authors of Winamp: + + (defparameter *id3-v1-genres* + #( + ;; These are the official ID3v1 genres. 
+ "Blues" "Classic Rock" "Country" "Dance" "Disco" "Funk" "Grunge" + "Hip-Hop" "Jazz" "Metal" "New Age" "Oldies" "Other" "Pop" "R&B" "Rap" + "Reggae" "Rock" "Techno" "Industrial" "Alternative" "Ska" + "Death Metal" "Pranks" "Soundtrack" "Euro-Techno" "Ambient" + "Trip-Hop" "Vocal" "Jazz+Funk" "Fusion" "Trance" "Classical" + "Instrumental" "Acid" "House" "Game" "Sound Clip" "Gospel" "Noise" + "AlternRock" "Bass" "Soul" "Punk" "Space" "Meditative" + "Instrumental Pop" "Instrumental Rock" "Ethnic" "Gothic" "Darkwave" + "Techno-Industrial" "Electronic" "Pop-Folk" "Eurodance" "Dream" + "Southern Rock" "Comedy" "Cult" "Gangsta" "Top 40" "Christian Rap" + "Pop/Funk" "Jungle" "Native American" "Cabaret" "New Wave" + "Psychadelic" "Rave" "Showtunes" "Trailer" "Lo-Fi" "Tribal" + "Acid Punk" "Acid Jazz" "Polka" "Retro" "Musical" "Rock & Roll" + "Hard Rock" + + ;; These were made up by the authors of Winamp but backported into + ;; the ID3 spec. + "Folk" "Folk-Rock" "National Folk" "Swing" "Fast Fusion" + "Bebob" "Latin" "Revival" "Celtic" "Bluegrass" "Avantgarde" + "Gothic Rock" "Progressive Rock" "Psychedelic Rock" "Symphonic Rock" + "Slow Rock" "Big Band" "Chorus" "Easy Listening" "Acoustic" "Humour" + "Speech" "Chanson" "Opera" "Chamber Music" "Sonata" "Symphony" + "Booty Bass" "Primus" "Porn Groove" "Satire" "Slow Jam" "Club" + "Tango" "Samba" "Folklore" "Ballad" "Power Ballad" "Rhythmic Soul" + "Freestyle" "Duet" "Punk Rock" "Drum Solo" "A capella" "Euro-House" + "Dance Hall" + + ;; These were also invented by the Winamp folks but ignored by the + ;; ID3 authors. + "Goa" "Drum & Bass" "Club-House" "Hardcore" "Terror" "Indie" + "BritPop" "Negerpunk" "Polsk Punk" "Beat" "Christian Gangsta Rap" + "Heavy Metal" "Black Metal" "Crossover" "Contemporary Christian" + "Christian Rock" "Merengue" "Salsa" "Thrash Metal" "Anime" "Jpop" + "Synthpop")) + + Once again, it probably feels like you wrote a ton of code in this +chapter. But if you put it all in a file, or if you download the +version from this book's Web site, you'll see it's just not that many +lines-most of the pain of writing this library stems from having to +understand the intricacies of the ID3 format itself. Anyway, now you +have a major piece of what you'll turn into a streaming MP3 server in +Chapters 27, 28, and 29. The other major bit of infrastructure you'll +need is a way to write server-side Web software, the topic of the next +chapter. + + ---------- Footnotes ---------- + + (1) Ensuring that kind of interfield consistency would be a fine +application for :after methods on the accessor generic functions. For +instance, you could define this :after method to keep size in sync with +the information string: + + (defmethod (setf information) :after (value (frame text-info-frame)) + (declare (ignore value)) + (with-slots (encoding size information) frame + (setf size (encoded-string-length information encoding nil)))) + + +File: pcl.info, Node: Chapter 26, Next: Chapter 27, Prev: Chapter 25, Up: Top + +26. Practical: Web Programming with AllegroServe +================================================ + +In this chapter you'll look at one way to develop Web-based programs in +Common Lisp, using the open-source AllegroServe Web server. This isn't +meant as a full introduction to AllegroServe. And I'm certainly not +going to cover anything more than a tiny corner of the larger topic of +Web programming. 
My goal here is to cover enough of the basics of using +AllegroServe that you'll be able, in Chapter 29, to develop an +application for browsing a library of MP3 files and streaming them to an +MP3 client. Similarly, this chapter will serve as a brief introduction +to Web programming for folks new to the topic. + +* Menu: + +* 26-1:: A 30-Second Intro to Server-Side Web Programming +* 26-2:: AllegroServe +* 26-3:: Generating Dynamic Content with AllegroServe +* 26-4:: Generating HTML +* 26-5:: HTML Macros +* 26-6:: Query Parameters +* 26-7:: Cookies +* 26-8:: A Small Application Framework +* 26-9:: The Implementation + + +File: pcl.info, Node: 26-1, Next: 26-2, Prev: Chapter 26, Up: Chapter 26 + +A 30-Second Intro to Server-Side Web Programming +================================================ + +While Web programming today typically involves quite a number of +software frameworks and different protocols, the core bits of Web +programming haven't changed much since they were invented in the early +1990s. For simple applications, such as the one you'll write in Chapter +29, you need to understand only a few key concepts, so I'll review them +quickly here. Experienced Web programmers can skim or skip the rest of +this section. (1) + + To start, you need to understand the roles the Web browser and the +Web server play in Web programming. While a modern browser comes with a +lot of bells and whistles, the core functionality of a Web browser is to +request Web pages from a Web server and then render them. Typically +those pages will be written in the Hypertext Markup Language (HTML), +which tells the browser how to render the page, including where to +insert inline images and links to other Web pages. HTML consists of +text marked up with tags that give the text a structure that the browser +uses when rendering the page. For instance, a simple HTML document +looks like this: + + + + Hello + + +

     <p>Hello, world!</p>

     <p>This is a picture: <img src="some-picture.jpg"/></p>

     <p><a href="another-page.html">This is a link to another page.</a></p>

+ + + + Figure 26-1 shows how the browser renders this page. + +[image src="imgs/new-sample.jpg"] + + Figure 26-1. Sample Web page + + The browser and server communicate using a protocol called the +Hypertext Transfer Protocol (HTTP). While you don't need to worry about +the details of the protocol, it's worth understanding that it consists +entirely of a sequence of requests initiated by the browser and +responses generated by the server. That is, the browser connects to the +Web server and sends a request that includes, at the least, the desired +URL and the version of HTTP that the browser speaks. The browser can +also include data in its request; that's how the browser submits HTML +forms to the server. + + To reply to a request, the server sends a response made up of a set +of headers and a body. The headers contain information about the body, +such as what type of data it is (for instance, HTML, plain text, or an +image), and the body is the data itself, which is then rendered by the +browser. The server can also send an error response telling the browser +that its request couldn't be answered for some reason. + + And that's pretty much it. Once the browser has received the +complete response from the server, there's no communication between the +browser and the server until the next time the browser decides to +request a page from the server. (2) This is the main constraint of Web +programming-there's no way for code running on the server to affect what +the user sees in their browser unless the browser issues a new request +to the server. (3) + + Some Web pages, called static pages, are simply HTML files stored on +the Web server and served up when requested by the browser. Dynamic +pages, on the other hand, consist of HTML generated each time the page +is requested by a browser. For instance, a dynamic page might be +generated by querying a database and then constructing HTML to represent +the results of the query. (4) + + When generating its response to a request, server-side code has four +main pieces of information to act on. The first piece of information is +the requested URL. Typically, however, the URL is used by the Web server +itself to determine what code is responsible for generating the +response. Next, if the URL contains a question mark, everything after +the question mark is considered to be a query string, which is typically +ignored by the Web server except that it makes it available to the code +generating the response. Most of the time the query string contains a +set of key/value pairs. The request from the browser can also contain +post data, which also usually consists of key/value pairs. Post data is +typically used to submit HTML forms. The key/value pairs supplied in +either the query string or the post data are collectively called the +query parameters. + + Finally, in order to string together a sequence of individual +requests from the same browser, code running in the server can set a +cookie, sending a special header in its response to the browser that +contains a bit of opaque data called a cookie. After a cookie is set by +a particular server, the browser will send the cookie with each request +it sends to that server. The browser doesn't care about the data in the +cookie-it just echoes it back to the server for the server-side code to +interpret however it wants. + + These are the primitive elements on top of which 99 percent of +server-side Web programming is built. 
The browser sends a request, the +server finds some code to handle the request and runs it, and the code +uses query parameters and cookies to determine what to do. + + ---------- Footnotes ---------- + + (1) Readers new to Web programming will probably need to supplement +this introduction with a more in-depth tutorial or two. You can find a +good set of online tutorials at http://www.jmarshall.com/easy/. + + (2) Loading a single Web page may actually involve multiple +requests-to render the HTML of a page containing inline images, the +browser must request each image individually and then insert each into +the appropriate place in the rendered HTML. + + (3) Much of the complexity around Web programming is a result of +trying to work around this fundamental limitation in order to provide a +user experience that's more like the interactivity provided by desktop +applications. + + (4) Unfortunately, dynamic is somewhat overloaded in the Web world. +The phrase Dynamic HTML refers to HTML containing embedded code, usually +in the language JavaScript, that can be executed in the browser without +further communication with the Web server. Used with some discretion, +Dynamic HTML can improve the usability of a Web-based application since, +even with high-speed Internet connections, making a request to a Web +server, receiving the response, and rendering the new page can take a +noticeable amount of time. To further confuse things, dynamically +generated pages (in other words, generated on the server) could also +contain Dynamic HTML (code to be run on the client.) For the purposes +of this book, you'll stick to dynamically generating plain old +nondynamic HTML. + + +File: pcl.info, Node: 26-2, Next: 26-3, Prev: 26-1, Up: Chapter 26 + +AllegroServe +============ + +You can serve Web content using Common Lisp in a number of ways; there +are at least three open-source Web servers written in Common Lisp as +well as plug-ins such as mod_lisp (1) and Lisplets (2) that allow the +Apache Web server or any Java Servlet container to delegate requests to +a Lisp server running in a separate process. + + For this chapter, you'll use a version of the open-source Web server +AllegroServe, originally written by John Foderaro at Franz Inc.. +AllegroServe is included in the version of Allegro available from Franz +for use with this book. If you're not using Allegro, you can use +PortableAllegroServe, a friendly fork of the AllegroServe code base, +which includes an Allegro compatibility layer that allows +PortableAllegroServe to run on most Common Lisps. The code you'll write +in this chapter and in Chapter 29 should run in both vanilla +AllegroServe and PortableAllegroServe. + + AllegroServe provides a programming model similar in spirit to Java +Servlets-each time a browser requests a page, AllegroServe parses the +request and looks up an object, called an entity, which handles the +request. Some entity classes provided as part of AllegroServe know how +to serve static content-either individual files or the contents of a +directory tree. Others, the ones I'll spend most of this chapter +discussing, run arbitrary Lisp code to generate the response. (3) + + But before I get to that, you need to know how to start AllegroServe +and set it up to serve a few files. The first step is to load the +AllegroServe code into your Lisp image. In Allegro, you can simply type +(require :aserve). 
In other Lisps (or in Allegro), you can load +PortableAllegroServe by loading the file INSTALL.lisp at the top of the +portableaserve directory tree. Loading AllegroServe will create three +new packages, NET.ASERVE, NET.HTML.GENERATOR, and NET.ASERVE.CLIENT. (4) + + After loading the server, you start it with the function start in the +NET.ASERVE package. To have easy access to the symbols exported from +NET.ASERVE, from COM.GIGAMONKEYS.HTML (a package I'll discuss in a +moment), and from the rest of Common Lisp, you should create a new +package to play in like this: + + CL-USER> (defpackage :com.gigamonkeys.web + (:use :cl :net.aserve :com.gigamonkeys.html)) + # + + Now switch to that package with this IN-PACKAGE expression: + + CL-USER> (in-package :com.gigamonkeys.web) + # + WEB> + + Now you can use the exported names from NET.ASERVE without +qualification. The function start starts the server. It takes quite a +number of keyword parameters, but the only one you need to pass is +:port, which specifies the port to listen on. You should probably use a +high port such as 2001 instead of the default port for HTTP servers, 80, +because on Unix-derived operating systems only the root user can listen +on ports below 1024. To run AllegroServe listening on port 80 on Unix, +you'd need to start Lisp as root and then use the :setuid and :setgid +parameters to tell start to switch its identity after opening the port. +You can start a server listening on port 2001 like this: + + WEB> (start :port 2001) + # + + The server is now running in your Lisp. It's possible you'll get an +error that says something about "port already in use" when you try to +start the server. This means port 2001 is already in use by some other +server on your machine. In that case, the simplest fix is to use a +different port, supplying a different argument to start and then using +that value instead of 2001 in the URLs used throughout this chapter. + + You can continue to interact with Lisp via the REPL because +AllegroServe starts its own threads to handle requests from browsers. +This means, among other things, that you can use the REPL to get a view +into the guts of your server while it's running, which makes debugging +and testing a lot easier than if the server is a complete black box. + + Assuming you're running Lisp on the same machine as your browser, you +can check that the server is up and running by pointing your browser at +http://localhost:2001/. At this point you should get a page-not-found +error message in the browser since you haven't published anything yet. +But the error message will be from AllegroServe; it'll say so at the +bottom of the page. On the other hand, if the browser displays an error +dialog that says something like "The connection was refused when +attempting to contact localhost:2001," it means either that the server +isn't running or that you started it with a different port than 2001. + + Now you can publish some files. Suppose you have a file hello.html +in the directory /tmp/html with the following contents: + + + + Hello + + +

     <p>Hello, world!</p>

+ + + + You can publish it individually with the publish-file function. + + WEB> (publish-file :path "/hello.html" :file "/tmp/html/hello.html") + # + + The :path argument is the path that will appear in the URL requested +by the browser, while the :file argument is the name of the file in the +file system. After evaluating the publish-file expression, you can +point your browser to http://localhost:2001/hello.html, and it should +display a page something like Figure 26-2. + +[image src="imgs/hello-world.jpg"] + + Figure 26-2. http://localhost:2001/hello.html + + You could also publish a whole directory tree of files using the +publish-directory function. First let's clear out the already published +entity with the following call to publish-file: + + WEB> (publish-file :path "/hello.html" :remove t) + NIL + + Now you can publish the whole /tmp/html/ directory (and all its +subdirectories) with the publish-directory function. + + WEB> (publish-directory :prefix "/" :destination "/tmp/html/") + # + + In this case, the :prefix argument specifies the beginning of the +path part of URLs that should be handled by this entity. Thus, if the +server receives a request for http://localhost:2001/foo/bar.html, the +path is /foo/bar.html, which starts with /. This path is then +translated to a filename by replacing the prefix, /, with the +destination, /tmp/html/. Thus, the URL http://localhost:2001/hello.html +will still be translated into a request for the file +/tmp/html/hello.html. + + ---------- Footnotes ---------- + + (1) http://www.fractalconcept.com/asp/html/mod_lisp.html + + (2) http://lisplets.sourceforge.net/ + + (3) AllegroServe also provides a framework called Webactions that's +analogous to JSPs in the Java world-instead of writing code that +generates HTML, with Webactions you write pages that are essentially +HTML with a bit of magic foo that turns into code to be run when the +page is served. I won't cover Webactions in this book. + + (4) Loading PortableAllegroServe will create some other packages for +the compatibility libraries, but the packages you'll care about are +those three. + + +File: pcl.info, Node: 26-3, Next: 26-4, Prev: 26-2, Up: Chapter 26 + +Generating Dynamic Content with AllegroServe +============================================ + +Publishing entities that generate dynamic content is nearly as simple as +publishing static content. The functions publish and publish-prefix are +the dynamic analogs of publish-file and publish-directory. The basic +idea of these two functions is that you publish a function that will be +called to generate the response to a request for either a specific URL +or any URL with a given prefix. The function will be called with two +arguments: an object representing the request and the published entity. +Most of time you don't need to do anything with the entity object except +to pass it along to a couple macros I'll discuss in a moment. On the +other hand, you'll use the request object to obtain information +submitted by the browser-query parameters included in the URL or data +posted using an HTML form. + + For a trivial example of using a function to generate dynamic +content, let's write a function that generates a page with a different +random number each time it's requested. + + (defun random-number (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (format + (request-reply-stream request) + "~ Random~ ~

        <p>Random number: ~d</p>~

~ ~ ~ " + (random 1000))))) + + The macros with-http-response and with-http-body are part of +AllegroServe. The former starts the process of generating an HTTP +response and can be used, as here, to specify things such as the type of +content that will be returned. It also handles various parts of HTTP +such as dealing with If-Modified-Since requests. The with-http-body +actually sends the HTTP response headers and then executes its body, +which should contain code that generates the content of the reply. +Within with-http-response but before the with-http-body, you can add or +change HTTP headers to be sent in the reply. The function +request-reply-stream is also part of AllegroServe and returns the stream +to which you should write output intended to be sent to the browser. + + As this function shows, you can just use FORMAT to print HTML to the +stream returned by request-reply-stream. In the next section, I'll show +you more convenient ways to programmatically generate HTML. (1) + + Now you're ready to publish this function. + + WEB> (publish :path "/random-number" :function 'random-number) + # + + As it does in the publish-file function, the :path argument specifies +the path part of the URL that will result in this function being +invoked. The :function argument specifies either the name or an actual +function object. Using the name of a function, as shown here, allows +you to redefine the function later without republishing and have +AllegroServe use the new function definition. After evaluating the call +to publish, you can point your browser at http:// +localhost:2001/random-number to get a page with a random number on it, +as shown in Figure 26-3. + +[image src="imgs/random-number.jpg"] + + Figure 26-3. http://localhost:2001/random-number + + ---------- Footnotes ---------- + + (1) The ~ followed by a newline tells FORMAT to ignore whitespace +after the newline, which allows you to indent your code nicely without +adding a bunch of whitespace to the HTML. Since white-space is typically +not significant in HTML, this doesn't matter to the browser, but it +makes the generated HTML source look a bit nicer to humans. + + +File: pcl.info, Node: 26-4, Next: 26-5, Prev: 26-3, Up: Chapter 26 + +Generating HTML +=============== + +Although using FORMAT to emit HTML works fine for the simple pages I've +discussed so far, as you start building more elaborate pages it'd be +nice to have a more concise way to generate HTML. Several libraries are +available for generating HTML from an s-expression representation +including one, htmlgen, that's included with AllegroServe. In this +chapter you'll use a library called FOO, (1) which is loosely modeled on +Franz's htmlgen and whose implementation you'll look at in more detail +in Chapters 30 and 31. For now, however, you just need to know how to +use FOO. + + Generating HTML from within Lisp is quite natural since s-expressions +and HTML are essentially isomorphic. You can represent HTML elements +with s-expressions by treating each element in HTML as a list "tagged" +with an appropriate first element, such as a keyword symbol of the same +name as the HTML tag. Thus, the HTML

<p>foo</p>

is represented by the +s-expression (:p "foo"). Because HTML elements nest the same way lists +in s-expressions do, this scheme extends to more complex HTML. For +instance, this HTML: + + + + Hello + + +

     <p>Hello, world!</p>

+ + + + could be represented with the following s-expression: + + (:html + (:head (:title "Hello")) + (:body (:p "Hello, world!"))) + + HTML elements with attributes complicate things a bit but not in an +insurmountable way. FOO supports two ways of including attributes in a +tag. One is to simply follow the first item of the list with +keyword/value pairs. The first element that follows a keyword/value +pair that's not itself a keyword symbol marks the beginning of the +element's contents. Thus, you'd represent this HTML: + + This is a link + + with the following s-expression: + + (:a :href "foo.html" "This is a link") + + The other syntax FOO supports is to group the tag name and attributes +into their own list like this: + + ((:a :href "foo.html") "This is link.") + + FOO can use the s-expression representation of HTML in two ways. The +function emit-html takes an HTML s-expression and outputs the +corresponding HTML. + + WEB> (emit-html '(:html (:head (:title "Hello")) (:body (:p "Hello, world!")))) + + + Hello + + +

     <p>Hello, world!</p>

+ + + T + + However, emit-html isn't always the most efficient way to generate +HTML because its argument must be a complete s-expression representation +of the HTML to be generated. While it's easy to build such a +representation, it's not always particularly efficient. For instance, +suppose you wanted to make an HTML page containing a list of 10,000 +random numbers. You could build the s-expression using a backquote +template and then pass it to emit-html like this: + + (emit-html + `(:html + (:head + (:title "Random numbers")) + (:body + (:h1 "Random numbers") + (:p ,@(loop repeat 10000 collect (random 1000) collect " "))))) + + However, this has to build a tree containing a 10,000-element list +before it can even start emitting HTML, and the whole s-expression will +become garbage as soon as the HTML is emitted. To avoid this +inefficiency, FOO also provides a macro html, which allows you to embed +bits of Lisp code in the middle of an HTML s-expression. + + Literal values such as strings and numbers in the input to html are +interpolated into the output HTML. Likewise, symbols are treated as +variable references, and code is generated to emit their value at +runtime. Thus, both of these: + + (html (:p "foo")) + + (let ((x "foo")) (html (:p x))) + + will emit the following: + +

     <p>foo</p>

+ + List forms that don't start with a keyword symbol are assumed to be +code and are embedded in the generated code. Any values the embedded +code returns will be ignored, but the code can emit more HTML by calling +html itself. For instance, to emit the contents of a list in HTML, you +might write this: + + (html (:ul (dolist (item (list 1 2 3)) (html (:li item))))) + + which will emit the following HTML: + +
    +
     <ul>
       <li>1</li>
       <li>2</li>
       <li>3</li>
     </ul>
+ + If you want to emit the value of a list form, you must wrap it in the +pseudotag :print. Thus, this expression: + + (html (:p (+ 1 2))) + + generates this HTML after computing and discarding the value 3: + +
     <p></p>
+ + To emit the 3, you must write this: + + (html (:p (:print (+ 1 2)))) + + Or you could compute the value and store it in a variable outside the +call to html like this: + + (let ((x (+ 1 2))) (html (:p x))) + + Thus, you can use the html macro to generate the list of random +numbers like this: + + (html + (:html + (:head + (:title "Random numbers")) + (:body + (:h1 "Random numbers") + (:p (loop repeat 10 do (html (:print (random 1000)) " ")))))) + + The macro version will be quite a bit more efficient than the +emit-html version. Not only do you never have to generate an +s-expression representing the whole page, also much of the work that +emit-html does at runtime to interpret the s-expression will be done +once, when the macro is expanded, rather than every time the code is +run. + + You can control where the output generated by both html and emit-html +is sent with the macro with-html-output, which is part of the FOO +library. Thus, you can use the with-html-output and html macros from +FOO to rewrite random-number like this: + + (defun random-number (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (with-html-output ((request-reply-stream request)) + (html + (:html + (:head (:title "Random")) + (:body + (:p "Random number: " (:print (random 1000)))))))))) + + ---------- Footnotes ---------- + + (1) FOO is a recursive tautological acronym for FOO Outputs Output. + + +File: pcl.info, Node: 26-5, Next: 26-6, Prev: 26-4, Up: Chapter 26 + +HTML Macros +=========== + +Another feature of FOO is that it allows you to define HTML "macros" +that can translate arbitrary forms into HTML s-expressions that the html +macro understands. For instance, suppose you frequently find yourself +writing pages of this form: + + (:html + (:head (:title "Some title")) + (:body + (:h1 "Some title") + ... stuff ...)) + + You could define an HTML macro to capture that pattern like this: + + (define-html-macro :standard-page ((&key title) &body body) + `(:html + (:head (:title ,title)) + (:body + (:h1 ,title) + ,@body))) + + Now you can use the "tag" :standard-page in your s-expression HTML, +and it'll be expanded before being interpreted or compiled. For +instance, the following: + + (html (:standard-page (:title "Hello") (:p "Hello, world."))) + + generates the following HTML: + + + + Hello + + +

     <h1>Hello</h1>
     <p>Hello, world.</p>

+ + + + +File: pcl.info, Node: 26-6, Next: 26-7, Prev: 26-5, Up: Chapter 26 + +Query Parameters +================ + +Of course, generating HTML output is only half of Web programming. The +other thing you need to do is get input from the user. As I discussed +in the "A 30-Second Intro to Server-Side Web Programming" section, when +a browser requests a page from a Web server, it can send query +parameters in the URL and post data, both of which act as input to the +server-side code. + + AllegroServe, like most Web programming frameworks, takes care of +parsing both these sources of input for you. By the time your published +functions are called, all the key/value pairs from the query string +and/or post data have been decoded and placed into an alist that you can +retrieve from the request object with the function request-query. The +following function returns a page showing all the query parameters it +receives: + + (defun show-query-params (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (with-html-output ((request-reply-stream request)) + (html + (:standard-page + (:title "Query Parameters") + (if (request-query request) + (html + (:table :border 1 + (loop for (k . v) in (request-query request) + do (html (:tr (:td k) (:td v)))))) + (html (:p "No query parameters."))))))))) + + (publish :path "/show-query-params" :function 'show-query-params) + + If you give your browser a URL with a query string in it like the +following: + + http://localhost:2001/show-query-params?foo=bar&baz=10 + + you should get back a page similar to the one shown in Figure 26-4. + +[image src="imgs/show-query-params-1a.jpg"] + + Figure 26-4. http://localhost:2001/show-query-params?foo=bar&baz=10 + + To generate some post data, you need an HTML form. The following +function generates a simple form, which submits its data to +show-query-params: + + (defun simple-form (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (let ((*html-output* (request-reply-stream request))) + (html + (:html + (:head (:title "Simple Form")) + (:body + (:form :method "POST" :action "/show-query-params" + (:table + (:tr (:td "Foo") + (:td (:input :name "foo" :size 20))) + (:tr (:td "Password") + (:td (:input :name "password" :type "password" :size 20)))) + (:p (:input :name "submit" :type "submit" :value "Okay") + (:input ::type "reset" :value "Reset")))))))))) + + (publish :path "/simple-form" :function 'simple-form) + + Point your browser to http://localhost:2001/simple-form, and you +should see a page like the one in Figure 26-5. + + If you fill in the form with the "abc" and "def" values, clicking the +Okay button should take you to a page like the one in Figure 26-6. + +[image src="imgs/simple-form.jpg"] + + Figure 26-5. http://localhost:2001/simple-form + +[image src="imgs/show-query-params-2.jpg"] + + Figure 26-6. Result of submitting the simple form + + However, most of the time you won't need to iterate over all the +query parameters; you'll want to pick out individual parameters. For +instance, you might want to modify random-number so the limit value you +pass to RANDOM can be supplied via a query parameter. In that case, you +use the function request-query-value, which takes the request object and +the name of the parameter whose value you want and returns the value as +a string or NIL if no such parameter has been supplied. 
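For instance, here is a quick sketch of how those return values look,
assuming the browser requested the page with ?limit=500 in its query
string (the parameter names are just for illustration):

     ;; inside a published function, with ?limit=500 in the query string
     (request-query-value "limit" request)   ; => "500"
     (request-query-value "missing" request) ; => NIL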
A +parameterizable version of random-number might look like this: + + (defun random-number (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (let* ((*html-output* (request-reply-stream request)) + (limit-string (or (request-query-value "limit" request) "")) + (limit (or (parse-integer limit-string :junk-allowed t) 1000))) + (html + (:html + (:head (:title "Random")) + (:body + (:p "Random number: " (:print (random limit)))))))))) + + Because request-query-value can return either NIL or an empty string, +you have to deal with both those cases when parsing the parameter into a +number to pass to RANDOM. You can deal with a NIL value when you bind +limit-string, binding it to "" if there's no "limit" query parameter. +Then you can use the :junk-allowed argument to PARSE-INTEGER to ensure +that it returns either NIL (if it can't parse an integer from the string +given) or an integer. In the section "A Small Application Framework," +you'll develop some macros to make it easier to deal with grabbing query +parameters and converting them to various types. + + +File: pcl.info, Node: 26-7, Next: 26-8, Prev: 26-6, Up: Chapter 26 + +Cookies +======= + +In AllegroServe you can send a Set-Cookie header that tells the browser +to save a cookie and send it along with subsequent requests by calling +the function set-cookie-header within the body of with-http-response but +before the call to with-http-body. The first argument to the function +is the request object, and the remaining arguments are keyword arguments +used to set the various properties of the cookie. The only two you must +pass are the :name and :value arguments, both of which should be +strings. The other possible arguments that affect the cookie sent to +the browser are :expires, :path, :domain, and :secure. + + Of these, you need to worry only about :expires. It controls how +long the browser should save the cookie. If :expires is NIL (the +default), the browser will save the cookie only until it exits. Other +possible values are :never, which means the cookie should be kept +forever, or a universal time as returned by GET-UNIVERSAL-TIME or +ENCODE-UNIVERSAL-TIME. An :expires of zero tells the client to +immediately discard an existing cookie. (1) + + After you've set a cookie, you can use the function get-cookie-values +to get an alist containing one name/value pair for each cookie sent by +the browser. From that alist, you can pick out individual cookie values +using ASSOC and CDR. + + The following function shows the names and values of all the cookies +sent by the browser: + + (defun show-cookies (request entity) + (with-http-response (request entity :content-type "text/html") + (with-http-body (request entity) + (with-html-output ((request-reply-stream request)) + (html + (:standard-page + (:title "Cookies") + (if (null (get-cookie-values request)) + (html (:p "No cookies.")) + (html + (:table + (loop for (key . value) in (get-cookie-values request) + do (html (:tr (:td key) (:td value))))))))))))) + + (publish :path "/show-cookies" :function 'show-cookies) + + The first time you load the page http://localhost:2001/show-cookies +it should say "No cookies" as shown in Figure 26-7 since you haven't set +any yet. + +[image src="imgs/show-cookies-no-cookies.jpg"] + + Figure 26-7. 
http://localhost:2001/show-cookies with no cookies + + To set a cookie, you need another function, such as the following: + + (defun set-cookie (request entity) + (with-http-response (request entity :content-type "text/html") + (set-cookie-header request :name "MyCookie" :value "A cookie value") + (with-http-body (request entity) + (with-html-output ((request-reply-stream request)) + (html + (:standard-page + (:title "Set Cookie") + (:p "Cookie set.") + (:p (:a :href "/show-cookies" "Look at cookie jar.")))))))) + + (publish :path "/set-cookie" :function 'set-cookie) + + If you enter the URL http://localhost:2001/set-cookie, your browser +should display a page like the one in Figure 26-8. Additionally, the +server will send a Set-Cookie header with a cookie named "MyCookie" with +"A cookie value" as its value. If you click the link Look at cookie +jar, you'll be taken to the /show-cookies page where you'll see the new +cookie, as shown in Figure 26-9. Because you didn't specify an :expires +argument, the browser will continue to send the cookie with each request +until you quit the browser. + +[image src="imgs/set-cookie.jpg"] + + Figure 26-8. http://localhost:2001/set-cookie + +[image src="imgs/show-cookies-one-cookie.jpg"] + + Figure 26-9. http://localhost:2001/show-cookies after setting a +cookie + + ---------- Footnotes ---------- + + (1) For information about the meaning of the other parameters, see +the AllegroServe documentation and RFC 2109, which describes the cookie +mechanism. + + +File: pcl.info, Node: 26-8, Next: 26-9, Prev: 26-7, Up: Chapter 26 + +A Small Application Framework +============================= + +Although AllegroServe provides fairly straightforward access to all the +basic facilities you need to write server-side Web code (access to query +parameters from both the URL's query string and the post data; the +ability to set cookies and retrieve their values; and, of course, the +ability to generate the response sent back to the browser), there's a +fair bit of annoyingly repetitive code. + + For instance, every HTML-generating function you write is going to +take the arguments request and entity and then will contain calls to +with-http-response, with-http-response, and-if you're going to use FOO +to generate HTML-with-html-output. Then, in functions that need to get +at query parameters, there will be a bunch of calls to +request-query-value and then more code to convert the string returned to +whatever type you actually want. Finally, you need to remember to +publish the function. + + To reduce the amount of boilerplate you have to write, you can write +a small framework on top of AllegroServe to make it easier to define +functions that handle requests for a particular URL. + + The basic approach will be to define a macro, define-url-function, +that you'll use to define functions that will automatically be published +via publish. This macro will expand into a DEFUN that contains the +appropriate boilerplate as well as code to publish the function under a +URL of the same name. It'll also take care of generating code to +extract values from query parameters and cookies and to bind them to +variables declared in the function's parameter list. Thus, the basic +form of a define-url-function definition is this: + + (define-url-function name (request query-parameter*) + body) + + where the body is the code to emit the HTML of the page. It'll be +wrapped in a call to FOO's html macro, so for simple pages it might +contain nothing but s-expression HTML. 
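For instance, a page that takes no query parameters at all can have a
body that's nothing but s-expression HTML. This is just a sketch, using
a made-up name, hello-page, which the macro will publish under the path
/hello-page:

     (define-url-function hello-page (request)
       (:html
        (:head (:title "Hello"))
        (:body (:p "Hello from define-url-function."))))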
+ + Within the body, the query parameter variables will be bound to +values of query parameters with the same name or from a cookie. In the +simplest case, a query parameter's value will be the string taken from +the query parameter or post data field of the same name. If the query +parameter is specified with a list, you can also specify an automatic +type conversion, a default value, and whether to look for and save the +value of the parameter in a cookie. The complete syntax for a +query-parameter is as follows: + + name | (name type [default-value] [stickiness]) + + The type must be a name recognized by define-url-function. I'll +discuss in a moment how to define new types. The default-value must be +a value of the given type. Finally, stickiness, if supplied, indicates +that the parameter's value should be taken from an appropriately named +cookie if no query parameter is supplied and that a Set-Cookie header +should be sent in the response that saves the value in the cookie of the +same name. Thus, a sticky parameter, after being explicitly supplied a +value via a query parameter, will keep that value on subsequent requests +of the page even when no query parameter is supplied. + + The name of the cookie used depends on the value of stickiness: with +a value of :global, the cookie will be named the same as the parameter. +Thus, different functions that use globally sticky parameters with the +same name will share the value. If stickiness is :package, then the +cookie name is constructed from the name of the parameter and the +package of the function's name; this allows functions in the same +package to share values but not have to worry about stomping on +parameters of functions in other packages. Finally, a parameter with a +stickiness value of :local will use a cookie made from the name of the +parameter, the package of the function name, and the function name, +making it unique to that function. + + For instance, you can use define-url-function to replace the previous +eleven-line definition of random-page with this five-line version: + + (define-url-function random-number (request (limit integer 1000)) + (:html + (:head (:title "Random")) + (:body + (:p "Random number: " (:print (random limit)))))) + + If you wanted the limit argument to be sticky, you could change the +limit declaration to (limit integer 1000 :local). + + +File: pcl.info, Node: 26-9, Next: Chapter 27, Prev: 26-8, Up: Chapter 26 + +The Implementation +================== + +I'll explain the implementation of define-url-function from the top +down. The macro itself looks like this: + + (defmacro define-url-function (name (request &rest params) &body body) + (with-gensyms (entity) + (let ((params (mapcar #'normalize-param params))) + `(progn + (defun ,name (,request ,entity) + (with-http-response (,request ,entity :content-type "text/html") + (let* (,@(param-bindings name request params)) + ,@(set-cookies-code name request params) + (with-http-body (,request ,entity) + (with-html-output ((request-reply-stream ,request)) + (html ,@body)))))) + (publish :path ,(format nil "/~(~a~)" name) :function ',name))))) + + Let's take it bit by bit, starting with the first few lines. + + (defmacro define-url-function (name (request &rest params) &body body) + (with-gensyms (entity) + (let ((params (mapcar #'normalize-param params))) + + Up to here you're just getting ready to generate code. You GENSYM a +symbol to use later as the name of the entity parameter in the DEFUN. 
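+
+   If the with-gensyms macro from the macro-utilities library used
+earlier in the book isn't already loaded, a minimal version, adequate
+for this use, can be sketched like this:
+
+     ;; Minimal sketch: bind each listed name to a fresh GENSYM symbol.
+     (defmacro with-gensyms ((&rest names) &body body)
+       `(let ,(loop for n in names collect `(,n (gensym)))
+          ,@body))
+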
+Then you normalize the parameters, converting plain symbols to list form +using this function: + + (defun normalize-param (param) + (etypecase param + (list param) + (symbol `(,param string nil nil)))) + + In other words, declaring a parameter with just a symbol is the same +as declaring a nonsticky, string parameter with no default value. + + Then comes the PROGN. You must expand into a PROGN because you need +to generate code to do two things: define a function with DEFUN and call +publish. You should define the function first so if there's an error in +the definition, the function won't be published. The first two lines of +the DEFUN are just boilerplate. + + (defun ,name (,request ,entity) + (with-http-response (,request ,entity :content-type "text/html") + + Now you do the real work. The following two lines generate the +bindings for the parameters specified in define-url-function other than +request and the code that calls set-cookie-header for the sticky +parameters. Of course, the real work is done by helper functions that +you'll look at in a moment. (1) + + (let* (,@(param-bindings name request params)) + ,@(set-cookies-code name request params) + + The rest is just more boilerplate, putting the body from the +define-url-function definition in the appropriate context of +with-http-body, with-html-output, and html macros. Then comes the call +to publish. + + (publish :path ,(format nil "/~(~a~)" name) :function ',name) + + The expression (format nil "/~(~a~)" name) is evaluated at macro +expansion time, generating a string consisting of /, followed by an +all-lowercase version of the name of the function you're about to +define. That string becomes the :path argument to publish, while the +function name is interpolated as the :function argument. + + Now let's look at the helper functions used to generate the DEFUN +form. To generate parameter bindings, you need to loop over the params +and collect a snippet of code for each one, generated by param-binding. +That snippet will be a list containing the name of the variable to bind +and the code that will compute the value of that variable. The exact +form of code used to compute the value will depend on the type of the +parameter, whether it's sticky, and the default value, if any. Because +you already normalized the params, you can use DESTRUCTURING-BIND to +take them apart in param-binding. + + (defun param-bindings (function-name request params) + (loop for param in params + collect (param-binding function-name request param))) + + (defun param-binding (function-name request param) + (destructuring-bind (name type &optional default sticky) param + (let ((query-name (symbol->query-name name)) + (cookie-name (symbol->cookie-name function-name name sticky))) + `(,name (or + (string->type ',type (request-query-value ,query-name ,request)) + ,@(if cookie-name + (list `(string->type ',type (get-cookie-value ,request ,cookie-name)))) + ,default))))) + + The function string->type, which you use to convert strings obtained +from the query parameters and cookies to the desired type, is a generic +function with the following signature: + + (defgeneric string->type (type value)) + + To make a particular name usable as a type name for a query +parameter, you just need to define a method on string->type. You'll +need to define at least a method specialized on the symbol string since +that's the default type. Of course, that's pretty easy. 
Since browsers +sometimes submit forms with empty strings to indicate no value was +supplied for a particular value, you'll want to convert an empty string +to NIL as this method does: + + (defmethod string->type ((type (eql 'string)) value) + (and (plusp (length value)) value)) + + You can add conversions for other types needed by your application. +For instance, to make integer usable as a query parameter type so you +can handle the limit parameter of random-page, you might define this +method: + + (defmethod string->type ((type (eql 'integer)) value) + (parse-integer (or value "") :junk-allowed t)) + + Another helper function used in the code generated by param-binding +is get-cookie-value, which is just a bit of sugar around the +get-cookie-values function provided by AllegroServe. It looks like +this: + + (defun get-cookie-value (request name) + (cdr (assoc name (get-cookie-values request) :test #'string=))) + + The functions that compute the query parameter and cookies names are +similarly straightforward. + + (defun symbol->query-name (sym) + (string-downcase sym)) + + (defun symbol->cookie-name (function-name sym sticky) + (let ((package-name (package-name (symbol-package function-name)))) + (when sticky + (ecase sticky + (:global + (string-downcase sym)) + (:package + (format nil "~(~a:~a~)" package-name sym)) + (:local + (format nil "~(~a:~a:~a~)" package-name function-name sym)))))) + + To generate the code that sets cookies for sticky parameters, you +again loop over the list of parameters, this time collecting a snippet +of code for each sticky param. You can use the when and collect it LOOP +forms to collect only the non-NIL values returned by set-cookie-code. + + (defun set-cookies-code (function-name request params) + (loop for param in params + when (set-cookie-code function-name request param) collect it)) + + (defun set-cookie-code (function-name request param) + (destructuring-bind (name type &optional default sticky) param + (declare (ignore type default)) + (if sticky + `(when ,name + (set-cookie-header + ,request + :name ,(symbol->cookie-name function-name name sticky) + :value (princ-to-string ,name)))))) + + One of the advantages of defining macros in terms of helper functions +like this is that it's easy to make sure the individual bits of code +you're generating look right. For instance, you can check that the +following set-cookie-code: + + (set-cookie-code 'foo 'request '(x integer 20 :local)) + + generates something like this: + + (WHEN X + (SET-COOKIE-HEADER REQUEST + :NAME "com.gigamonkeys.web:foo:x" + :VALUE (PRINC-TO-STRING X))) + + Assuming this code will occur in a context where x is the name of a +variable, this looks good. + + Once again, macros have allowed you to distill the code you need to +write down to its essence-in this case, the data you want to extract +from the request and the HTML you want to generate. That said, this +framework isn't meant to be the be-all and end-all of Web application +frameworks-it's just a little sugar to make it a bit easier to write +simple apps like the one you'll write in Chapter 29. + + But before you can get to that, you need to write the guts of the +application for which the Chapter 29 application will be the user +interface. You'll start in the next chapter with a souped-up version of +the database you wrote in Chapter 3, this time to keep track of ID3 data +extracted from MP3 files. 
+ + ---------- Footnotes ---------- + + (1) You need to use LET* rather than a LET to allow the default value +forms for parameters to refer to parameters that appear earlier in the +parameter list. For example, you could write this: + + (define-url-function (request (x integer 10) (y integer (* 2 x))) ...) + + and the value of y, if not explicitly supplied, would be twice the +value of x. + + +File: pcl.info, Node: Chapter 27, Next: Chapter 28, Prev: Chapter 26, Up: Top + +27. Practical: An MP3 Database +============================== + +In this chapter you'll revisit the idea first explored in Chapter 3 of +building an in-memory database out of basic Lisp data structures. This +time your goal is to hold information that you'll extract from a +collection of MP3 files using the ID3v2 library from Chapter 25. You'll +then use this database in Chapters 28 and 29 as part of a Web-based +streaming MP3 server. Of course, this time around you can use some of +the language features you've learned since Chapter 3 to build a more +sophisticated version. + +* Menu: + +* 27-1:: The Database +* 27-2:: Defining a Schema +* 27-3:: Inserting Values +* 27-4:: Querying the Database +* 27-5:: Matching Functions +* 27-6:: Getting at the Results +* 27-7:: Other Database Operations + + +File: pcl.info, Node: 27-1, Next: 27-2, Prev: Chapter 27, Up: Chapter 27 + +The Database +============ + +The main problem with the database in Chapter 3 is that there's only one +table, the list stored in the variable *db*. Another is that the code +doesn't know anything about what type of values are stored in different +columns. In Chapter 3 you got away with that by using the fairly +general-purpose EQUAL method to compare column values when selecting +rows from the database, but you would've been in trouble if you had +wanted to store values that couldn't be compared with EQUAL or if you +had wanted to sort the rows in the database since there's no ordering +function that's as general as EQUAL. + + This time you'll solve both problems by defining a class, table, to +represent individual database tables. Each table instance will consist +of two slots-one to hold the table's data and another to hold +information about the columns in the table that database operations will +be able to use. The class looks like this: + + (defclass table () + ((rows :accessor rows :initarg :rows :initform (make-rows)) + (schema :accessor schema :initarg :schema))) + + As in Chapter 3, you can represent the individual rows with plists, +but this time around you'll create an abstraction that will make that an +implementation detail you can change later without too much trouble. +And this time you'll store the rows in a vector rather than a list since +certain operations that you'll want to support, such as random access to +rows by a numeric index and the ability to sort a table, can be more +efficiently implemented with vectors. + + The function make-rows used to initialize the rows slot can be a +simple wrapper around MAKE-ARRAY that builds an empty, adjustable,vector +with a fill pointer. 
+ +* Menu: + +* 27-1-1:: The Package + + (defparameter *default-table-size* 100) + + (defun make-rows (&optional (size *default-table-size*)) + (make-array size :adjustable t :fill-pointer 0)) + + To represent a table's schema, you need to define another class, +column, each instance of which will contain information about one column +in the table: its name, how to compare values in the column for equality +and ordering, a default value, and a function that will be used to +normalize the column's values when inserting data into the table and +when querying the table. The schema slot will hold a list of column +objects. The class definition looks like this: + + (defclass column () + ((name + :reader name + :initarg :name) + + (equality-predicate + :reader equality-predicate + :initarg :equality-predicate) + + (comparator + :reader comparator + :initarg :comparator) + + (default-value + :reader default-value + :initarg :default-value + :initform nil) + + (value-normalizer + :reader value-normalizer + :initarg :value-normalizer + :initform #'(lambda (v column) (declare (ignore column)) v)))) + + The equality-predicate and comparator slots of a column object hold +functions used to compare values from the given column for equivalence +and ordering. Thus, a column containing string values might have +STRING= as its equality-predicate and STRING< as its comparator, while a +column containing numbers might have = and <. + + The default-value and value-normalizer slots are used when inserting +rows into the database and, in the case of value-normalizer, when +querying the database. When you insert a row into the database, if no +value is provided for a particular column, you can use the value stored +in the column's default-value slot. Then the value-defaulted or +otherwise-is normalized by passing it and the column object to the +function stored in the value-normalizer slot. You pass the column in +case the value-normalizer function needs to use some data associated +with the column object. (You'll see an example of this in the next +section.) You should also normalize values passed in queries before +comparing them with values in the database. + + Thus, the value-normalizer's responsibility is primarily to return a +value that can be safely and correctly passed to the equality-predicate +and comparator functions. If the value-normalizer can't figure out an +appropriate value to return, it can signal an error. + + The other reason to normalize values before you store them in the +database is to save both memory and CPU cycles. For instance, if you +have a column that's going to contain string values but the number of +distinct strings that will be stored in the column is small-for +instance, the genre column in the MP3 database-you can save space and +speed by using the value-normalizer to intern the strings (translate all +STRING= values to a single string object). Thus, you'll need only as +many strings as there are distinct values, regardless of how many rows +are in the table, and you can use EQL to compare column values rather +than the slower STRING=. (1) + + ---------- Footnotes ---------- + + (1) The general theory behind interning objects is that if you're +going to compare a particular value many times, it's worth it to pay the +cost of interning it. The value-normalizer runs once when you insert a +value into the table and, as you'll see, once at the beginning of each +query. 
Since a query can involve invoking the equality-predicate once +per row in the table, the amortized cost of interning the values will +quickly approach zero. + + +File: pcl.info, Node: 27-1-1, Next: 27-1, Prev: 27-1, Up: 27-1 + +The Package +=========== + +The package for the code you'll develop in this chapter looks like this: + + (defpackage :com.gigamonkeys.mp3-database + (:use :common-lisp + :com.gigamonkeys.pathnames + :com.gigamonkeys.macro-utilities + :com.gigamonkeys.id3v2) + (:export :*default-table-size* + :*mp3-schema* + :*mp3s* + :column + :column-value + :delete-all-rows + :delete-rows + :do-rows + :extract-schema + :in + :insert-row + :load-database + :make-column + :make-schema + :map-rows + :matching + :not-nullable + :nth-row + :random-selection + :schema + :select + :shuffle-table + :sort-rows + :table + :table-size + :with-column-values)) + + The :use section gives you access to the functions and macros whose +names are exported from the packages defined in Chapter 15, 8, and 25 +and the :export section exports the API this library will provide, which +you'll use in Chapter 29. + + +File: pcl.info, Node: 27-2, Next: 27-3, Prev: 27-1, Up: Chapter 27 + +Defining a Schema +================= + +Thus, to make an instance of table, you need to build a list of column +objects. You could build the list by hand, using LIST and +MAKE-INSTANCE. But you'll soon notice that you're frequently making a +lot column objects with the same comparator and equality-predicate +combinations. This is because the combination of a comparator and +equality predicate essentially defines a column type. It'd be nice if +there was a way to give those types names that would allow you to say +simply that a given column is a string column, rather than having to +specify STRING< as its comparator and STRING= as its equality predicate. +One way is to define a generic function, make-column, like this: + + (defgeneric make-column (name type &optional default-value)) + + Now you can implement methods on this generic function that +specialize on type with EQL specializers and return column objects with +the slots filled in with appropriate values. Here's the generic +function and methods that define column types for the type names string +and number: + + (defmethod make-column (name (type (eql 'string)) &optional default-value) + (make-instance + 'column + :name name + :comparator #'string< + :equality-predicate #'string= + :default-value default-value + :value-normalizer #'not-nullable)) + + (defmethod make-column (name (type (eql 'number)) &optional default-value) + (make-instance + 'column + :name name + :comparator #'< + :equality-predicate #'= + :default-value default-value)) + + The following function, not-nullable, used as the value-normalizer +for string columns, simply returns the value it's given unless the value +is NIL, in which case it signals an error: + + (defun not-nullable (value column) + (or value (error "Column ~a can't be null" (name column)))) + + This is important because STRING< and STRING= will signal an error if +called on NIL; it's better to catch bad values before they go into the +table rather than when you try to use them. (1) + + Another column type you'll need for the MP3 database is an +interned-string whose values are interned as discussed previously. +Since you need a hash table in which to intern values, you should define +a subclass of column, interned-values-column, that adds a slot whose +value is the hash table you use to intern. 
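+
+   The interning trick itself has nothing to do with columns; the idea
+is simply to keep one canonical copy of each distinct string in an
+EQUAL hash table. Here's a standalone sketch, with names that are
+purely illustrative and not part of the database code:
+
+     ;; Standalone illustration of interning, separate from the database.
+     (defparameter *canonical-strings* (make-hash-table :test #'equal))
+
+     (defun canonicalize (string)
+       (or (gethash string *canonical-strings*)
+           (setf (gethash string *canonical-strings*) string)))
+
+   Two strings that are STRING= but not EQL both come back as the same
+object, so later comparisons can use EQL:
+
+     (eql (canonicalize (copy-seq "Rock")) (canonicalize "Rock")) ; ==> T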
+ + To implement the actual interning, you'll also need to provide an +:initform for value-normalizer of a function that interns the value in +the column's interned-values hash table. And because one of the main +reasons to intern values is to allow you to use EQL as the equality +predicate, you should also add an :initform for the equality-predicate +of #'eql. + + (defclass interned-values-column (column) + ((interned-values + :reader interned-values + :initform (make-hash-table :test #'equal)) + (equality-predicate :initform #'eql) + (value-normalizer :initform #'intern-for-column))) + + (defun intern-for-column (value column) + (let ((hash (interned-values column))) + (or (gethash (not-nullable value column) hash) + (setf (gethash value hash) value)))) + + You can then define a make-column method specialized on the name +interned-string that returns an instance of interned-values-column. + + (defmethod make-column (name (type (eql 'interned-string)) &optional default-value) + (make-instance + 'interned-values-column + :name name + :comparator #'string< + :default-value default-value)) + + With these methods defined on make-column, you can now define a +function, make-schema, that builds a list of column objects from a list +of column specifications consisting of a column name, a column type +name, and, optionally, a default value. + + (defun make-schema (spec) + (mapcar #'(lambda (column-spec) (apply #'make-column column-spec)) spec)) + + For instance, you can define the schema for the table you'll use to +store data extracted from MP3s like this: + + (defparameter *mp3-schema* + (make-schema + '((:file string) + (:genre interned-string "Unknown") + (:artist interned-string "Unknown") + (:album interned-string "Unknown") + (:song string) + (:track number 0) + (:year number 0) + (:id3-size number)))) + + To make an actual table for holding information about MP3s, you pass +*mp3-schema* as the :schema initarg to MAKE-INSTANCE. + + (defparameter *mp3s* (make-instance 'table :schema *mp3-schema*)) + + ---------- Footnotes ---------- + + (1) As always, the first causality of concise exposition in +programming books is proper error handling; in production code you'd +probably want to define your own error type, such as the following, and +signal it instead: + + (error 'illegal-column-value :value value :column column) + + Then you'd want to think about where you can add restarts that might +be able to recover from this condition. And, finally, in any given +application you could establish condition handlers that would choose +from among those restarts. + + +File: pcl.info, Node: 27-3, Next: 27-4, Prev: 27-2, Up: Chapter 27 + +Inserting Values +================ + +Now you're ready to define your first table operation, insert-row, which +takes a plist of names and values and a table and adds a row to the +table containing the given values. The bulk of the work is done in a +helper function, normalize-row, that builds a plist with a defaulted, +normalized value for each column, using the values from names-and-values +if available and the default-value for the column if not. 
+ + (defun insert-row (names-and-values table) + (vector-push-extend (normalize-row names-and-values (schema table)) (rows table))) + + (defun normalize-row (names-and-values schema) + (loop + for column in schema + for name = (name column) + for value = (or (getf names-and-values name) (default-value column)) + collect name + collect (normalize-for-column value column))) + + It's worth defining a separate helper function, normalize-for-column, +that takes a value and a column object and returns the normalized value +because you'll need to perform the same normalization on query +arguments. + + (defun normalize-for-column (value column) + (funcall (value-normalizer column) value column)) + + Now you're ready to combine this database code with code from +previous chapters to build a database of data extracted from MP3 files. +You can define a function, file->row, that uses read-id3 from the ID3v2 +library to extract an ID3 tag from a file and turns it into a plist that +you can pass to insert-row. + + (defun file->row (file) + (let ((id3 (read-id3 file))) + (list + :file (namestring (truename file)) + :genre (translated-genre id3) + :artist (artist id3) + :album (album id3) + :song (song id3) + :track (parse-track (track id3)) + :year (parse-year (year id3)) + :id3-size (size id3)))) + + You don't have to worry about normalizing the values since insert-row +takes care of that for you. You do, however, have to convert the string +values returned by the track and year into numbers. The track number in +an ID3 tag is sometimes stored as the ASCII representation of the track +number and sometimes as a number followed by a slash followed by the +total number of tracks on the album. Since you care only about the +actual track number, you should use the :end argument to PARSE-INTEGER +to specify that it should parse only up to the slash, if any. (1) + + (defun parse-track (track) + (when track (parse-integer track :end (position #\/ track)))) + + (defun parse-year (year) + (when year (parse-integer year))) + + Finally, you can put all these functions together, along with +walk-directory from the portable pathnames library and mp3-p from the +ID3v2 library, to define a function that loads an MP3 database with data +extracted from all the MP3 files it can find under a given directory. + + (defun load-database (dir db) + (let ((count 0)) + (walk-directory + dir + #'(lambda (file) + (princ #\.) + (incf count) + (insert-row (file->row file) db)) + :test #'mp3-p) + (format t "~&Loaded ~d files into database." count))) + + ---------- Footnotes ---------- + + (1) If any MP3 files have malformed data in the track and year +frames, PARSE-INTEGER could signal an error. One way to deal with that +is to pass PARSE-INTEGER the :junk-allowed argument of T, which will +cause it to ignore any non-numeric junk following the number and to +return NIL if no number can be found in the string. Or, if you want +practice at using the condition system, you could define an error and +signal it from these functions when the data is malformed and also +establish a few restarts to allow these functions to recover. + + +File: pcl.info, Node: 27-4, Next: 27-5, Prev: 27-3, Up: Chapter 27 + +Querying the Database +===================== + +Once you've loaded your database with data, you'll need a way to query +it. For the MP3 application you'll need a slightly more sophisticated +query function than you wrote in Chapter 3. 
This time around you want +not only to be able to select rows matching particular criteria but also +to limit the results to particular columns, to limit the results to +unique rows, and perhaps to sort the rows by particular columns. In +keeping with the spirit of relational database theory, the result of a +query will be a new table object containing the desired rows and +columns. + + The query function you'll write, select, is loosely modeled on the +SELECT statement from Structured Query Language (SQL). It'll take five +keyword parameters: :from, :columns, :where, :distinct, and :order-by. +The :from argument is the table object you want to query. The :columns +argument specifies which columns should be included in the result. The +value should be a list of column names, a single column name, or a T, +the default, meaning return all columns. The :where argument, if +provided, should be a function that accepts a row and returns true if it +should be included in the results. In a moment, you'll write two +functions, matching and in, that return functions appropriate for use as +:where arguments. The :order-by argument, if supplied, should be a list +of column names; the results will be sorted by the named columns. As +with the :columns argument, you can specify a single column using just +the name, which is equivalent to a one-item list containing the same +name. Finally, the :distinct argument is a boolean that says whether to +eliminate duplicate rows from the results. The default value for +:distinct is NIL. + + Here are some examples of using select: + + ;; Select all rows where the :artist column is "Green Day" + (select :from *mp3s* :where (matching *mp3s* :artist "Green Day")) + + ;; Select a sorted list of artists with songs in the genre "Rock" + (select + :columns :artist + :from *mp3s* + :where (matching *mp3s* :genre "Rock") + :distinct t + :order-by :artist) + + The implementation of select with its immediate helper functions +looks like this: + + (defun select (&key (columns t) from where distinct order-by) + (let ((rows (rows from)) + (schema (schema from))) + + (when where + (setf rows (restrict-rows rows where))) + + (unless (eql columns 't) + (setf schema (extract-schema (mklist columns) schema)) + (setf rows (project-columns rows schema))) + + (when distinct + (setf rows (distinct-rows rows schema))) + + (when order-by + (setf rows (sorted-rows rows schema (mklist order-by)))) + + (make-instance 'table :rows rows :schema schema))) + + (defun mklist (thing) + (if (listp thing) thing (list thing))) + + (defun extract-schema (column-names schema) + (loop for c in column-names collect (find-column c schema))) + + (defun find-column (column-name schema) + (or (find column-name schema :key #'name) + (error "No column: ~a in schema: ~a" column-name schema))) + + (defun restrict-rows (rows where) + (remove-if-not where rows)) + + (defun project-columns (rows schema) + (map 'vector (extractor schema) rows)) + + (defun distinct-rows (rows schema) + (remove-duplicates rows :test (row-equality-tester schema))) + + (defun sorted-rows (rows schema order-by) + (sort (copy-seq rows) (row-comparator order-by schema))) + + Of course, the really interesting part of select is how you implement +the functions extractor, row-equality-tester, and row-comparator. + + As you can tell by how they're used, each of these functions must +return a function. For instance, project-columns uses the value +returned by extractor as the function argument to MAP. 
Since the purpose +of project-columns is to return a set of rows with only certain column +values, you can infer that extractor returns a function that takes a row +as an argument and returns a new row containing only the columns +specified in the schema it's passed. Here's how you can implement it: + + (defun extractor (schema) + (let ((names (mapcar #'name schema))) + #'(lambda (row) + (loop for c in names collect c collect (getf row c))))) + + Note how you can do the work of extracting the names from the schema +outside the body of the closure: since the closure will be called many +times, you want it to do as little work as possible each time it's +called. + + The functions row-equality-tester and row-comparator are implemented +in a similar way. To decide whether two rows are equivalent, you need +to apply the appropriate equality predicate for each column to the +appropriate column values. Recall from Chapter 22 that the LOOP clause +always will return NIL as soon as a pair of values fails their test or +will cause the LOOP to return T. + + (defun row-equality-tester (schema) + (let ((names (mapcar #'name schema)) + (tests (mapcar #'equality-predicate schema))) + #'(lambda (a b) + (loop for name in names and test in tests + always (funcall test (getf a name) (getf b name)))))) + + Ordering two rows is a bit more complex. In Lisp, comparator +functions return true if their first argument should be sorted ahead of +the second and NIL otherwise. Thus, a NIL can mean that the second +argument should be sorted ahead of the first or that they're equivalent. +You want your row comparators to behave the same way: return T if the +first row should be sorted ahead of the second and NIL otherwise. + + Thus, to compare two rows, you should compare the values from the +columns you're sorting by, in order, using the appropriate comparator +for each column. First call the comparator with the value from the +first row as the first argument. If the comparator returns true, that +means the first row should definitely be sorted ahead of the second row, +so you can immediately return T. + + But if the column comparator returns NIL, then you need to determine +whether that's because the second value should sort ahead of the first +value or because they're equivalent. So you should call the comparator +again with the arguments reversed. If the comparator returns true this +time, it means the second column value sorts ahead of the first and thus +the second row ahead of the first row, so you can return NIL +immediately. Otherwise, the column values are equivalent, and you need +to move onto the next column. If you get through all the columns +without one row's value ever winning the comparison, then the rows are +equivalent, and you return NIL. A function that implements this +algorithm looks like this: + + (defun row-comparator (column-names schema) + (let ((comparators (mapcar #'comparator (extract-schema column-names schema)))) + #'(lambda (a b) + (loop + for name in column-names + for comparator in comparators + for a-value = (getf a name) + for b-value = (getf b name) + when (funcall comparator a-value b-value) return t + when (funcall comparator b-value a-value) return nil + finally (return nil))))) + + +File: pcl.info, Node: 27-5, Next: 27-6, Prev: 27-4, Up: Chapter 27 + +Matching Functions +================== + +The :where argument to select can be any function that takes a row +object and returns true if it should be included in the results. 
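+
+   For example, to pull out everything in the database released after
+1999, you could pass a hand-rolled closure that examines each row
+directly. This sketch relies on the rows-are-plists representation
+rather than on the matching helpers described next:
+
+     ;; Sketch: an arbitrary :where closure over the raw row plist.
+     (select
+      :columns '(:artist :song :year)
+      :from *mp3s*
+      :where #'(lambda (row) (> (getf row :year) 1999)))
+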
In +practice, however, you'll rarely need the full power of arbitrary code +to express query criteria. So you should provide two functions, +matching and in, that will build query functions that allow you to +express the common kinds of queries and that take care of using the +proper equality predicates and value normalizers for each column. + + The workhouse query-function constructor will be matching, which +returns a function that will match rows with specific column values. +You saw how it was used in the earlier examples of select. For +instance, this call to matching: + + (matching *mp3s* :artist "Green Day") + + returns a function that matches rows whose :artist value is "Green +Day". You can also pass multiple names and values; the returned +function matches when all the columns match. For example, the following +returns a closure that matches rows where the artist is "Green Day" and +the album is "American Idiot": + + (matching *mp3s* :artist "Green Day" :album "American Idiot") + + You have to pass matching the table object because it needs access to +the table's schema in order to get at the equality predicates and value +normalizer functions for the columns it matches against. + + You build up the function returned by matching out of smaller +functions, each responsible for matching one column's value. To build +these functions, you should define a function, column-matcher, that +takes a column object and an unnormalized value you want to match and +returns a function that accepts a single row and returns true when the +value of the given column in the row matches the normalized version of +the given value. + + (defun column-matcher (column value) + (let ((name (name column)) + (predicate (equality-predicate column)) + (normalized (normalize-for-column value column))) + #'(lambda (row) (funcall predicate (getf row name) normalized)))) + + You then build a list of column-matching functions for the names and +values you care about with the following function, column-matchers: + + (defun column-matchers (schema names-and-values) + (loop for (name value) on names-and-values by #'cddr + when value collect + (column-matcher (find-column name schema) value))) + + Now you can implement matching. Again, note that you do as much work +as possible outside the closure in order to do it only once rather than +once per row in the table. + + (defun matching (table &rest names-and-values) + "Build a where function that matches rows with the given column values." + (let ((matchers (column-matchers (schema table) names-and-values))) + #'(lambda (row) + (every #'(lambda (matcher) (funcall matcher row)) matchers)))) + + This function is a bit of a twisty maze of closures, but it's worth +contemplating for a moment to get a flavor of the possibilities of +programming with functions as first-class objects. + + The job of matching is to return a function that will be invoked on +each row in a table to determine whether it should be included in the +new table. So, matching returns a closure with one parameter, row. + + Now recall that the function EVERY takes a predicate function as its +first argument and returns true if, and only if, that function returns +true each time it's applied to an element of the list passed as EVERY's +second argument. However, in this case, the list you pass to EVERY is +itself a list of functions, the column matchers. What you want to know +is that every column matcher, when invoked on the row you're currently +testing, returns true. 
So, as the predicate argument to EVERY, you pass +yet another closure that FUNCALLs the column matcher, passing it the +row. + + Another matching function that you'll occasionally find useful is in, +which returns a function that matches rows where a particular column is +in a given set of values. You'll define in to take two arguments: a +column name and a table that contains the values you want to match. For +instance, suppose you wanted to find all the songs in the MP3 database +that have names the same as a song performed by the Dixie Chicks. You +can write that where clause using in and a subselect like this: (1) + + (select + :columns '(:artist :song) + :from *mp3s* + :where (in :song + (select + :columns :song + :from *mp3s* + :where (matching *mp3s* :artist "Dixie Chicks")))) + + Although the queries are more complex, the definition of in is much +simpler than that of matching. + + (defun in (column-name table) + (let ((test (equality-predicate (find-column column-name (schema table)))) + (values (map 'list #'(lambda (r) (getf r column-name)) (rows table)))) + #'(lambda (row) + (member (getf row column-name) values :test test)))) + + ---------- Footnotes ---------- + + (1) This query will also return all the songs performed by the Dixie +Chicks. If you want to limit it to songs by artists other than the +Dixie Chicks, you need a more complex :where function. Since the :where +argument can be any function, it's certainly possible; you could remove +the Dixie Chicks' own songs with this query: + + (let* ((dixie-chicks (matching *mp3s* :artist "Dixie Chicks")) + (same-song (in :song (select :columns :song :from *mp3s* :where dixie-chicks))) + (query #'(lambda (row) (and (not (funcall dixie-chicks row)) (funcall same-song row))))) + (select :columns '(:artist :song) :from *mp3s* :where query)) + + This obviously isn't quite as convenient. If you were going to write +an application that needed to do lots of complex queries, you might want +to consider coming up with a more expressive query language. + + +File: pcl.info, Node: 27-6, Next: 27-7, Prev: 27-5, Up: Chapter 27 + +Getting at the Results +====================== + +Since select returns another table, you need to think a bit about how +you want to get at the individual row and column values in a table. If +you're sure you'll never want to change the way you represent the data +in a table, you can just make the structure of a table part of the +API-that table has a slot rows that's a vector of plists-and use all the +normal Common Lisp functions for manipulating vectors and plists to get +at the values in the table. But that representation is really an +internal detail that you might want to change. Also, you don't +necessarily want other code manipulating the data structures +directly-for instance, you don't want anyone to use SETF to put an +unnormalized column value into a row. So it might be a good idea to +define a few abstractions that provide the operations you want to +support. Then if you decide to change the internal representation +later, you'll need to change only the implementation of these functions +and macros. And while Common Lisp doesn't enable you to absolutely +prevent folks from getting at "internal" data, by providing an official +API you at least make it clear where the boundary is. + + Probably the most common thing you'll need to do with the results of +a query is to iterate over the individual rows and extract specific +column values. 
So you need to provide a way to do both those things +without touching the rows vector directly or using GETF to get at the +column values within a row. + + For now these operations are trivial to implement; they're merely +wrappers around the code you'd write if you didn't have these +abstractions. You can provide two ways to iterate over the rows of a +table: a macro do-rows, which provides a basic looping construct, and a +function map-rows, which builds a list containing the results of +applying a function to each row in the table. (1) + + (defmacro do-rows ((row table) &body body) + `(loop for ,row across (rows ,table) do ,@body)) + + (defun map-rows (fn table) + (loop for row across (rows table) collect (funcall fn row))) + + To get at individual column values within a row, you should provide a +function, column-value, that takes a row and a column name and returns +the appropriate value. Again, it's a trivial wrapper around the code +you'd write otherwise. But if you change the internal representation of +a table later, users of column-value needn't be any the wiser. + + (defun column-value (row column-name) + (getf row column-name)) + + While column-value is a sufficient abstraction for getting at column +values, you'll often want to get at the values of multiple columns at +once. So you can provide a bit of syntactic sugar, a macro, +with-column-values, that binds a set of variables to the values +extracted from a row using the corresponding keyword names. Thus, +instead of writing this: + + (do-rows (row table) + (let ((song (column-value row :song)) + (artist (column-value row :artist)) + (album (column-value row :album))) + (format t "~a by ~a from ~a~%" song artist album))) + + you can simply write the following: + + (do-rows (row table) + (with-column-values (song artist album) row + (format t "~a by ~a from ~a~%" song artist album))) + + Again, the actual implementation isn't complicated if you use the +once-only macro from Chapter 8. + + (defmacro with-column-values ((&rest vars) row &body body) + (once-only (row) + `(let ,(column-bindings vars row) ,@body))) + + (defun column-bindings (vars row) + (loop for v in vars collect `(,v (column-value ,row ,(as-keyword v))))) + + (defun as-keyword (symbol) + (intern (symbol-name symbol) :keyword)) + + Finally, you should provide abstractions for getting at the number of +rows in a table and for accessing a specific row by numeric index. + + (defun table-size (table) + (length (rows table))) + + (defun nth-row (n table) + (aref (rows table) n)) + + ---------- Footnotes ---------- + + (1) The version of LOOP implemented at M.I.T. before Common Lisp was +standardized included a mechanism for extending the LOOP grammar to +support iteration over new data structures. Some Common Lisp +implementations that inherited their LOOP implementation from that code +base may still support that facility, which would make do-rows and +map-rows less necessary. + + +File: pcl.info, Node: 27-7, Next: Chapter 28, Prev: 27-6, Up: Chapter 27 + +Other Database Operations +========================= + +Finally, you'll implement a few other database operations that you'll +need in Chapter 29. The first two are analogs of the SQL DELETE +statement. The function delete-rows is used to delete rows from a table +that match particular criteria. Like select, it takes :from and :where +keyword arguments. Unlike select, it doesn't return a new table-it +actually modifies the table passed as the :from argument. 
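+
+   For example, assuming the *mp3s* table from earlier, a call like the
+following sketch would remove every row whose genre was left at the
+"Unknown" default:
+
+     ;; Sketch of a typical call; this modifies *mp3s* in place.
+     (delete-rows :from *mp3s* :where (matching *mp3s* :genre "Unknown"))
+
+   The implementation looks like this: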
+ + (defun delete-rows (&key from where) + (loop + with rows = (rows from) + with store-idx = 0 + for read-idx from 0 + for row across rows + do (setf (aref rows read-idx) nil) + unless (funcall where row) do + (setf (aref rows store-idx) row) + (incf store-idx) + finally (setf (fill-pointer rows) store-idx))) + + In the interest of efficiency, you might want to provide a separate +function for deleting all the rows from a table. + + (defun delete-all-rows (table) + (setf (rows table) (make-rows *default-table-size*))) + + The remaining table operations don't really map to normal relational +database operations but will be useful in the MP3 browser application. +The first is a function to sort the rows of a table in place. + + (defun sort-rows (table &rest column-names) + (setf (rows table) (sort (rows table) (row-comparator column-names (schema table)))) + table) + + On the flip side, in the MP3 browser application, you'll need a +function that shuffles a table's rows in place using the function +nshuffle-vector from Chapter 23. + + (defun shuffle-table (table) + (nshuffle-vector (rows table)) + table) + + And finally, again for the purposes of the MP3 browser, you should +provide a function that selects n random rows, returning the results as +a new table. It also uses nshuffle-vector along with a version of +random-sample based on Algorithm S from Donald Knuth's The Art of +Computer Programming, Volume 2: Seminumerical Algorithms, Third Edition +(Addison-Wesley, 1998) that I discussed in Chapter 20. + + (defun random-selection (table n) + (make-instance + 'table + :schema (schema table) + :rows (nshuffle-vector (random-sample (rows table) n)))) + + (defun random-sample (vector n) + "Based on Algorithm S from Knuth. TAOCP, vol. 2. p. 142" + (loop with selected = (make-array n :fill-pointer 0) + for idx from 0 + do + (loop + with to-select = (- n (length selected)) + for remaining = (- (length vector) idx) + while (>= (* remaining (random 1.0)) to-select) + do (incf idx)) + (vector-push (aref vector idx) selected) + when (= (length selected) n) return selected)) + + With this code you'll be ready, in Chapter 29, to build a Web +interface for browsing a collection of MP3 files. But before you get to +that, you need to implement the part of the server that streams MP3s +using the Shoutcast protocol, which is the topic of the next chapter. + + +File: pcl.info, Node: Chapter 28, Next: Chapter 29, Prev: Chapter 27, Up: Top + +28. Practical: A Shoutcast Server +================================= + +In this chapter you'll develop another important part of what will +eventually be a Web-based application for streaming MP3s, namely, the +server that implements the Shoutcast protocol for actually streaming +MP3s to clients such as iTunes, XMMS, (1) or Winamp. + +* Menu: + +* 28-1:: The Shoutcast Protocol +* 28-2:: Song Sources +* 28-3:: Implementing Shoutcast + + ---------- Footnotes ---------- + + (1) The version of XMMS shipped with Red Hat 8.0 and 9.0 and Fedora +no longer knows how to play MP3s because the folks at Red Hat were +worried about the licensing issues related to the MP3 codec. To get an +XMMS with MP3 support on these versions of Linux, you can grab the +source from http://www.xmms.org and build it yourself. Or, see +http://www.fedorafaq.org/#xmms-mp3 for information about other +possibilities. 
+ + +File: pcl.info, Node: 28-1, Next: 28-2, Prev: Chapter 28, Up: Chapter 28 + +The Shoutcast Protocol +====================== + +The Shoutcast protocol was invented by the folks at Nullsoft, the makers +of the Winamp MP3 software. It was designed to support Internet audio +broadcasting-Shoutcast DJs send audio data from their personal computers +to a central Shoutcast server that then turns around and streams it out +to any connected listeners. + + The server you'll build is actually only half a true Shoutcast +server-you'll use the protocol that Shoutcast servers use to stream MP3s +to listeners, but your server will be able to serve only songs already +stored on the file system of the computer where the server is running. + + You need to worry about only two parts of the Shoutcast protocol: the +request that a client makes in order to start receiving a stream and the +format of the response, including the mechanism by which metadata about +what song is currently playing is embedded in the stream. + + The initial request from the MP3 client to the Shoutcast server is +formatted as a normal HTTP request. In response, the Shoutcast server +sends an ICY response that looks like an HTTP response except with the +string "ICY" (1) in place of the normal HTTP version string and with +different headers. After sending the headers and a blank line, the +server streams a potentially endless amount of MP3 data. + + The only tricky thing about the Shoutcast protocol is the way +metadata about the songs being streamed is embedded in the data sent to +the client. The problem facing the Shoutcast designers was to provide a +way for the Shoutcast server to communicate new title information to the +client each time it started playing a new song so the client could +display it in its UI. (Recall from Chapter 25 that the MP3 format +doesn't make any provision for encoding metadata.) While one of the +design goals of ID3v2 had been to make it better suited for use when +streaming MP3s, the Nullsoft folks decided to go their own route and +invent a new scheme that's fairly easy to implement on both the client +side and the server side. That, of course, was ideal for them since +they were also the authors of their own MP3 client. + + Their scheme was to simply ignore the structure of MP3 data and embed +a chunk of self-delimiting metadata every n bytes. The client would +then be responsible for stripping out this metadata so it wasn't treated +as MP3 data. Since metadata sent to a client that isn't ready for it +will cause glitches in the sound, the server is supposed to send +metadata only if the client's original request contains a special +Icy-Metadata header. And in order for the client to know how often to +expect metadata, the server must send back a header Icy-Metaint whose +value is the number of bytes of MP3 data that will be sent between each +chunk of metadata. + + The basic content of the metadata is a string of the form +"StreamTitle='title';" where title is the title of the current song and +can't contain single quote marks. This payload is encoded as a +length-delimited array of bytes: a single byte is sent indicating how +many 16-byte blocks follow, and then that many blocks are sent. They +contain the string payload as an ASCII string, with the final block +padded out with null bytes as necessary. + + Thus, the smallest legal metadata chunk is a single byte, zero, +indicating zero subsequent blocks. 
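+
+   As a quick check of that framing arithmetic, here's a throwaway
+helper, purely illustrative and not part of the server you'll build,
+that computes how many 16-byte blocks, and how many bytes in total, a
+given title requires:
+
+     ;; Illustrative only: how big is the metadata chunk for a title?
+     (defun icy-chunk-size (title)
+       (let* ((payload (format nil "StreamTitle='~a';" title))
+              (blocks (ceiling (length payload) 16)))
+         (values blocks (1+ (* blocks 16)))))
+
+   A twenty-character title gives a 35-character payload, which needs
+three blocks and therefore occupies 49 bytes on the wire.
+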
If the server doesn't need to update +the metadata, it can send such an empty chunk, but it must send at least +the one byte so the client doesn't throw away actual MP3 data. + + ---------- Footnotes ---------- + + (1) To further confuse matters, there's a different streaming +protocol called Icecast. There seems to be no connection between the +ICY header used by Shoutcast and the Icecast protocol. + + +File: pcl.info, Node: 28-2, Next: 28-3, Prev: 28-1, Up: Chapter 28 + +Song Sources +============ + +Because a Shoutcast server has to keep streaming songs to the client for +as long as it's connected, you need to provide your server with a source +of songs to draw on. In the Web-based application, each connected +client will have a playlist that can be manipulated via the Web +interface. But in the interest of avoiding excessive coupling, you +should define an interface that the Shoutcast server can use to obtain +songs to play. You can write a simple implementation of this interface +now and then a more complex one as part of the Web application you'll +build in Chapter 29. + +* Menu: + +* 28-2-1:: The Package + + The idea behind the interface is that the Shoutcast server will find +a source of songs based on an ID extracted from the AllegroServe request +object. It can then do three things with the song source it's given. + + * Get the current song from the source + * Tell the song source that it's done with the current song + * Ask the source whether the song it was given earlier is still the + current song + + The last operation is necessary because there may be ways-and will be +in Chapter 29-to manipulate the songs source outside the Shoutcast +server. You can express the operations the Shoutcast server needs with +the following generic functions: + + (defgeneric current-song (source) + (:documentation "Return the currently playing song or NIL.")) + + (defgeneric maybe-move-to-next-song (song source) + (:documentation + "If the given song is still the current one update the value + returned by current-song.")) + + (defgeneric still-current-p (song source) + (:documentation + "Return true if the song given is the same as the current-song.")) + + The function maybe-move-to-next-song is defined the way it is so a +single operation checks whether the song is current and, if it is, moves +the song source to the next song. This will be important in the next +chapter when you need to implement a song source that can be safely +manipulated from two different threads. (1) + + To represent the information about a song that the Shoutcast server +needs, you can define a class, song, with slots to hold the name of the +MP3 file, the title to send in the Shoutcast metadata, and the size of +the ID3 tag so you can skip it when serving up the file. + + (defclass song () + ((file :reader file :initarg :file) + (title :reader title :initarg :title) + (id3-size :reader id3-size :initarg :id3-size))) + + The value returned by current-song (and thus the first argument to +still-current-p and maybe-move-to-next-song) will be an instance of +song. + + In addition, you need to define a generic function that the server +can use to find a song source based on the type of source desired and +the request object. Methods will specialize the type parameter in order +to return different kinds of song source and will pull whatever +information they need from the request object to determine which source +to return. 
+ + (defgeneric find-song-source (type request) + (:documentation "Find the song-source of the given type for the given request.")) + + However, for the purposes of this chapter, you can use a trivial +implementation of this interface that always uses the same object, a +simple queue of song objects that you can manipulate from the REPL. You +can start by defining a class, simple-song-queue, and a global variable, +*songs*, that holds an instance of this class. + + (defclass simple-song-queue () + ((songs :accessor songs :initform (make-array 10 :adjustable t :fill-pointer 0)) + (index :accessor index :initform 0))) + + (defparameter *songs* (make-instance 'simple-song-queue)) + + Then you can define a method on find-song-source that specializes +type with an EQL specializer on the symbol singleton and returns the +instance stored in *songs*. + + (defmethod find-song-source ((type (eql 'singleton)) request) + (declare (ignore request)) + *songs*) + + Now you just need to implement methods on the three generic functions +that the Shoutcast server will use. + + (defmethod current-song ((source simple-song-queue)) + (when (array-in-bounds-p (songs source) (index source)) + (aref (songs source) (index source)))) + + (defmethod still-current-p (song (source simple-song-queue)) + (eql song (current-song source))) + + (defmethod maybe-move-to-next-song (song (source simple-song-queue)) + (when (still-current-p song source) + (incf (index source)))) + + And for testing purposes you should provide a way to add songs to +this queue. + + (defun add-file-to-songs (file) + (vector-push-extend (file->song file) (songs *songs*))) + + (defun file->song (file) + (let ((id3 (read-id3 file))) + (make-instance + 'song + :file (namestring (truename file)) + :title (format nil "~a by ~a from ~a" (song id3) (artist id3) (album id3)) + :id3-size (size id3)))) + + ---------- Footnotes ---------- + + (1) Technically, the implementation in this chapter will also be +manipulated from two threads-the AllegroServe thread running the +Shoutcast server and the REPL thread. But you can live with the race +condition for now. I'll discuss how to use locking to make code thread +safe in the next chapter. + + +File: pcl.info, Node: 28-2-1, Next: 28-2, Prev: 28-2, Up: 28-2 + +The Package +=========== + +The package for the code you'll develop in this chapter looks like this: + + (defpackage :com.gigamonkeys.shoutcast + (:use :common-lisp + :net.aserve + :com.gigamonkeys.id3v2) + (:export :song + :file + :title + :id3-size + :find-song-source + :current-song + :still-current-p + :maybe-move-to-next-song + :*song-source-type*)) + + +File: pcl.info, Node: 28-3, Next: Chapter 29, Prev: 28-2, Up: Chapter 28 + +Implementing Shoutcast +====================== + +Now you're ready to implement the Shoutcast server. Since the Shoutcast +protocol is loosely based on HTTP, you can implement the server as a +function within AllegroServe. However, since you need to interact with +some of the low-level features of AllegroServe, you can't use the +define-url-function macro from Chapter 26. 
Instead, you need to write a +regular function that looks like this: + + (defun shoutcast (request entity) + (with-http-response + (request entity :content-type "audio/MP3" :timeout *timeout-seconds*) + (prepare-icy-response request *metadata-interval*) + (let ((wants-metadata-p (header-slot-value request :icy-metadata))) + (with-http-body (request entity) + (play-songs + (request-socket request) + (find-song-source *song-source-type* request) + (if wants-metadata-p *metadata-interval*)))))) + + Then publish that function under the path /stream.mp3 like this: (1) + + (publish :path "/stream.mp3" :function 'shoutcast) + + In the call to with-http-response, in addition to the usual request +and entity arguments, you need to pass :content-type and :timeout +arguments. The :content-type argument tells AllegroServe how to set the +Content-Type header it sends. And the :timeout argument specifies the +number of seconds AllegroServe gives the function to generate its +response. By default AllegroServe times out each request after five +minutes. Because you're going to stream an essentially endless sequence +of MP3s, you need much more time. There's no way to tell AllegroServe +to never time out the request, so you should set it to the value of +*timeout-seconds*, which you can define to some suitably large value +such as the number of seconds in ten years. + + (defparameter *timeout-seconds* (* 60 60 24 7 52 10)) + + Then, within the body of the with-http-response and before the call +to with-http-body that will cause the response headers to be sent, you +need to manipulate the reply that AllegroServe will send. The function +prepare-icy-response encapsulates the necessary manipulations: changing +the protocol string from the default of "HTTP" to "ICY" and adding the +Shoutcast-specific headers. (2) You also need, in order to work around +a bug in iTunes, to tell AllegroServe not to use chunked +transfer-encoding. (3) The functions request-reply-protocol-string, +request-uri, and reply-header-slot-value are all part of AllegroServe. + + (defun prepare-icy-response (request metadata-interval) + (setf (request-reply-protocol-string request) "ICY") + (loop for (k v) in (reverse + `((:|icy-metaint| ,(princ-to-string metadata-interval)) + (:|icy-notice1| "
This stream blah blah blah
") + (:|icy-notice2| "More blah") + (:|icy-name| "MyLispShoutcastServer") + (:|icy-genre| "Unknown") + (:|icy-url| ,(request-uri request)) + (:|icy-pub| "1"))) + do (setf (reply-header-slot-value request k) v)) + ;; iTunes, despite claiming to speak HTTP/1.1, doesn't understand + ;; chunked Transfer-encoding. Grrr. So we just turn it off. + (turn-off-chunked-transfer-encoding request)) + + (defun turn-off-chunked-transfer-encoding (request) + (setf (request-reply-strategy request) + (remove :chunked (request-reply-strategy request)))) + + Within the with-http-body of shoutcast, you actually stream the MP3 +data. The function play-songs takes the stream to which it should write +the data, the song source, and the metadata interval it should use or +NIL if the client doesn't want metadata. The stream is the socket +obtained from the request object, the song source is obtained by calling +find-song-source, and the metadata interval comes from the global +variable *metadata-interval*. The type of song source is controlled by +the variable *song-source-type*, which for now you can set to singleton +in order to use the simple-song-queue you implemented previously. + + (defparameter *metadata-interval* (expt 2 12)) + + (defparameter *song-source-type* 'singleton) + + The function play-songs itself doesn't do much-it loops calling the +function play-current, which does all the heavy lifting of sending the +contents of a single MP3 file, skipping the ID3 tag and embedding ICY +metadata. The only wrinkle is that you need to keep track of when to +send the metadata. + + Since you must send metadata chunks at a fixed intervals, regardless +of when you happen to switch from one MP3 file to the next, each time +you call play-current you need to tell it when the next metadata is due, +and when it returns, it must tell you the same thing so you can pass the +information to the next call to play-current. If play-current gets NIL +from the song source, it returns NIL, which allows the play-songs LOOP +to end. + + In addition to handling the looping, play-songs also provides a +HANDLER-CASE to trap the error that will be signaled when the MP3 client +disconnects from the server and one of the writes to the socket, down in +play-current, fails. Since the HANDLER-CASE is outside the LOOP, +handling the error will break out of the loop, allowing play-songs to +return. + + (defun play-songs (stream song-source metadata-interval) + (handler-case + (loop + for next-metadata = metadata-interval + then (play-current + stream + song-source + next-metadata + metadata-interval) + while next-metadata) + (error (e) (format *trace-output* "Caught error in play-songs: ~a" e)))) + + Finally, you're ready to implement play-current, which actually sends +the Shoutcast data. The basic idea is that you get the current song +from the song source, open the song's file, and then loop reading data +from the file and writing it to the socket until either you reach the +end of the file or the current song is no longer the current song. + + There are only two complications: One is that you need to make sure +you send the metadata at the correct interval. The other is that if the +file starts with an ID3 tag, you want to skip it. 
If you don't worry +too much about I/O efficiency, you can implement play-current like this: + + (defun play-current (out song-source next-metadata metadata-interval) + (let ((song (current-song song-source))) + (when song + (let ((metadata (make-icy-metadata (title song)))) + (with-open-file (mp3 (file song)) + (unless (file-position mp3 (id3-size song)) + (error "Can't skip to position ~d in ~a" (id3-size song) (file song))) + (loop for byte = (read-byte mp3 nil nil) + while (and byte (still-current-p song song-source)) do + (write-byte byte out) + (decf next-metadata) + when (and (zerop next-metadata) metadata-interval) do + (write-sequence metadata out) + (setf next-metadata metadata-interval)) + + (maybe-move-to-next-song song song-source))) + next-metadata))) + + This function gets the current song from the song source and gets a +buffer containing the metadata it'll need to send by passing the title +to make-icy-metadata. Then it opens the file and skips past the ID3 tag +using the two-argument form of FILE-POSITION. Then it commences reading +bytes from the file and writing them to the request stream. (4) + + It'll break out of the loop either when it reaches the end of the +file or when the song source's current song changes out from under it. +In the meantime, whenever next-metadata gets to zero (if you're supposed +to send metadata at all), it writes metadata to the stream and resets +next-metadata. Once it finishes the loop, it checks to see if the song +is still the song source's current song; if it is, that means it broke +out of the loop because it read the whole file, in which case it tells +the song source to move to the next song. Otherwise, it broke out of +the loop because someone changed the current song out from under it, and +it just returns. In either case, it returns the number of bytes left +before the next metadata is due so it can be passed in the next call to +play-current. (5) + + The function make-icy-metadata, which takes the title of the current +song and generates an array of bytes containing a properly formatted +chunk of ICY metadata, is also straightforward. (6) + + (defun make-icy-metadata (title) + (let* ((text (format nil "StreamTitle='~a';" (substitute #\Space #\' title))) + (blocks (ceiling (length text) 16)) + (buffer (make-array (1+ (* blocks 16)) + :element-type '(unsigned-byte 8) + :initial-element 0))) + (setf (aref buffer 0) blocks) + (loop + for char across text + for i from 1 + do (setf (aref buffer i) (char-code char))) + buffer)) + + Depending on how your particular Lisp implementation handles its +streams, and also how many MP3 clients you want to serve at once, the +simple version of play-current may or may not be efficient enough. + + The potential problem with the simple implementation is that you have +to call READ-BYTE and WRITE-BYTE for every byte you transfer. It's +possible that each call may result in a relatively expensive system call +to read or write one byte. And even if Lisp implements its own streams +with internal buffering so not every call to READ-BYTE or WRITE-BYTE +results in a system call, function calls still aren't free. In +particular, in implementations that provide user-extensible streams +using so-called Gray Streams, READ-BYTE and WRITE-BYTE may result in a +generic function call under the covers to dispatch on the class of the +stream argument. 
While generic function dispatch is normally speedy +enough that you don't have to worry about it, it's a bit more expensive +than a nongeneric function call and thus not something you necessarily +want to do several million times in a few minutes if you can avoid it. + + A more efficient, if slightly more complex, way to implement +play-current is to read and write multiple bytes at a time using the +functions READ-SEQUENCE and WRITE-SEQUENCE. This also gives you a chance +to match your file reads with the natural block size of the file system, +which will likely give you the best disk throughput. Of course, no +matter what buffer size you use, keeping track of when to send the +metadata becomes a bit more complicated. A more efficient version of +play-current that uses READ-SEQUENCE and WRITE-SEQUENCE might look like +this: + + (defun play-current (out song-source next-metadata metadata-interval) + (let ((song (current-song song-source))) + (when song + (let ((metadata (make-icy-metadata (title song))) + (buffer (make-array size :element-type '(unsigned-byte 8)))) + (with-open-file (mp3 (file song)) + (labels ((write-buffer (start end) + (if metadata-interval + (write-buffer-with-metadata start end) + (write-sequence buffer out :start start :end end))) + + (write-buffer-with-metadata (start end) + (cond + ((> next-metadata (- end start)) + (write-sequence buffer out :start start :end end) + (decf next-metadata (- end start))) + (t + (let ((middle (+ start next-metadata))) + (write-sequence buffer out :start start :end middle) + (write-sequence metadata out) + (setf next-metadata metadata-interval) + (write-buffer-with-metadata middle end)))))) + + (multiple-value-bind (skip-blocks skip-bytes) + (floor (id3-size song) (length buffer)) + + (unless (file-position mp3 (* skip-blocks (length buffer))) + (error "Couldn't skip over ~d ~d byte blocks." + skip-blocks (length buffer))) + + (loop for end = (read-sequence buffer mp3) + for start = skip-bytes then 0 + do (write-buffer start end) + while (and (= end (length buffer)) + (still-current-p song song-source))) + + (maybe-move-to-next-song song song-source))))) + next-metadata))) + + Now you're ready to put all the pieces together. In the next chapter +you'll write a Web interface to the Shoutcast server developed in this +chapter, using the MP3 database from Chapter 27 as the source of songs. + + ---------- Footnotes ---------- + + (1) Another thing you may want to do while working on this code is to +evaluate the form (net.aserve::debug-on :notrap). This tells +AllegroServe to not trap errors signaled by your code, which will allow +you to debug them in the normal Lisp debugger. In SLIME this will pop +up a SLIME debugger buffer just like any other error. + + (2) Shoutcast headers are usually sent in lowercase, so you need to +escape the names of the keyword symbols used to identify them to +AllegroServe to keep the Lisp reader from converting them to all +uppercase. Thus, you'd write :|icy-metaint| rather than :icy-metaint. +You could also write :\i\c\y-\m\e\t\a\i\n\t, but that'd be silly. + + (3) The function turn-off-chunked-transfer-encoding is a bit of a +kludge. There's no way to turn off chunked transfer encoding via +AllegroServe's official APIs without specifying a content length because +any client that advertises itself as an HTTP/1.1 client, which iTunes +does, is supposed to understand it. But this does the trick. + + (4) Most MP3-playing software will display the metadata somewhere in +the user interface. 
However, the XMMS program on Linux by default +doesn't. To get XMMS to display Shoutcast metadata, press Ctrl+P to see +the Preferences pane. Then in the Audio I/O Plugins tab (the leftmost +tab in version 1.2.10), select the MPEG Layer 1/2/3 Player +(libmpg123.so) and hit the Configure button. Then select the Streaming +tab on the configuration window, and at the bottom of the tab in the +SHOUTCAST/Icecast section, check the "Enable SHOUTCAST/Icecast title +streaming" box. + + (5) Folks coming to Common Lisp from Scheme might wonder why +play-current can't just call itself recursively. In Scheme that would +work fine since Scheme implementations are required by the Scheme +specification to support "an unbounded number of active tail calls." +Common Lisp implementations are allowed to have this property, but it +isn't required by the language standard. Thus, in Common Lisp the +idiomatic way to write loops is with a looping construct, not with +recursion. + + (6) This function assumes, as has other code you've written, that +your Lisp implementation's internal character encoding is ASCII or a +superset of ASCII, so you can use CHAR-CODE to translate Lisp CHARACTER +objects to bytes of ASCII data. + + +File: pcl.info, Node: Chapter 29, Next: Chapter 30, Prev: Chapter 28, Up: Top + +29. Practical: An MP3 Browser +============================= + +The final step in building the MP3 streaming application is to provide a +Web interface that allows a user to find the songs they want to listen +to and add them to a playlist that the Shoutcast server will draw upon +when the user's MP3 client requests the stream URL. For this component +of the application, you'll pull together several bits of code from the +previous few chapters: the MP3 database, the define-url-function macro +from Chapter 26, and, of course, the Shoutcast server itself. + +* Menu: + +* 29-1:: Playlists +* 29-2:: Playlists As Song Sources +* 29-3:: Manipulating the Playlist +* 29-4:: Query Parameter Types +* 29-5:: Boilerplate HTML +* 29-6:: The Browse Page +* 29-7:: The Playlist +* 29-8:: Finding a Playlist +* 29-9:: Running the App + + +File: pcl.info, Node: 29-1, Next: 29-2, Prev: Chapter 29, Up: Chapter 29 + +Playlists +========= + +The basic idea behind the interface will be that each MP3 client that +connects to the Shoutcast server gets its own playlist, which serves as +the source of songs for the Shoutcast server. A playlist will also +provide facilities beyond those needed by the Shoutcast server: through +the Web interface the user will be able to add songs to the playlist, +delete songs already in the playlist, and reorder the playlist by +sorting and shuffling. + + You can define a class to represent playlists like this: + + (defclass playlist () + ((id :accessor id :initarg :id) + (songs-table :accessor songs-table :initform (make-playlist-table)) + (current-song :accessor current-song :initform *empty-playlist-song*) + (current-idx :accessor current-idx :initform 0) + (ordering :accessor ordering :initform :album) + (shuffle :accessor shuffle :initform :none) + (repeat :accessor repeat :initform :none) + (user-agent :accessor user-agent :initform "Unknown") + (lock :reader lock :initform (make-process-lock)))) + + The id of a playlist is the key you extract from the request object +passed to find-song-source when looking up a playlist. You don't +actually need to store it in the playlist object, but it makes debugging +a bit easier if you can find out from an arbitrary playlist object what +its id is. 
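+
+   To make the role of the id concrete, here is a hedged, REPL-style
+sketch; the addresses shown are made up, and it relies on the
+lookup-playlist function defined a bit later in this chapter, which
+creates and caches one playlist object per id:
+
+     ;; Sketch only: lookup-playlist is defined in the section
+     ;; "Playlists As Song Sources."
+     (id (lookup-playlist "192.168.1.23"))     ; ==> "192.168.1.23"
+
+     (eql (lookup-playlist "192.168.1.23")     ; same id, same object
+          (lookup-playlist "192.168.1.23"))    ; ==> T
+
+     (eql (lookup-playlist "192.168.1.23")     ; different id,
+          (lookup-playlist "192.168.1.42"))    ; different playlist ==> NIL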
+ + The heart of the playlist is the songs-table slot, which will hold a +table object. The schema for this table will be the same as for the +main MP3 database. The function make-playlist-table, which you use to +initialize songs-table, is simply this: + + (defun make-playlist-table () + (make-instance 'table :schema *mp3-schema*)) + +* Menu: + +* 29-1-1:: The Package + + By storing the list of songs as a table, you can use the database +functions from Chapter 27 to manipulate the playlist: you can add to the +playlist with insert-row, delete songs with delete-rows, and reorder the +playlist with sort-rows and shuffle-table. + + The current-song and current-idx slots keep track of which song is +playing: current-song is an actual song object, while current-idx is the +index into the songs-table of the row representing the current song. +You'll see in the section "Manipulating the Playlist" how to make sure +current-song is updated whenever current-idx changes. + + The ordering and shuffle slots hold information about how the songs +in songs-table are to be ordered. The ordering slot holds a keyword +that tells how the songs-table should be sorted when it's not shuffled. +The legal values are :genre, :artist, :album, and :song. The shuffle +slot holds one of the keywords :none, :song, or :album, which specifies +how songs-table should be shuffled, if at all. + + The repeat slot also holds a keyword, one of :none, :song, or :all, +which specifies the repeat mode for the playlist. If repeat is :none, +after the last song in the songs-table has been played, the current-song +goes back to a default MP3. When :repeat is :song, the playlist keeps +returning the same current-song forever. And if it's :all, after the +last song, current-song goes back to the first song. + + The user-agent slot holds the value of the User-Agent header sent by +the MP3 client in its request for the stream. You need to hold onto +this value purely for use in the Web interface-the User-Agent header +identifies the program that made the request, so you can display the +value on the page that lists all the playlists to make it easier to tell +which playlist goes with which connection when multiple clients connect. + + Finally, the lock slot holds a process lock created with the function +make-process-lock, which is part of Allegro's MULTIPROCESSING package. +You'll need to use that lock in certain functions that manipulate +playlist objects to ensure that only one thread at a time manipulates a +given playlist object. You can define the following macro, built upon +the with-process-lock macro from MULTIPROCESSING, to give an easy way to +wrap a body of code that should be performed while holding a playlist's +lock: + + (defmacro with-playlist-locked ((playlist) &body body) + `(with-process-lock ((lock ,playlist)) + ,@body)) + + The with-process-lock macro acquires exclusive access to the process +lock given and then executes the body forms, releasing the lock +afterward. By default, with-process-lock allows recursive locks, +meaning the same thread can safely acquire the same lock multiple times. 
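+
+   To make the recursive-locking point concrete, here is a hedged
+sketch; the helper function skip-current-song is hypothetical and not
+part of the application, but the functions it calls are defined later
+in this chapter, and each of them takes the playlist's lock itself.
+Because with-process-lock allows recursive locking, the nested
+acquisitions by the same thread simply succeed rather than deadlocking.
+
+     ;; Hypothetical helper, shown only to illustrate recursive locking.
+     (defun skip-current-song (playlist)
+       (with-playlist-locked (playlist)            ; first acquisition
+         (let ((song (current-song playlist)))     ; :around method locks again
+           (maybe-move-to-next-song song playlist)))) ; and a third time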
+ + +File: pcl.info, Node: 29-1-1, Next: 29-1, Prev: 29-1, Up: 29-1 + +The Package +=========== + +You can define the package for the code in this chapter with the +following DEFPACKAGE: + + (defpackage :com.gigamonkeys.mp3-browser + (:use :common-lisp + :net.aserve + :com.gigamonkeys.html + :com.gigamonkeys.shoutcast + :com.gigamonkeys.url-function + :com.gigamonkeys.mp3-database + :com.gigamonkeys.id3v2) + (:import-from :acl-socket + :ipaddr-to-dotted + :remote-host) + (:import-from :multiprocessing + :make-process-lock + :with-process-lock) + (:export :start-mp3-browser)) + + Because this is a high-level application, it uses a lot of +lower-level packages. It also imports three symbols from the ACL-SOCKET +package and two more from MULTIPROCESSING since it just needs those five +and not the other 139 symbols those two packages export. + + +File: pcl.info, Node: 29-2, Next: 29-3, Prev: 29-1, Up: Chapter 29 + +Playlists As Song Sources +========================= + +To use playlists as a source of songs for the Shoutcast server, you'll +need to implement a method on the generic function find-song-source from +Chapter 28. Since you're going to have multiple playlists, you need a +way to find the right one for each client that connects to the server. +The mapping part is easy-you can define a variable that holds an EQUAL +hash table that you can use to map from some identifier to the playlist +object. + + (defvar *playlists* (make-hash-table :test #'equal)) + + You'll also need to define a process lock to protect access to this +hash table like this: + + (defparameter *playlists-lock* (make-process-lock :name "playlists-lock")) + + Then define a function that looks up a playlist given an ID, creating +a new playlist object if necessary and using with-process-lock to ensure +that only one thread at a time manipulates the hash table. (1) + + (defun lookup-playlist (id) + (with-process-lock (*playlists-lock*) + (or (gethash id *playlists*) + (setf (gethash id *playlists*) (make-instance 'playlist :id id))))) + + Then you can implement find-song-source on top of that function and +another, playlist-id, that takes an AllegroServe request object and +returns the appropriate playlist identifier. The find-song-source +function is also where you grab the User-Agent string out of the request +object and stash it in the playlist object. + + (defmethod find-song-source ((type (eql 'playlist)) request) + (let ((playlist (lookup-playlist (playlist-id request)))) + (with-playlist-locked (playlist) + (let ((user-agent (header-slot-value request :user-agent))) + (when user-agent (setf (user-agent playlist) user-agent)))) + playlist)) + + The trick, then, is how you implement playlist-id, the function that +extracts the identifier from the request object. You have a couple +options, each with different implications for the user interface. You +can pull whatever information you want out of the request object, but +however you decide to identify the client, you need some way for the +user of the Web interface to get hooked up to the right playlist. + + For now you can take an approach that "just works" as long as there's +only one MP3 client per machine connecting to the server and as long as +the user is browsing the Web interface from the machine running the MP3 +client: you'll use the IP address of the client machine as the +identifier. This way you can find the right playlist for a request +regardless of whether the request is from the MP3 client or a Web +browser. 
You will, however, provide a way in the Web interface to +select a different playlist from the browser, so the only real +constraint this choice puts on the application is that there can be only +one connected MP3 client per client IP address. (2) The implementation +of playlist-id looks like this: + + (defun playlist-id (request) + (ipaddr-to-dotted (remote-host (request-socket request)))) + + The function request-socket is part of AllegroServe, while +remote-host and ipaddr-to-dotted are part of Allegro's socket library. + + To make a playlist usable as a song source by the Shoutcast server, +you need to define methods on current-song, still-current-p, and +maybe-move-to-next-song that specialize their source parameter on +playlist. The current-song method is already taken care of: by defining +the accessor current-song on the eponymous slot, you automatically got a +current-song method specialized on playlist that returns the value of +that slot. However, to make accesses to the playlist thread safe, you +need to lock the playlist before accessing the current-song slot. In +this case, the easiest way is to define an :around method like the +following: + + (defmethod current-song :around ((playlist playlist)) + (with-playlist-locked (playlist) (call-next-method))) + + Implementing still-current-p is also quite simple, assuming you can +be sure that current-song gets updated with a new song object only when +the current song actually changes. Again, you need to acquire the +process lock to ensure you get a consistent view of the playlist's +state. + + (defmethod still-current-p (song (playlist playlist)) + (with-playlist-locked (playlist) + (eql song (current-song playlist)))) + + The trick, then, is to make sure the current-song slot gets updated +at the right times. However, the current song can change in a number of +ways. The obvious one is when the Shoutcast server calls +maybe-move-to-next-song. But it can also change when songs are added to +the playlist, when the Shoutcast server has run out of songs, or even if +the playlist's repeat mode is changed. + + Rather than trying to write code specific to every situation to +determine whether to update current-song, you can define a function, +update-current-if-necessary, that updates current-song if the song +object in current-song no longer matches the file that the current-idx +slot says should be playing. Then, if you call this function after any +manipulation of the playlist that could possibly put those two slots out +of sync, you're sure to keep current-song set properly. Here are +update-current-if-necessary and its helper functions: + + (defun update-current-if-necessary (playlist) + (unless (equal (file (current-song playlist)) + (file-for-current-idx playlist)) + (reset-current-song playlist))) + + (defun file-for-current-idx (playlist) + (if (at-end-p playlist) + nil + (column-value (nth-row (current-idx playlist) (songs-table playlist)) :file))) + + (defun at-end-p (playlist) + (>= (current-idx playlist) (table-size (songs-table playlist)))) + + You don't need to add locking to these functions since they'll be +called only from functions that will take care of locking the playlist +first. + + The function reset-current-song introduces one more wrinkle: because +you want the playlist to provide an endless stream of MP3s to the +client, you don't want to ever set current-song to NIL. 
Instead, when a +playlist runs out of songs to play-when songs-table is empty or after +the last song has been played and repeat is set to :none-then you need +to set current-song to a special song whose file is an MP3 of silence +(3) and whose title explains why no music is playing. Here's some code +to define two parameters, *empty-playlist-song* and +*end-of-playlist-song*, each set to a song with the file named by +*silence-mp3* as their file and an appropriate title: + + (defparameter *silence-mp3* ...) + + (defun make-silent-song (title &optional (file *silence-mp3*)) + (make-instance + 'song + :file file + :title title + :id3-size (if (id3-p file) (size (read-id3 file)) 0))) + + (defparameter *empty-playlist-song* (make-silent-song "Playlist empty.")) + + (defparameter *end-of-playlist-song* (make-silent-song "At end of playlist.")) + + reset-current-song uses these parameters when the current-idx doesn't +point to a row in songs-table. Otherwise, it sets current-song to a +song object representing the current row. + + (defun reset-current-song (playlist) + (setf + (current-song playlist) + (cond + ((empty-p playlist) *empty-playlist-song*) + ((at-end-p playlist) *end-of-playlist-song*) + (t (row->song (nth-row (current-idx playlist) (songs-table playlist))))))) + + (defun row->song (song-db-entry) + (with-column-values (file song artist album id3-size) song-db-entry + (make-instance + 'song + :file file + :title (format nil "~a by ~a from ~a" song artist album) + :id3-size id3-size))) + + (defun empty-p (playlist) + (zerop (table-size (songs-table playlist)))) + + Now, at last, you can implement the method on maybe-move-to-next-song +that moves current-idx to its next value, based on the playlist's repeat +mode, and then calls update-current-if-necessary. You don't change +current-idx when it's already at the end of the playlist because you +want it to keep its current value, so it'll point at the next song you +add to the playlist. This function must lock the playlist before +manipulating it since it's called by the Shoutcast server code, which +doesn't do any locking. + + (defmethod maybe-move-to-next-song (song (playlist playlist)) + (with-playlist-locked (playlist) + (when (still-current-p song playlist) + (unless (at-end-p playlist) + (ecase (repeat playlist) + (:song) ; nothing changes + (:none (incf (current-idx playlist))) + (:all (setf (current-idx playlist) + (mod (1+ (current-idx playlist)) + (table-size (songs-table playlist))))))) + (update-current-if-necessary playlist)))) + + ---------- Footnotes ---------- + + (1) The intricacies of concurrent programming are beyond the scope of +this book. The basic idea is that if you have multiple threads of +control-as you will in this application with some threads running the +shoutcast function and other threads responding to requests from the +browser-then you need to make sure only one thread at a time manipulates +an object in order to prevent one thread from seeing the object in an +inconsistent state while another thread is working on it. In this +function, for instance, if two new MP3 clients are connecting at the +same time, they'd both try to add an entry to *playlists* and might +interfere with each other. The with-process-lock ensures that each +thread gets exclusive access to the hash table for long enough to do the +work it needs to do. + + (2) This approach also assumes that every client machine has a unique +IP address. 
This assumption should hold as long as all the users are on +the same LAN but may not hold if clients are connecting from behind a +firewall that does network address translation. Deploying this +application outside a LAN will require some modifications, but if you +want to deploy this application to the wider Internet, you'd better know +enough about networking to figure out an appropriate scheme yourself. + + (3) Unfortunately, because of licensing issues around the MP3 format, +it's not clear that it's legal for me to provide you with such an MP3 +without paying licensing fees to Fraunhofer IIS. I got mine as part of +the software that came with my Slimp3 from Slim Devices. You can grab +it from their Subversion repository via the Web at +http://svn.slimdevices.com/*checkout*/trunk/server/ +HTML/EN/html/silentpacket.mp3?rev=2. Or buy a Squeezebox, the new, +wireless version of Slimp3, and you'll get silentpacket.mp3 as part of +the software that comes with it. Or find an MP3 of John Cage's piece +4'33". + + +File: pcl.info, Node: 29-3, Next: 29-4, Prev: 29-2, Up: Chapter 29 + +Manipulating the Playlist +========================= + +The rest of the playlist code is functions used by the Web interface to +manipulate playlist objects, including adding and deleting songs, +sorting and shuffling, and setting the repeat mode. As in the helper +functions in the previous section, you don't need to worry about locking +in these functions because, as you'll see, the lock will be acquired in +the Web interface function that calls these. + + Adding and deleting is mostly a question of manipulating the +songs-table. The only extra work you have to do is to keep the +current-song and current-idx in sync. For instance, whenever the +playlist is empty, its current-idx will be zero, and the current-song +will be the *empty-playlist-song*. If you add a song to an empty +playlist, then the index of zero is now in bounds, and you should change +the current-song to the newly added song. By the same token, when +you've played all the songs in a playlist and current-song is +*end-of-playlist-song*, adding a song should cause current-song to be +reset. All this really means, though, is that you need to call +update-current-if-necessary at the appropriate points. + + Adding songs to a playlist is a bit involved because of the way the +Web interface communicates which songs to add. For reasons I'll discuss +in the next section, the Web interface code can't just give you a simple +set of criteria to use in selecting songs from the database. Instead, +it gives you the name of a column and a list of values, and you're +supposed to add all the songs from the main database where the given +column has a value in the list of values. Thus, to add the right songs, +you need to first build a table object containing the desired values, +which you can then use with an in query against the song database. 
So, +add-songs looks like this: + + (defun add-songs (playlist column-name values) + (let ((table (make-instance + 'table + :schema (extract-schema (list column-name) (schema *mp3s*))))) + (dolist (v values) (insert-row (list column-name v) table)) + (do-rows (row (select :from *mp3s* :where (in column-name table))) + (insert-row row (songs-table playlist)))) + (update-current-if-necessary playlist)) + + Deleting songs is a bit simpler; you just need to be able to delete +songs from the songs-table that match particular criteria-either a +particular song or all songs in a particular genre, by a particular +artist, or from a particular album. So, you can provide a delete-songs +function that takes keyword/value pairs, which are used to construct a +matching :where clause you can pass to the delete-rows database +function. + + Another complication that arises when deleting songs is that +current-idx may need to change. Assuming the current song isn't one of +the ones just deleted, you'd like it to remain the current song. But if +songs before it in songs-table are deleted, it'll be in a different +position in the table after the delete. So after a call to delete-rows, +you need to look for the row containing the current song and reset +current-idx. If the current song has itself been deleted, then, for +lack of anything better to do, you can reset current-idx to zero. After +updating current-idx, calling update-current-if-necessary will take care +of updating current-song. And if current-idx changed but still points +at the same song, current-song will be left alone. + + (defun delete-songs (playlist &rest names-and-values) + (delete-rows + :from (songs-table playlist) + :where (apply #'matching (songs-table playlist) names-and-values)) + (setf (current-idx playlist) (or (position-of-current playlist) 0)) + (update-current-if-necessary playlist)) + + (defun position-of-current (playlist) + (let* ((table (songs-table playlist)) + (matcher (matching table :file (file (current-song playlist)))) + (pos 0)) + (do-rows (row table) + (when (funcall matcher row) + (return-from position-of-current pos)) + (incf pos)))) + + You can also provide a function to completely clear the playlist, +which uses delete-all-rows and doesn't have to worry about finding the +current song since it has obviously been deleted. The call to +update-current-if-necessary will take care of setting current-song to +NIL. + + (defun clear-playlist (playlist) + (delete-all-rows (songs-table playlist)) + (setf (current-idx playlist) 0) + (update-current-if-necessary playlist)) + + Sorting and shuffling the playlist are related in that the playlist +is always either sorted or shuffled. The shuffle slot says whether the +playlist should be shuffled and if so how. If it's set to :none, then +the playlist is ordered according to the value in the ordering slot. +When shuffle is :song, the playlist will be randomly permuted. And when +it's set to :album, the list of albums is randomly permuted, but the +songs within each album are listed in track order. Thus, the +sort-playlist function, which will be called by the Web interface code +whenever the user selects a new ordering, needs to set ordering to the +desired ordering and set shuffle to :none before calling order-playlist, +which actually does the sort. As in delete-songs, you need to use +position-of-current to reset current-idx to the new location of the +current song. 
However, this time you don't need to call +update-current-if-necessary since you know the current song is still in +the table. + + (defun sort-playlist (playlist ordering) + (setf (ordering playlist) ordering) + (setf (shuffle playlist) :none) + (order-playlist playlist) + (setf (current-idx playlist) (position-of-current playlist))) + + In order-playlist, you can use the database function sort-rows to +actually perform the sort, passing a list of columns to sort by based on +the value of ordering. + + (defun order-playlist (playlist) + (apply #'sort-rows (songs-table playlist) + (case (ordering playlist) + (:genre '(:genre :album :track)) + (:artist '(:artist :album :track)) + (:album '(:album :track)) + (:song '(:song))))) + + The function shuffle-playlist, called by the Web interface code when +the user selects a new shuffle mode, works in a similar fashion except +it doesn't need to change the value of ordering. Thus, when +shuffle-playlist is called with a shuffle of :none, the playlist goes +back to being sorted according to the most recent ordering. Shuffling +by songs is simple-just call shuffle-table on songs-table. Shuffling by +albums is a bit more involved but still not rocket science. + + (defun shuffle-playlist (playlist shuffle) + (setf (shuffle playlist) shuffle) + (case shuffle + (:none (order-playlist playlist)) + (:song (shuffle-by-song playlist)) + (:album (shuffle-by-album playlist))) + (setf (current-idx playlist) (position-of-current playlist))) + + (defun shuffle-by-song (playlist) + (shuffle-table (songs-table playlist))) + + (defun shuffle-by-album (playlist) + (let ((new-table (make-playlist-table))) + (do-rows (album-row (shuffled-album-names playlist)) + (do-rows (song (songs-for-album playlist (column-value album-row :album))) + (insert-row song new-table))) + (setf (songs-table playlist) new-table))) + + (defun shuffled-album-names (playlist) + (shuffle-table + (select + :columns :album + :from (songs-table playlist) + :distinct t))) + + (defun songs-for-album (playlist album) + (select + :from (songs-table playlist) + :where (matching (songs-table playlist) :album album) + :order-by :track)) + + The last manipulation you need to support is setting the playlist's +repeat mode. Most of the time you don't need to take any extra action +when setting repeat-its value comes into play only in +maybe-move-to-next-song. However, you need to update the current-song +as a result of changing repeat in one situation, namely, if current-idx +is at the end of a nonempty playlist and repeat is being changed to +:song or :all. In that case, you want to continue playing, either +repeating the last song or starting at the beginning of the playlist. +So, you should define an :after method on the generic function (setf +repeat). + + (defmethod (setf repeat) :after (value (playlist playlist)) + (if (and (at-end-p playlist) (not (empty-p playlist))) + (ecase value + (:song (setf (current-idx playlist) (1- (table-size (songs-table playlist))))) + (:none) + (:all (setf (current-idx playlist) 0))) + (update-current-if-necessary playlist))) + + Now you have all the underlying bits you need. All that remains is +the code that will provide a Web-based user interface for browsing the +MP3 database and manipulating playlists. The interface will consist of +three main functions defined with define-url-function: one for browsing +the song database, one for viewing and manipulating a single playlist, +and one for listing all the available playlists. 
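+
+   As a rough, hedged illustration of how the manipulation functions
+from this section fit together, consider the following REPL-style
+sketch; the playlist id and the genre are made up, and it assumes the
+main MP3 database *mp3s* has already been loaded as described in
+Chapter 27:
+
+     ;; Sketch only: calling the playlist functions directly, the way
+     ;; the Web interface functions will, with the lock held.
+     (let ((playlist (lookup-playlist "192.168.1.23")))
+       (with-playlist-locked (playlist)
+         (add-songs playlist :genre '("Blues")) ; every song in the genre
+         (sort-playlist playlist :artist)       ; order by artist
+         (setf (repeat playlist) :all)          ; wrap around at the end
+         (title (current-song playlist))))      ; ==> "... by ... from ..."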
+ + But before you get to writing these three functions, you need to +start with some helper functions and HTML macros that they'll use. + + +File: pcl.info, Node: 29-4, Next: 29-5, Prev: 29-3, Up: Chapter 29 + +Query Parameter Types +===================== + +Since you'll be using define-url-function, you need to define a few +methods on the string->type generic function from Chapter 28 that +define-url-function uses to convert string query parameters into Lisp +objects. In this application, you'll need methods to convert strings to +integers, keyword symbols, and a list of values. + + The first two are quite simple. + + (defmethod string->type ((type (eql 'integer)) value) + (parse-integer (or value "") :junk-allowed t)) + + (defmethod string->type ((type (eql 'keyword)) value) + (and (plusp (length value)) (intern (string-upcase value) :keyword))) + + The last string->type method is slightly more complex. For reasons +I'll get to in a moment, you'll need to generate pages that display a +form that contains a hidden field whose value is a list of strings. +Since you're responsible for generating the value in the hidden field +and for parsing it when it comes back, you can use whatever encoding is +convenient. You could use the functions WRITE-TO-STRING and +READ-FROM-STRING, which use the Lisp printer and reader to write and +read data to and from strings, except the printed representation of +strings can contain quotation marks and other characters that may cause +problems when embedded in the value attribute of an INPUT element. So, +you'll need to escape those characters somehow. Rather than trying to +come up with your own escaping scheme, you can just use base 64, an +encoding commonly used to protect binary data sent through e-mail. +AllegroServe comes with two functions, base64-encode and base64-decode, +that do the encoding and decoding for you, so all you have to do is +write a pair of functions: one that encodes a Lisp object by converting +it to a readable string with WRITE-TO-STRING and then base 64 encoding +it and, conversely, another to decode such a string by base 64 decoding +it and passing the result to READ-FROM-STRING. You'll want to wrap the +calls to WRITE-TO-STRING and READ-FROM-STRING in WITH-STANDARD-IO-SYNTAX +to make sure all the variables that affect the printer and reader are +set to their standard values. However, because you're going to be +reading data that's coming in from the network, you'll definitely want +to turn off one feature of the reader-the ability to evaluate arbitrary +Lisp code while reading! (1) You can define your own macro +with-safe-io-syntax, which wraps its body forms in +WITH-STANDARD-IO-SYNTAX wrapped around a LET that binds *READ-EVAL* to +NIL. + + (defmacro with-safe-io-syntax (&body body) + `(with-standard-io-syntax + (let ((*read-eval* nil)) + ,@body))) + + Then the encoding and decoding functions are trivial. + + (defun obj->base64 (obj) + (base64-encode (with-safe-io-syntax (write-to-string obj)))) + + (defun base64->obj (string) + (ignore-errors + (with-safe-io-syntax (read-from-string (base64-decode string))))) + + Finally, you can use these functions to define a method on +string->type that defines the conversion for the query parameter type +base64-list. 
+ + (defmethod string->type ((type (eql 'base-64-list)) value) + (let ((obj (base64->obj value))) + (if (listp obj) obj nil))) + + ---------- Footnotes ---------- + + (1) The reader supports a bit of syntax, #., that causes the +following s-expression to be evaluated at read time. This is +occasionally useful in source code but obviously opens a big security +hole when you read untrusted data. However, you can turn off this +syntax by setting *READ-EVAL* to NIL, which will cause the reader to +signal an error if it encounters #.. + + +File: pcl.info, Node: 29-5, Next: 29-6, Prev: 29-4, Up: Chapter 29 + +Boilerplate HTML +================ + +Next you need to define some HTML macros and helper functions to make it +easy to give the different pages in the application a consistent look +and feel. You can start with an HTML macro that defines the basic +structure of a page in the application. + + (define-html-macro :mp3-browser-page ((&key title (header title)) &body body) + `(:html + (:head + (:title ,title) + (:link :rel "stylesheet" :type "text/css" :href "mp3-browser.css")) + (:body + (standard-header) + (when ,header (html (:h1 :class "title" ,header))) + ,@body + (standard-footer)))) + + You should define standard-header and standard-footer as separate +functions for two reasons. First, during development you can redefine +those functions and see the effect immediately without having to +recompile functions that use the :mp3-browser-page macro. Second, it +turns out that one of the pages you'll write later won't be defined with +:mp3-browser-page but will still need the standard header and footers. +They look like this: + + (defparameter *r* 25) + + (defun standard-header () + (html + ((:p :class "toolbar") + "[" (:a :href (link "/browse" :what "genre") "All genres") "] " + "[" (:a :href (link "/browse" :what "genre" :random *r*) "Random genres") "] " + "[" (:a :href (link "/browse" :what "artist") "All artists") "] " + "[" (:a :href (link "/browse" :what "artist" :random *r*) "Random artists") "] " + "[" (:a :href (link "/browse" :what "album") "All albums") "] " + "[" (:a :href (link "/browse" :what "album" :random *r*) "Random albums") "] " + "[" (:a :href (link "/browse" :what "song" :random *r*) "Random songs") "] " + "[" (:a :href (link "/playlist") "Playlist") "] " + "[" (:a :href (link "/all-playlists") "All playlists") "]"))) + + (defun standard-footer () + (html (:hr) ((:p :class "footer") "MP3 Browser v" *major-version* "." *minor-version*))) + + A couple of smaller HTML macros and helper functions automate other +common patterns. The :table-row HTML macro makes it easier to generate +the HTML for a single row of a table. It uses a feature of FOO that +I'll discuss in Chapter 31, an &attributes parameter, which causes uses +of the macro to be parsed just like normal s-expression HTML forms, with +any attributes gathered into a list that will be bound to the +&attributes parameter. It looks like this: + + (define-html-macro :table-row (&attributes attrs &rest values) + `(:tr ,@attrs ,@(loop for v in values collect `(:td ,v)))) + + And the link function generates a URL back into the application to be +used as the HREF attribute with an A element, building a query string +out of a set of keyword/value pairs and making sure all special +characters are properly escaped. 
For instance, instead of writing this: + + (:a :href "browse?what=artist&genre=Rhythm+%26+Blues" "Artists") + + you can write the following: + + (:a :href (link "browse" :what "artist" :genre "Rhythm & Blues") "Artists") + + It looks like this: + + (defun link (target &rest attributes) + (html + (:attribute + (:format "~a~@[?~{~(~a~)=~a~^&~}~]" target (mapcar #'urlencode attributes))))) + + To URL encode the keys and values, you use the helper function +urlencode, which is a wrapper around the function +encode-form-urlencoded, which is a nonpublic function from AllegroServe. +This is-on one hand-bad form; since the name encode-form-urlencoded +isn't exported from NET.ASERVE, it's possible that +encode-form-urlencoded may go away or get renamed out from under you. +On the other hand, using this unexported symbol for the time being lets +you get work done for the moment; by wrapping encode-form-urlencoded in +your own function, you isolate the crufty code to one function, which +you could rewrite if you had to. + + (defun urlencode (string) + (net.aserve::encode-form-urlencoded string)) + + Finally, you need the CSS style sheet mp3-browser.css used by +:mp3-browser-page. Since there's nothing dynamic about it, it's +probably easiest to just publish a static file with publish-file. + + (publish-file :path "/mp3-browser.css" :file filename :content-type "text/css") + + A sample style sheet is included with the source code for this +chapter on the book's Web site. You'll define a function, at the end of +this chapter, that starts the MP3 browser application. It'll take care +of, among other things, publishing this file. + + +File: pcl.info, Node: 29-6, Next: 29-7, Prev: 29-5, Up: Chapter 29 + +The Browse Page +=============== + +The first URL function will generate a page for browsing the MP3 +database. Its query parameters will tell it what kind of thing the user +is browsing and provide the criteria of what elements of the database +they're interested in. It'll give them a way to select database entries +that match a specific genre, artist, or album. In the interest of +serendipity, you can also provide a way to select a random subset of +matching items. When the user is browsing at the level of individual +songs, the title of the song will be a link that causes that song to be +added to the playlist. Otherwise, each item will be presented with +links that let the user browse the listed item by some other category. +For example, if the user is browsing genres, the entry "Blues" will +contain links to browse all albums, artists, and songs in the genre +Blues. Additionally, the browse page will feature an "Add all" button +that adds every song matching the page's criteria to the user's +playlist. 
The function looks like this: + + (define-url-function browse + (request (what keyword :genre) genre artist album (random integer)) + + (let* ((values (values-for-page what genre artist album random)) + (title (browse-page-title what random genre artist album)) + (single-column (if (eql what :song) :file what)) + (values-string (values->base-64 single-column values))) + (html + (:mp3-browser-page + (:title title) + ((:form :method "POST" :action "playlist") + (:input :name "values" :type "hidden" :value values-string) + (:input :name "what" :type "hidden" :value single-column) + (:input :name "action" :type "hidden" :value :add-songs) + (:input :name "submit" :type "submit" :value "Add all")) + (:ul (do-rows (row values) (list-item-for-page what row))))))) + + This function starts by using the function values-for-page to get a +table containing the values it needs to present. When the user is +browsing by song-when the what parameter is :song-you want to select +complete rows from the database. But when they're browsing by genre, +artist, or album, you want to select only the distinct values for the +given category. The database function select does most of the heavy +lifting, with values-for-page mostly responsible for passing the right +arguments depending on the value of what. This is also where you select +a random subset of the matching rows if necessary. + + (defun values-for-page (what genre artist album random) + (let ((values + (select + :from *mp3s* + :columns (if (eql what :song) t what) + :where (matching *mp3s* :genre genre :artist artist :album album) + :distinct (not (eql what :song)) + :order-by (if (eql what :song) '(:album :track) what)))) + (if random (random-selection values random) values))) + + To generate the title for the browse page, you pass the browsing +criteria to the following function, browse-page-title: + + (defun browse-page-title (what random genre artist album) + (with-output-to-string (s) + (when random (format s "~:(~r~) Random " random)) + (format s "~:(~a~p~)" what random) + (when (or genre artist album) + (when (not (eql what :song)) (princ " with songs" s)) + (when genre (format s " in genre ~a" genre)) + (when artist (format s " by artist ~a " artist)) + (when album (format s " on album ~a" album))))) + + Once you have the values you want to present, you need to do two +things with them. The main task, of course, is to present them, which +happens in the do-rows loop, leaving the rendering of each row to the +function list-item-for-page. That function renders :song rows one way +and all other kinds another way. + + (defun list-item-for-page (what row) + (if (eql what :song) + (with-column-values (song file album artist genre) row + (html + (:li + (:a :href (link "playlist" :file file :action "add-songs") (:b song)) " from " + (:a :href (link "browse" :what :song :album album) album) " by " + (:a :href (link "browse" :what :song :artist artist) artist) " in genre " + (:a :href (link "browse" :what :song :genre genre) genre)))) + (let ((value (column-value row what))) + (html + (:li value " - " + (browse-link :genre what value) + (browse-link :artist what value) + (browse-link :album what value) + (browse-link :song what value)))))) + + (defun browse-link (new-what what value) + (unless (eql new-what what) + (html + "[" + (:a :href (link "browse" :what new-what what value) (:format "~(~as~)" new-what)) + "] "))) + + The other thing on the browse page is a form with several hidden +INPUT fields and an "Add all" submit button. 
You need to use an HTML +form instead of a regular link to keep the application stateless-to make +sure all the information needed to respond to a request comes in the +request itself. Because the browse page results can be partially +random, you need to submit a fair bit of data for the server to be able +to reconstitute the list of songs to add to the playlist. If you didn't +allow the browse page to return randomly generated results, you wouldn't +need much data-you could just submit a request to add songs with +whatever search criteria the browse page used. But if you added songs +that way, with criteria that included a random argument, then you'd end +up adding a different set of random songs than the user was looking at +on the page when they hit the "Add all" button. + + The solution you'll use is to send back a form that has enough +information stashed away in a hidden INPUT element to allow the server +to reconstitute the list of songs matching the browse page criteria. +That information is the list of values returned by values-for-page and +the value of the what parameter. This is where you use the base64-list +parameter type; the function values->base64 extracts the values of a +specified column from the table returned by values-for-page into a list +and then makes a base 64-encoded string out of that list to embed in the +form. + + (defun values->base-64 (column values-table) + (flet ((value (r) (column-value r column))) + (obj->base64 (map-rows #'value values-table)))) + + When that parameter comes back as the value of the values query +parameter to a URL function that declares values to be of type +base-64-list, it'll be automatically converted back to a list. As +you'll see in a moment, that list can then be used to construct a query +that'll return the correct list of songs. (1) When you're browsing by +:song, you use the values from the :file column since they uniquely +identify the actual songs while the song names may not. + + ---------- Footnotes ---------- + + (1) This solution has its drawbacks-if a browse page returns a lot of +results, a fair bit of data is going back and forth under the covers. +Also, the database queries aren't necessarily the most efficient. But +it does keep the application stateless. An alternative approach is to +squirrel away, on the server side, information about the results +returned by browse and then, when a request to add songs come in, find +the appropriate bit of information in order to re-create the correct set +of songs. For instance, you could just save the values list instead of +sending it back in the form. Or you could copy the RANDOM-STATE object +before you generate the browse results so you can later re-create the +same "random" results. But this approach causes its own problems. For +instance, you'd then need to worry about when you can get rid of the +squirreled-away information; you never know when the user might hit the +Back button on their browser to return to an old browse page and then +hit the "Add all" button. Welcome to the wonderful world of Web +programming. + + +File: pcl.info, Node: 29-7, Next: 29-8, Prev: 29-6, Up: Chapter 29 + +The Playlist +============ + +This brings me to the next URL function, playlist. This is the most +complex page of the three-it's responsible for displaying the current +contents of the user's playlist as well as for providing the interface +to manipulate the playlist. But with most of the tedious bookkeeping +handled by define-url-function, it's not too hard to see how playlist +works. 
Here's the beginning of the definition, with just the parameter +list: + + (define-url-function playlist + (request + (playlist-id string (playlist-id request) :package) + (action keyword) ; Playlist manipulation action + (what keyword :file) ; for :add-songs action + (values base-64-list) ; " + file ; for :add-songs and :delete-songs actions + genre ; for :delete-songs action + artist ; " + album ; " + (order-by keyword) ; for :sort action + (shuffle keyword) ; for :shuffle action + (repeat keyword)) ; for :set-repeat action + + In addition to the obligatory request parameter, playlist takes a +number of query parameters. The most important in some ways is +playlist-id, which identifies which playlist object the page should +display and manipulate. For this parameter, you can take advantage of +define-url-function's "sticky parameter" feature. Normally, the +playlist-id won't be supplied explicitly, defaulting to the value +returned by the playlist-id function, namely, the IP address of the +client machine on which the browser is running. However, users can also +manipulate their playlists from different machines than the ones running +their MP3 clients by allowing this value to be explicitly specified. +And if it's specified once, define-url-function will arrange for it to +"stick" by setting a cookie in the browser. Later you'll define a URL +function that generates a list of all existing playlists, which users +can use to pick a playlist other than the one for the machines they're +browsing from. + + The action parameter specifies some action to take on the user's +playlist object. The value of this parameter, which will be converted +to a keyword symbol for you, can be :add-songs, :delete-songs, :clear, +:sort, :shuffle, or :set-repeat. The :add-songs action is used by the +"Add all" button in the browse page and also by the links used to add +individual songs. The other actions are used by the links on the +playlist page itself. + + The file, what, and values parameters are used with the :add-songs +action. By declaring values to be of type base-64-list, the +define-url-function infrastructure will take care of decoding the value +submitted by the "Add all" form. The other parameters are used with +other actions as noted in the comments. + + Now let's look at the body of playlist. The first thing you need to +do is use the playlist-id to look up the queue object and then acquire +the playlist's lock with the following two lines: + + (let ((playlist (lookup-playlist playlist-id))) + (with-playlist-locked (playlist) + + Since lookup-playlist will create a new playlist if necessary, this +will always return a playlist object. Then you take care of any +necessary queue manipulation, dispatching on the value of the action +parameter in order to call one of the playlist functions. + + (case action + (:add-songs (add-songs playlist what (or values (list file)))) + (:delete-songs (delete-songs + playlist + :file file :genre genre + :artist artist :album album)) + (:clear (clear-playlist playlist)) + (:sort (sort-playlist playlist order-by)) + (:shuffle (shuffle-playlist playlist shuffle)) + (:set-repeat (setf (repeat playlist) repeat))) + + All that's left of the playlist function is the actual HTML +generation. Again, you can use the :mp3-browser-page HTML macro to make +sure the basic form of the page matches the other pages in the +application, though this time you pass NIL to the :header argument in +order to leave out the H1 header. 
Here's the rest of the function: + + (html + (:mp3-browser-page + (:title (:format "Playlist - ~a" (id playlist)) :header nil) + (playlist-toolbar playlist) + (if (empty-p playlist) + (html (:p (:i "Empty."))) + (html + ((:table :class "playlist") + (:table-row "#" "Song" "Album" "Artist" "Genre") + (let ((idx 0) + (current-idx (current-idx playlist))) + (do-rows (row (songs-table playlist)) + (with-column-values (track file song album artist genre) row + (let ((row-style (if (= idx current-idx) "now-playing" "normal"))) + (html + ((:table-row :class row-style) + track + (:progn song (delete-songs-link :file file)) + (:progn album (delete-songs-link :album album)) + (:progn artist (delete-songs-link :artist artist)) + (:progn genre (delete-songs-link :genre genre))))) + (incf idx)))))))))))) + + The function playlist-toolbar generates a toolbar containing links to +playlist to perform the various :action manipulations. And +delete-songs-link generates a link to playlist with the :action +parameter set to :delete-songs and the appropriate arguments to delete +an individual file, or all files on an album, by a particular artist or +in a specific genre. + + (defun playlist-toolbar (playlist) + (let ((current-repeat (repeat playlist)) + (current-sort (ordering playlist)) + (current-shuffle (shuffle playlist))) + (html + (:p :class "playlist-toolbar" + (:i "Sort by:") + " [ " + (sort-playlist-button "genre" current-sort) " | " + (sort-playlist-button "artist" current-sort) " | " + (sort-playlist-button "album" current-sort) " | " + (sort-playlist-button "song" current-sort) " ] " + (:i "Shuffle by:") + " [ " + (playlist-shuffle-button "none" current-shuffle) " | " + (playlist-shuffle-button "song" current-shuffle) " | " + (playlist-shuffle-button "album" current-shuffle) " ] " + (:i "Repeat:") + " [ " + (playlist-repeat-button "none" current-repeat) " | " + (playlist-repeat-button "song" current-repeat) " | " + (playlist-repeat-button "all" current-repeat) " ] " + "[ " (:a :href (link "playlist" :action "clear") "Clear") " ] ")))) + + (defun playlist-button (action argument new-value current-value) + (let ((label (string-capitalize new-value))) + (if (string-equal new-value current-value) + (html (:b label)) + (html (:a :href (link "playlist" :action action argument new-value) label))))) + + (defun sort-playlist-button (order-by current-sort) + (playlist-button :sort :order-by order-by current-sort)) + + (defun playlist-shuffle-button (shuffle current-shuffle) + (playlist-button :shuffle :shuffle shuffle current-shuffle)) + + (defun playlist-repeat-button (repeat current-repeat) + (playlist-button :set-repeat :repeat repeat current-repeat)) + + (defun delete-songs-link (what value) + (html " [" (:a :href (link "playlist" :action :delete-songs what value) "x") "]")) + + +File: pcl.info, Node: 29-8, Next: 29-9, Prev: 29-7, Up: Chapter 29 + +Finding a Playlist +================== + +The last of the three URL functions is the simplest. It presents a +table listing all the playlists that have been created. Ordinarily +users won't need to use this page, but during development it gives you a +useful view into the state of the system. It also provides the +mechanism to choose a different playlist-each playlist ID is a link to +the playlist page with an explicit playlist-id query parameter, which +will then be made sticky by the playlist URL function. 
Note that you +need to acquire the *playlists-lock* to make sure the *playlists* hash +table doesn't change out from under you while you're iterating over it. + + (define-url-function all-playlists (request) + (:mp3-browser-page + (:title "All Playlists") + ((:table :class "all-playlists") + (:table-row "Playlist" "# Songs" "Most recent user agent") + (with-process-lock (*playlists-lock*) + (loop for playlist being the hash-values of *playlists* do + (html + (:table-row + (:a :href (link "playlist" :playlist-id (id playlist)) (:print (id playlist))) + (:print (table-size (songs-table playlist))) + (:print (user-agent playlist))))))))) + + +File: pcl.info, Node: 29-9, Next: Chapter 30, Prev: 29-8, Up: Chapter 29 + +Running the App +=============== + +And that's it. To use this app, you just need to load the MP3 database +with the load-database function from Chapter 27, publish the CSS style +sheet, set *song-source-type* to playlist so find-song-source uses +playlists instead of the singleton song source defined in the previous +chapter, and start AllegroServe. The following function takes care of +all these steps for you, after you fill in appropriate values for the +two parameters *mp3-dir*, which is the root directory of your MP3 +collection, and *mp3-css*, the filename of the CSS style sheet: + + (defparameter *mp3-dir* ...) + + (defparameter *mp3-css* ...) + + (defun start-mp3-browser () + (load-database *mp3-dir* *mp3s*) + (publish-file :path "/mp3-browser.css" :file *mp3-css* :content-type "text/css") + (setf *song-source-type* 'playlist) + (net.aserve::debug-on :notrap) + (net.aserve:start :port 2001)) + + When you invoke this function, it will print dots while it loads the +ID3 information from your ID3 files. Then you can point your MP3 client +at this URL: + + http://localhost:2001/stream.mp3 + + and point your browser at some good starting place, such as this: + + http://localhost:2001/browse + + which will let you start browsing by the default category, Genre. +After you've added some songs to the playlist, you can press Play on the +MP3 client, and it should start playing the first song. + + Obviously, you could improve the user interface in any of a number of +ways-for instance, if you have a lot of MP3s in your library, it might +be useful to be able to browse artists or albums by the first letter of +their names. Or maybe you could add a "Play whole album" button to the +playlist page that causes the playlist to immediately put all the songs +from the same album as the currently playing song at the top of the +playlist. Or you could change the playlist class, so instead of playing +silence when there are no songs queued up, it picks a random song from +the database. But all those ideas fall in the realm of application +design, which isn't really the topic of this book. Instead, the next +two chapters will drop back to the level of software infrastructure to +cover how the FOO HTML generation library works. + + +File: pcl.info, Node: Chapter 30, Next: Chapter 31, Prev: Chapter 29, Up: Top + +30. Practical: An HTML Generation Library, the Interpreter +========================================================== + +In this chapter and the next you'll take a look under the hood of the +FOO HTML generator that you've been using in the past few chapters. FOO +is an example of a kind of programming that's quite common in Common +Lisp and relatively uncommon in non-Lisp languages, namely, +language-oriented programming. 
Rather than provide an API built +primarily out of functions, classes, and macros, FOO provides language +processors for a domain-specific language that you can embed in your +Common Lisp programs. + + FOO provides two language processors for the same s-expression +language. One is an interpreter that takes a FOO "program" as data and +interprets it to generate HTML. The other is a compiler that compiles +FOO expressions, possibly with embedded Common Lisp code, into Common +Lisp that generates HTML and runs the embedded code. The interpreter is +exposed as the function emit-html and the compiler as the macro html, +which you used in previous chapters. + + In this chapter you'll look at some of the infrastructure shared +between the interpreter and the compiler and then at the implementation +of the interpreter. In the next chapter, I'll show you how the compiler +works. + +* Menu: + +* 30-1:: Designing a Domain-Specific Language +* 30-2:: The FOO Language +* 30-3:: Character Escaping +* 30-4:: Indenting Printer +* 30-5:: HTML Processor Interface +* 30-6:: The Pretty Printer Backend +* 30-7:: The Basic Evaluation Rule +* 30-8:: What's Next? + + +File: pcl.info, Node: 30-1, Next: 30-2, Prev: Chapter 30, Up: Chapter 30 + +Designing a Domain-Specific Language +==================================== + +Designing an embedded language requires two steps: first, design the +language that'll allow you to express the things you want to express, +and second, implement a processor, or processors, that accepts a +"program" in that language and either performs the actions indicated by +the program or translates the program into Common Lisp code that'll +perform equivalent behaviors. + + So, step one is to design the HTML-generating language. The key to +designing a good domain-specific language is to strike the right balance +between expressiveness and concision. For instance, a highly expressive +but not very concise "language" for generating HTML is the language of +literal HTML strings. The legal "forms" of this language are strings +containing literal HTML. Language processors for this "language" could +process such forms by simply emitting them as-is. + + (defvar *html-output* *standard-output*) + + (defun emit-html (html) + "An interpreter for the literal HTML language." + (write-sequence html *html-output*)) + + (defmacro html (html) + "A compiler for the literal HTML language." + `(write-sequence ,html *html-output*)) + + This "language" is highly expressive since it can express any HTML +you could possibly want to generate. (1) On the other hand, this +language doesn't win a lot of points for its concision because it gives +you zero compression-its input is its output. + + To design a language that gives you some useful compression without +sacrificing too much expressiveness, you need to identify the details of +the output that are either redundant or uninteresting. You can then +make those aspects of the output implicit in the semantics of the +language. + + For instance, because of the structure of HTML, every opening tag is +paired with a matching closing tag. (2) When you write HTML by hand, +you have to write those closing tags, but you can improve the concision +of your HTML-generating language by making the closing tags implicit. + + Another way you can gain concision at a slight cost in expressiveness +is to make the language processors responsible for adding appropriate +whitespace between elements-blank lines and indentation. 
When you're +generating HTML programmatically, you typically don't care much about +which elements have line breaks before or after them or about whether +different elements are indented relative to their parent elements. +Letting the language processor insert whitespace according to some rule +means you don't have to worry about it. As it turns out, FOO actually +supports two modes-one that uses the minimum amount of whitespace, which +allows it to generate extremely efficient code and compact HTML, and +another that generates nicely formatted HTML with different elements +indented and separated from other elements according to their role. + + Another detail that's best moved into the language processor is the +escaping of certain characters that have a special meaning in HTML such +as <, >, and &. Obviously, if you generate HTML by just printing +strings to a stream, then it's up to you to replace any occurrences of +those characters in the string with the appropriate escape sequences, +<, > and &. But if the language processor can know which +strings are to be emitted as element data, then it can take care of +automatically escaping those characters for you. + + ---------- Footnotes ---------- + + (1) In fact, it's probably too expressive since it can also generate +all sorts of output that's not even vaguely legal HTML. Of course, that +might be a feature if you need to generate HTML that's not strictly +correct to compensate for buggy Web browsers. Also, it's common for +language processors to accept programs that are syntactically correct +and otherwise well formed that'll nonetheless provoke undefined behavior +when run. + + (2) Well, almost every tag. Certain tags such as IMG and BR don't. +You'll deal with those in the section "The Basic Evaluation Rule." + + +File: pcl.info, Node: 30-2, Next: 30-3, Prev: 30-1, Up: Chapter 30 + +The FOO Language +================ + +So, enough theory. I'll give you a quick overview of the language +implemented by FOO, and then you'll look at the implementation of the +two FOO language processors-the interpreter, in this chapter, and the +compiler, in the next. + + Like Lisp itself, the basic syntax of the FOO language is defined in +terms of forms made up of Lisp objects. The language defines how each +legal FOO form is translated into HTML. + + The simplest FOO forms are self-evaluating Lisp objects such as +strings, numbers, and keyword symbols. (1) You'll need a function +self-evaluating-p that tests whether a given object is self-evaluating +for FOO's purposes. + + (defun self-evaluating-p (form) + (and (atom form) (if (symbolp form) (keywordp form) t))) + + Objects that satisfy this predicate will be emitted by converting +them to strings with PRINC-TO-STRING and then escaping any reserved +characters, such as <, >, or &. When the value is being emitted as an +attribute, the characters ", and ' are also escaped. Thus, you can +invoke the html macro on a self-evaluating object to emit it to +*html-output* (which is initially bound to *STANDARD-OUTPUT*). Table +30-1 shows how a few different self-evaluating values will be output. + + Table 30-1. FOO Output for Self-Evaluating Objects + +[image src="tables/30-1.png"] + + Of course, most HTML consists of tagged elements. The three pieces +of information that describe each element are the tag, a set of +attributes, and a body containing text and/or more HTML elements. 
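   (Since Table 30-1 appears here only as an image, a few calls to
emit-html, the interpreter entry point defined at the end of this
chapter, give the flavor of the same information. The inputs are
illustrative, not a transcription of the table.)

     (emit-html "foo")         ; emits: foo
     (emit-html "foo & bar")   ; emits: foo &amp; bar
     (emit-html 10)            ; emits: 10
     (emit-html :foo)          ; emits: FOO

   Tagged elements, with their tag, attributes, and body, are the more
interesting case.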
Thus, +you need a way to represent these three pieces of information as Lisp +objects, preferably ones that the Lisp reader already knows how to read. +(2) If you forget about attributes for a moment, there's an obvious +mapping between Lisp lists and HTML elements: any HTML element can be +represented by a list whose FIRST is a symbol where the name is the name +of the element's tag and whose REST is a list of self-evaluating objects +or lists representing other HTML elements. Thus: + +

     <p>Foo</p>                      <==> (:p "Foo")
+

     <p><i>Now</i> is the time</p>
<==> (:p (:i "Now") " is the time") + + Now the only problem is where to squeeze in the attributes. Since +most elements have no attributes, it'd be nice if you could use the +preceding syntax for elements without attributes. FOO provides two ways +to notate elements with attributes. The first is to simply include the +attributes in the list immediately following the symbol, alternating +keyword symbols naming the attributes and objects representing the +attribute value forms. The body of the element starts with the first +item in the list that's in a position to be an attribute name and isn't +a keyword symbol. Thus: + + HTML> (html (:p "foo")) +

     <p>foo</p>

+ NIL + HTML> (html (:p "foo " (:i "bar") " baz")) +

     <p>foo <i>bar</i> baz</p>

+ NIL + HTML> (html (:p :style "foo" "Foo")) +

     <p style='foo'>Foo</p>

+ NIL + HTML> (html (:p :id "x" :style "foo" "Foo")) +

     <p id='x' style='foo'>Foo</p>

+ NIL + + For folks who prefer a bit more obvious delineation between the +element's attributes and its body, FOO supports an alternative syntax: +if the first element of a list is itself a list with a keyword as its +first element, then the outer list represents an HTML element with that +keyword indicating the tag, with the REST of the nested list as the +attributes, and with the REST of the outer list as the body. Thus, you +could write the previous two expressions like this: + + HTML> (html ((:p :style "foo") "Foo")) +

     <p style='foo'>Foo</p>

+ NIL + HTML> (html ((:p :id "x" :style "foo") "Foo")) +

     <p id='x' style='foo'>Foo</p>

+ NIL + + The following function tests whether a given object matches either of +these syntaxes: + + (defun cons-form-p (form &optional (test #'keywordp)) + (and (consp form) + (or (funcall test (car form)) + (and (consp (car form)) (funcall test (caar form)))))) + + You should parameterize the test function because later you'll need +to test the same two syntaxes with a slightly different predicate on the +name. + + To completely abstract the differences between the two syntax +variants, you can define a function, parse-cons-form, that takes a form +and parses it into three elements, the tag, the attributes plist, and +the body list, returning them as multiple values. The code that +actually evaluates cons forms will use this function and not have to +worry about which syntax was used. + + (defun parse-cons-form (sexp) + (if (consp (first sexp)) + (parse-explicit-attributes-sexp sexp) + (parse-implicit-attributes-sexp sexp))) + + (defun parse-explicit-attributes-sexp (sexp) + (destructuring-bind ((tag &rest attributes) &body body) sexp + (values tag attributes body))) + + (defun parse-implicit-attributes-sexp (sexp) + (loop with tag = (first sexp) + for rest on (rest sexp) by #'cddr + while (and (keywordp (first rest)) (second rest)) + when (second rest) + collect (first rest) into attributes and + collect (second rest) into attributes + end + finally (return (values tag attributes rest)))) + + Now that you have the basic language specified, you can think about +how you're actually going to implement the language processors. How do +you get from a series of FOO forms to the desired HTML? As I mentioned +previously, you'll be implementing two language processors for FOO: an +interpreter that walks a tree of FOO forms and emits the corresponding +HTML directly and a compiler that walks a tree and translates it into +Common Lisp code that'll emit the same HTML. Both the interpreter and +compiler will be built on top of a common foundation of code, which +provides support for things such as escaping reserved characters and +generating nicely indented output, so it makes sense to start there. + + ---------- Footnotes ---------- + + (1) In the strict language of the Common Lisp standard, keyword +symbols aren't self-evaluating, though they do, in fact, evaluate to +themselves. See section 3.1.2.1.3 of the language standard or HyperSpec +for a brief discussion. + + (2) The requirement to use objects that the Lisp reader knows how to +read isn't a hard-and-fast one. Since the Lisp reader is itself +customizable, you could also define a new reader-level syntax for a new +kind of object. But that tends to be more trouble than it's worth. + + +File: pcl.info, Node: 30-3, Next: 30-4, Prev: 30-2, Up: Chapter 30 + +Character Escaping +================== + +The first bit of the foundation you'll need to lay is the code that +knows how to escape characters with a special meaning in HTML. There are +three such characters, and they must not appear in the text of an +element or in an attribute value; they are <, >, and &. In element text +or attribute values, these characters must be replaced with the +character reference entities <, >, and &. Similarly, in +attribute values, the quotation marks used to delimit the value must be +escaped, ' with ' and " with ". Additionally, any character +can be represented by a numeric character reference entity consisting of +an ampersand, followed by a sharp sign, followed by the numeric code as +a base 10 integer, and followed by a semicolon. 
These numeric escapes
+are sometimes used to embed non-ASCII characters in HTML.
+
+* Menu:
+
+* 30-3-1:: The Package
+
+   The following function accepts a single character and returns a
+string containing a character reference entity for that character:
+
+     (defun escape-char (char)
+       (case char
+         (#\& "&amp;")
+         (#\< "&lt;")
+         (#\> "&gt;")
+         (#\' "&#39;")
+         (#\" "&quot;")
+         (t (format nil "&#~d;" (char-code char)))))
+
+   You can use this function as the basis for a function, escape, that
+takes a string and a sequence of characters and returns a copy of the
+first argument with all occurrences of the characters in the second
+argument replaced with the corresponding character entity returned by
+escape-char.
+
+     (defun escape (in to-escape)
+       (flet ((needs-escape-p (char) (find char to-escape)))
+         (with-output-to-string (out)
+           (loop for start = 0 then (1+ pos)
+                 for pos = (position-if #'needs-escape-p in :start start)
+                 do (write-sequence in out :start start :end pos)
+                 when pos do (write-sequence (escape-char (char in pos)) out)
+                 while pos))))
+
+   You can also define two parameters: *element-escapes*, which contains
+the characters you need to escape in normal element data, and
+*attribute-escapes*, which contains the set of characters to be escaped
+in attribute values.
+
+     (defparameter *element-escapes* "<>&")
+     (defparameter *attribute-escapes* "<>&\"'")
+
+   Here are some examples:
+
+     HTML> (escape "foo & bar" *element-escapes*)
+     "foo &amp; bar"
+     HTML> (escape "foo & 'bar'" *element-escapes*)
+     "foo &amp; 'bar'"
+     HTML> (escape "foo & 'bar'" *attribute-escapes*)
+     "foo &amp; &#39;bar&#39;"
+
+   Finally, you'll need a variable, *escapes*, that will be bound to the
+set of characters that need to be escaped. It's initially set to the
+value of *element-escapes*, but when generating attributes, it will, as
+you'll see, be rebound to the value of *attribute-escapes*.
+
+     (defvar *escapes* *element-escapes*)
+
+
+File: pcl.info, Node: 30-3-1, Next: 30-3, Prev: 30-3, Up: 30-3
+
+The Package
+===========
+
+Since FOO is a low-level library, the package you develop it in doesn't
+rely on much external code-just the usual dependency on names from the
+COMMON-LISP package and, almost as usual, on the names of the
+macro-writing macros from COM.GIGAMONKEYS.MACRO-UTILITIES. On the other
+hand, the package needs to export all the names needed by code that uses
+FOO. Here's the DEFPACKAGE from the source that you can download from
+the book's Web site:
+
+     (defpackage :com.gigamonkeys.html
+       (:use :common-lisp :com.gigamonkeys.macro-utilities)
+       (:export :with-html-output
+                :in-html-style
+                :define-html-macro
+                :html
+                :emit-html
+                :&attributes))
+
+
+File: pcl.info, Node: 30-4, Next: 30-5, Prev: 30-3, Up: Chapter 30
+
+Indenting Printer
+=================
+
+To handle generating nicely indented output, you can define a class
+indenting-printer, which wraps around an output stream, and functions
+that use an instance of that class to emit strings to the stream while
+keeping track of when it's at the beginning of the line. The class
+looks like this:
+
+     (defclass indenting-printer ()
+       ((out :accessor out :initarg :out)
+        (beginning-of-line-p :accessor beginning-of-line-p :initform t)
+        (indentation :accessor indentation :initform 0)
+        (indenting-p :accessor indenting-p :initform t)))
+
+   The main function that operates on indenting-printers is emit, which
+takes the printer and a string and emits the string to the printer's
+output stream, keeping track of when it emits a newline so it can reset
+the beginning-of-line-p slot.
+ + (defun emit (ip string) + (loop for start = 0 then (1+ pos) + for pos = (position #\Newline string :start start) + do (emit/no-newlines ip string :start start :end pos) + when pos do (emit-newline ip) + while pos)) + + To actually emit the string, it uses the function emit/no-newlines, +which emits any needed indentation, via the helper indent-if-necessary, +and then writes the string to the stream. This function can also be +called directly by other code to emit a string that's known not to +contain any newlines. + + (defun emit/no-newlines (ip string &key (start 0) end) + (indent-if-necessary ip) + (write-sequence string (out ip) :start start :end end) + (unless (zerop (- (or end (length string)) start)) + (setf (beginning-of-line-p ip) nil))) + + The helper indent-if-necessary checks beginning-of-line-p and +indenting-p to determine whether it needs to emit indentation and, if +they're both true, emits as many spaces as indicated by the value of +indentation. Code that uses the indenting-printer can control the +indentation by manipulating the indentation and indenting-p slots. +Incrementing and decrementing indentation changes the number of leading +spaces, while setting indenting-p to NIL can temporarily turn off +indentation. + + (defun indent-if-necessary (ip) + (when (and (beginning-of-line-p ip) (indenting-p ip)) + (loop repeat (indentation ip) do (write-char #\Space (out ip))) + (setf (beginning-of-line-p ip) nil))) + + The last two functions in the indenting-printer API are emit-newline +and emit-freshline, which are both used to emit a newline character, +similar to the ~% and ~& FORMAT directives. That is, the only +difference is that emit-newline always emits a newline, while +emit-freshline does so only if beginning-of-line-p is false. Thus, +multiple calls to emit-freshline without any intervening emits won't +result in a blank line. This is handy when one piece of code wants to +generate some output that should end with a newline while another piece +of code wants to generate some output that should start on a newline but +you don't want a blank line between the two bits of output. + + (defun emit-newline (ip) + (write-char #\Newline (out ip)) + (setf (beginning-of-line-p ip) t)) + + (defun emit-freshline (ip) + (unless (beginning-of-line-p ip) (emit-newline ip))) + + With those preliminaries out of the way, you're ready to get to the +guts of the FOO processor. + + +File: pcl.info, Node: 30-5, Next: 30-6, Prev: 30-4, Up: Chapter 30 + +HTML Processor Interface +======================== + +Now you're ready to define the interface that'll be used by the FOO +language processor to emit HTML. You can define this interface as a set +of generic functions because you'll need two implementations-one that +actually emits HTML and another that the html macro can use to collect a +list of actions that need to be performed, which can then be optimized +and compiled into code that emits the same output in a more efficient +way. I'll call this set of generic functions the backend interface. 
It +consists of the following eight generic functions: + + (defgeneric raw-string (processor string &optional newlines-p)) + + (defgeneric newline (processor)) + + (defgeneric freshline (processor)) + + (defgeneric indent (processor)) + + (defgeneric unindent (processor)) + + (defgeneric toggle-indenting (processor)) + + (defgeneric embed-value (processor value)) + + (defgeneric embed-code (processor code)) + + While several of these functions have obvious correspondence to +indenting-printer functions, it's important to understand that these +generic functions define the abstract operations that are used by the +FOO language processors and won't always be implemented in terms of +calls to the indenting-printer functions. + + That said, perhaps the easiest way to understand the semantics of +these abstract operations is to look at the concrete implementations of +the methods specialized on html-pretty-printer, the class used to +generate human-readable HTML. + + +File: pcl.info, Node: 30-6, Next: 30-7, Prev: 30-5, Up: Chapter 30 + +The Pretty Printer Backend +========================== + +You can start by defining a class with two slots-one to hold an instance +of indenting-printer and one to hold the tab width-the number of spaces +you want to increase the indentation for each level of nesting of HTML +elements. + + (defclass html-pretty-printer () + ((printer :accessor printer :initarg :printer) + (tab-width :accessor tab-width :initarg :tab-width :initform 2))) + + Now you can implement methods specialized on html-pretty-printer on +the eight generic functions that make up the backend interface. + + The FOO processors use the raw-string function to emit strings that +don't need character escaping, either because you actually want to emit +normally reserved characters or because all reserved characters have +already been escaped. Usually raw-string is invoked with strings that +don't contain newlines, so the default behavior is to use +emit/no-newlines unless the caller specifies a non-NIL newlines-p +argument. + + (defmethod raw-string ((pp html-pretty-printer) string &optional newlines-p) + (if newlines-p + (emit (printer pp) string) + (emit/no-newlines (printer pp) string))) + + The functions newline, freshline, indent, unindent, and +toggle-indenting implement fairly straightforward manipulations of the +underlying indenting-printer. The only wrinkle is that the HTML pretty +printer generates pretty output only when the dynamic variable *pretty* +is true. When it's NIL, you should generate compact HTML with no +unnecessary whitespace. So, these methods, with the exception of +newline, all check *pretty* before doing anything: (1) + + (defmethod newline ((pp html-pretty-printer)) + (emit-newline (printer pp))) + + (defmethod freshline ((pp html-pretty-printer)) + (when *pretty* (emit-freshline (printer pp)))) + + (defmethod indent ((pp html-pretty-printer)) + (when *pretty* + (incf (indentation (printer pp)) (tab-width pp)))) + + (defmethod unindent ((pp html-pretty-printer)) + (when *pretty* + (decf (indentation (printer pp)) (tab-width pp)))) + + (defmethod toggle-indenting ((pp html-pretty-printer)) + (when *pretty* + (with-slots (indenting-p) (printer pp) + (setf indenting-p (not indenting-p))))) + + Finally, the functions embed-value and embed-code are used only by +the FOO compiler-embed-value is used to generate code that'll emit the +value of a Common Lisp expression, while embed-code is used to embed a +bit of code to be run and its result discarded. 
In the interpreter, you +can't meaningfully evaluate embedded Lisp code, so the methods on these +functions always signal an error. + + (defmethod embed-value ((pp html-pretty-printer) value) + (error "Can't embed values when interpreting. Value: ~s" value)) + + (defmethod embed-code ((pp html-pretty-printer) code) + (error "Can't embed code when interpreting. Code: ~s" code)) + +* Menu: + +* 30-6-1:: Using Conditions to Have Your Cake and Eat It Too + + ---------- Footnotes ---------- + + (1) Another, more purely object-oriented, approach would be to define +two classes, perhaps html-pretty-printer and html-raw-printer, and then +define no-op methods specialized on html-raw-printer for the methods +that should do stuff only when *pretty* is true. However, in this case, +after defining all the no-op methods, you'd end up with more code, and +then you'd have the hassle of making sure you created an instance of the +right class at the right time. But in general, using polymorphism to +replace conditionals is a good strategy. + + +File: pcl.info, Node: 30-6-1, Next: 30-6, Prev: 30-6, Up: 30-6 + +Using Conditions to Have Your Cake and Eat It Too +================================================= + +An alternate approach would be to use EVAL to evaluate Lisp expressions +in the interpreter. The problem with this approach is that EVAL has no +access to the lexical environment. Thus, there's no way to make +something like this work: + + (let ((x 10)) (emit-html '(:p x))) + + when x is a lexical variable. The symbol x that's passed to +emit-html at runtime has no particular connection to the lexical +variable named with the same symbol. The Lisp compiler arranges for +references to x in the code to refer to the variable, but after the code +is compiled, there's no longer necessarily any association between the +name x and that variable. This is the main reason that when you think +EVAL is the solution to your problem, you're probably wrong. + + However, if x was a dynamic variable, declared with DEFVAR or +DEFPARAMETER (and likely named *x* instead of x), EVAL could get at its +value. Thus, it might be useful to allow the FOO interpreter to use +EVAL in some situations. But it's a bad idea to always use EVAL. You +can get the best of both worlds by combining the idea of using EVAL with +the condition system. + + First define some error classes that you can signal when embed-value +and embed-code are called in the interpreter. + + (define-condition embedded-lisp-in-interpreter (error) + ((form :initarg :form :reader form))) + + (define-condition value-in-interpreter (embedded-lisp-in-interpreter) () + (:report + (lambda (c s) + (format s "Can't embed values when interpreting. Value: ~s" (form c))))) + + (define-condition code-in-interpreter (embedded-lisp-in-interpreter) () + (:report + (lambda (c s) + (format s "Can't embed code when interpreting. Code: ~s" (form c))))) + + Now you can implement embed-value and embed-code to signal those +errors and provide a restart that'll evaluate the form with EVAL. + + (defmethod embed-value ((pp html-pretty-printer) value) + (restart-case (error 'value-in-interpreter :form value) + (evaluate () + :report (lambda (s) (format s "EVAL ~s in null lexical environment." value)) + (raw-string pp (escape (princ-to-string (eval value)) *escapes*) t)))) + + (defmethod embed-code ((pp html-pretty-printer) code) + (restart-case (error 'code-in-interpreter :form code) + (evaluate () + :report (lambda (s) (format s "EVAL ~s in null lexical environment." 
code)) + (eval code)))) + + Now you can do something like this: + + HTML> (defvar *x* 10) + *X* + HTML> (emit-html '(:p *x*)) + + and you'll get dropped into the debugger with this message: + + Can't embed values when interpreting. Value: *X* + [Condition of type VALUE-IN-INTERPRETER] + + Restarts: + 0: [EVALUATE] EVAL *X* in null lexical environment. + 1: [ABORT] Abort handling SLIME request. + 2: [ABORT] Abort entirely from this process. + + If you invoke the evaluate restart, embed-value will EVAL *x*, get +the value 10, and generate this HTML: + +

     <p>10</p>

+ + Then, as a convenience, you can provide restart functions-functions +that invoke the evaluate restart-in certain situations. The evaluate +restart function unconditionally invokes the restart, while +eval-dynamic-variables and eval-code invoke it only if the form in the +condition is a dynamic variable or potential code. + + (defun evaluate (&optional condition) + (declare (ignore condition)) + (invoke-restart 'evaluate)) + + (defun eval-dynamic-variables (&optional condition) + (when (and (symbolp (form condition)) (boundp (form condition))) + (evaluate))) + + (defun eval-code (&optional condition) + (when (consp (form condition)) + (evaluate))) + + Now you can use HANDLER-BIND to set up a handler to automatically +invoke the evaluate restart for you. + + HTML> (handler-bind ((value-in-interpreter #'evaluate)) (emit-html '(:p *x*))) +

     <p>10</p>

+ T + + Finally, you can define a macro to provide a nicer syntax for binding +handlers for the two kinds of errors. + + (defmacro with-dynamic-evaluation ((&key values code) &body body) + `(handler-bind ( + ,@(if values `((value-in-interpreter #'evaluate))) + ,@(if code `((code-in-interpreter #'evaluate)))) + ,@body)) + + With this macro defined, you can write this: + + HTML> (with-dynamic-evaluation (:values t) (emit-html '(:p *x*))) +

     <p>10</p>

+ T
+
+
+File: pcl.info, Node: 30-7, Next: 30-8, Prev: 30-6, Up: Chapter 30
+
+The Basic Evaluation Rule
+=========================
+
+Now to connect the FOO language to the processor interface, all you need
+is a function that takes an object and processes it, invoking the
+appropriate processor functions to generate HTML. For instance, when
+given a simple form like this:
+
+     (:p "Foo")
+
+   this function might execute this sequence of calls on the processor:
+
+     (freshline processor)
+     (raw-string processor "<p>" nil)
+     (raw-string processor "Foo" nil)
+     (raw-string processor "</p>
" nil) + (freshline processor) + + For now you can define a simple function that just checks whether a +form is, in fact, a legal FOO form and, if it is, hands it off to the +function process-sexp-html for processing. In the next chapter, you'll +add some bells and whistles to this function to allow it to handle +macros and special operators. But for now it looks like this: + + (defun process (processor form) + (if (sexp-html-p form) + (process-sexp-html processor form) + (error "Malformed FOO form: ~s" form))) + + The function sexp-html-p determines whether the given object is a +legal FOO expression, either a self-evaluating form or a properly +formatted cons. + + (defun sexp-html-p (form) + (or (self-evaluating-p form) (cons-form-p form))) + + Self-evaluating forms are easily handled: just convert to a string +with PRINC-TO-STRING and escape the characters in the variable +*escapes*, which, as you'll recall, is initially bound to the value of +*element-escapes*. Cons forms you pass off to process-cons-sexp-html. + + (defun process-sexp-html (processor form) + (if (self-evaluating-p form) + (raw-string processor (escape (princ-to-string form) *escapes*) t) + (process-cons-sexp-html processor form))) + + The function process-cons-sexp-html is then responsible for emitting +the opening tag, any attributes, the body, and the closing tag. The +main complication here is that to generate pretty HTML, you need to emit +fresh lines and adjust the indentation according to the type of the +element being emitted. You can categorize all the elements defined in +HTML into one of three categories: block, paragraph, and inline. Block +elements-such as body and ul-are emitted with fresh lines before and +after both their opening and closing tags and with their contents +indented one level. Paragraph elements-such as p, li, and +blockquote-are emitted with a fresh line before the opening tag and +after the closing tag. Inline elements are simply emitted in line. The +following three parameters list the elements of each type: + + (defparameter *block-elements* + '(:body :colgroup :dl :fieldset :form :head :html :map :noscript :object + :ol :optgroup :pre :script :select :style :table :tbody :tfoot :thead + :tr :ul)) + + (defparameter *paragraph-elements* + '(:area :base :blockquote :br :button :caption :col :dd :div :dt :h1 + :h2 :h3 :h4 :h5 :h6 :hr :input :li :link :meta :option :p :param + :td :textarea :th :title)) + + (defparameter *inline-elements* + '(:a :abbr :acronym :address :b :bdo :big :cite :code :del :dfn :em + :i :img :ins :kbd :label :legend :q :samp :small :span :strong :sub + :sup :tt :var)) + + The functions block-element-p and paragraph-element-p test whether a +given tag is a member of the corresponding list. (1) + + (defun block-element-p (tag) (find tag *block-elements*)) + + (defun paragraph-element-p (tag) (find tag *paragraph-elements*)) + + Two other categorizations with their own predicates are the elements +that are always empty, such as br and hr, and the three elements, pre, +style, and script, in which whitespace is supposed to be preserved. The +former are handled specially when generating regular HTML (in other +words, not XHTML) since they're not supposed to have a closing tag. And +when emitting the three tags in which whitespace is preserved, you can +temporarily turn off indentation so the pretty printer doesn't add any +spaces that aren't part of the element's actual contents. 
+ + (defparameter *empty-elements* + '(:area :base :br :col :hr :img :input :link :meta :param)) + + (defparameter *preserve-whitespace-elements* '(:pre :script :style)) + + (defun empty-element-p (tag) (find tag *empty-elements*)) + + (defun preserve-whitespace-p (tag) (find tag *preserve-whitespace-elements*)) + + The last piece of information you need when generating HTML is +whether you're generating XHTML since that affects how you emit empty +elements. + + (defparameter *xhtml* nil) + + With all that information, you're ready to process a cons FOO form. +You use parse-cons-form to parse the list into three parts, the tag +symbol, a possibly empty plist of attribute key/value pairs, and a +possibly empty list of body forms. You then emit the opening tag, the +body, and the closing tag with the helper functions emit-open-tag, +emit-element-body, and emit-close-tag. + + (defun process-cons-sexp-html (processor form) + (when (string= *escapes* *attribute-escapes*) + (error "Can't use cons forms in attributes: ~a" form)) + (multiple-value-bind (tag attributes body) (parse-cons-form form) + (emit-open-tag processor tag body attributes) + (emit-element-body processor tag body) + (emit-close-tag processor tag body))) + + In emit-open-tag you have to call freshline when appropriate and then +emit the attributes with emit-attributes. You need to pass the +element's body to emit-open-tag so when it's emitting XHTML, it knows +whether to finish the tag with /> or >. + + (defun emit-open-tag (processor tag body-p attributes) + (when (or (paragraph-element-p tag) (block-element-p tag)) + (freshline processor)) + (raw-string processor (format nil "<~(~a~)" tag)) + (emit-attributes processor attributes) + (raw-string processor (if (and *xhtml* (not body-p)) "/>" ">"))) + + In emit-attributes the attribute names aren't evaluated since they +must be keyword symbols, but you should invoke the top-level process +function to evaluate the attribute values, binding *escapes* to +*attribute-escapes*. As a convenience for specifying boolean +attributes, whose value should be the name of the attribute, if the +value is T-not just any true value but actually T-then you replace the +value with the name of the attribute. (2) + + (defun emit-attributes (processor attributes) + (loop for (k v) on attributes by #'cddr do + (raw-string processor (format nil " ~(~a~)='" k)) + (let ((*escapes* *attribute-escapes*)) + (process processor (if (eql v t) (string-downcase k) v))) + (raw-string processor "'"))) + + Emitting the element's body is similar to emitting the attribute +values: you can loop through the body calling process to evaluate each +form. The rest of the code is dedicated to emitting fresh lines and +adjusting the indentation as appropriate for the type of element. + + (defun emit-element-body (processor tag body) + (when (block-element-p tag) + (freshline processor) + (indent processor)) + (when (preserve-whitespace-p tag) (toggle-indenting processor)) + (dolist (item body) (process processor item)) + (when (preserve-whitespace-p tag) (toggle-indenting processor)) + (when (block-element-p tag) + (unindent processor) + (freshline processor))) + + Finally, emit-close-tag, as you'd probably expect, emits the closing +tag (unless no closing tag is necessary, such as when the body is empty +and you're either emitting XHTML or the element is one of the special +empty elements). Regardless of whether you actually emit a close tag, +you need to emit a final fresh line for block and paragraph elements. 
+
+     (defun emit-close-tag (processor tag body-p)
+       (unless (and (or *xhtml* (empty-element-p tag)) (not body-p))
+         (raw-string processor (format nil "</~(~a~)>" tag)))
+       (when (or (paragraph-element-p tag) (block-element-p tag))
+         (freshline processor)))
+
+   The function process is the basic FOO interpreter. To make it a bit
+easier to use, you can define a function, emit-html, that invokes
+process, passing it an html-pretty-printer and a form to evaluate. You
+can define and use a helper function, get-pretty-printer, to get the
+pretty printer, which returns the current value of *html-pretty-printer*
+if it's bound; otherwise, it makes a new instance of html-pretty-printer
+with *html-output* as its output stream.
+
+     (defun emit-html (sexp) (process (get-pretty-printer) sexp))
+
+     (defun get-pretty-printer ()
+       (or *html-pretty-printer*
+           (make-instance
+            'html-pretty-printer
+            :printer (make-instance 'indenting-printer :out *html-output*))))
+
+   With this function, you can emit HTML to *html-output*. Rather than
+expose the variable *html-output* as part of FOO's public API, you
+should define a macro, with-html-output, that takes care of binding the
+stream for you. It also lets you specify whether you want pretty HTML
+output, defaulting to the value of the variable *pretty*.
+
+     (defmacro with-html-output ((stream &key (pretty *pretty*)) &body body)
+       `(let* ((*html-output* ,stream)
+               (*pretty* ,pretty))
+          ,@body))
+
+   So, if you wanted to use emit-html to generate HTML to a file, you
+could write the following:
+
+     (with-open-file (out "foo.html" :direction :output)
+       (with-html-output (out :pretty t)
+         (emit-html *some-foo-expression*)))
+
+   ---------- Footnotes ----------
+
+   (1) You don't need a predicate for *inline-elements* since you only
+ever test for block and paragraph elements. I include the parameter
+here for completeness.
+
+   (2) While XHTML requires boolean attributes to be notated with their
+name as the value to indicate a true value, in HTML it's also legal to
+simply include the name of the attribute with no value, for example,
+