add a lib for rendering text in sdl2 and rules to make in makefile

This commit is contained in:
2026-01-21 14:45:11 -06:00
parent f3c0aafe2f
commit e7d9c013f4
1754 changed files with 850297 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

@@ -0,0 +1,8 @@
<svg xmlns="http://www.w3.org/2000/svg" width="256" height="256" viewBox="0 0 682.667 682.667">
<!-- Harf -->
<path d="M168.422 500.826c-23.477-.144-41.602-1.179-54.375-3.125-22.227-3.47-38.06-10.137-47.5-20-8.477-7.637-12.709-18.054-12.709-31.25 0-5.137.482-12.5 1.459-22.084a406.941 406.941 0 013.958-28.75c.69-4.023 1.315-7.636 1.875-10.833.417-2.22.97-4.024 1.667-5.417.69-1.386 1.458-2.187 2.291-2.396.834-.208 1.628.352 2.396 1.667.762 1.322 1.491 3.58 2.188 6.77a114.35 114.35 0 002.291 10.834c2.22 9.03 8.262 15.768 18.125 20.209 25 10.696 63.19 16.25 114.584 16.666 21.25-.273 40.97-1.041 59.166-2.291 18.607-1.387 39.584-4.649 62.917-9.792 23.333-5.137 41.908-10.762 55.73-16.875 13.814-6.107 25.585-12.708 35.312-19.792 7.636-5.553 11.458-10.898 11.458-16.041 0-2.084-1.98-4.545-5.938-7.396-3.958-2.845-6.562-4.271-7.812-4.271-2.363 1.113-5.143 1.98-8.333 2.604-3.197.625-6.114.8-8.75.52-7.364-.832-12.852-3.124-16.459-6.874-3.613-3.75-5.416-8.815-5.416-15.208 0-4.024 5.345-16.667 16.041-37.917 4.304-8.607 8.086-14.616 11.354-18.02 3.262-3.4 7.741-5.105 13.438-5.105 9.023 0 17.396 4.863 25.104 14.583 7.708 9.727 11.563 22.155 11.563 37.292 0 15-2.019 27.988-6.042 38.958-5 15-11.81 28.893-20.417 41.667-10.833 15.417-22.708 27.604-35.625 36.562-12.916 8.959-25.976 16.081-39.166 21.355-11.81 5.833-27.051 11.289-45.73 16.354-18.684 5.071-41.354 9.205-68.02 12.396-26.394 3.196-49.935 4.856-70.625 5zM430.222 133.162c1.107 4.03 1.074 8.196-.104 12.5-1.185 4.31-3.373 8.821-6.563 13.541-1.946 3.197-3.997 6.081-6.146 8.646a114.073 114.073 0 01-6.77 7.396c-.697.833-1.81 1.875-3.334 3.125a60.28 60.28 0 01-4.791 3.542c-1.667 1.113-3.269 1.914-4.792 2.396-1.53.488-2.643.312-3.333-.521-5-4.024-9.935-8.125-14.792-12.292-4.863-4.167-10.208-8.053-16.042-11.667-.976-.69-1.426-1.354-1.354-1.979.065-.625.45-1.562 1.146-2.812l33.333-50.834c.97-1.386 1.94-2.044 2.917-1.979.97.072 2.012.456 3.125 1.146a102.942 102.942 0 018.75 5.625 70.208 70.208 0 018.125 6.77 47.58 47.58 0 016.562 7.918c1.875 2.851 3.23 6.009 4.063 9.479zM599.13 319.784a589.994 589.994 0 01-9.583 16.354 123.488 123.488 0 01-11.146 
15.52 97.741 97.741 0 01-13.542 13.23c-4.935 3.958-10.52 7.122-16.77 9.48-5.144 2.083-9.519 4.413-13.126 6.978-3.613 2.572-6.601 5.313-8.958 8.23-3.613 3.893-6.738 9.27-9.375 16.145a2240.458 2240.458 0 00-8.23 21.771 307.032 307.032 0 01-9.374 22.396c-3.405 7.292-7.676 13.3-12.813 18.02-7.083 7.5-17.571 12.573-31.458 15.21l-38.125 6.458c-1.113.143-2.467.247-4.063.312-1.601.072-2.747.04-3.437-.104-3.19-.97-4.167-3.086-2.917-6.354 1.25-3.262 4.512-6.563 9.792-9.896 10.69-7.083 19.232-12.813 25.625-17.188a6164.29 6164.29 0 0015.625-10.729c4.023-2.773 7.253-5.065 9.687-6.875a271.19 271.19 0 007.813-6.041c11.25-8.47 19.023-17.357 23.333-26.667 2.5-4.857 4.792-9.583 6.875-14.167a305.502 305.502 0 016.563-13.541 226.164 226.164 0 017.396-13.23c2.636-4.375 5.69-8.782 9.166-13.229 3.607-4.44 8.47-8.782 14.584-13.02 6.106-4.232 13.47-8.438 22.083-12.605.97-.416 2.565-1.354 4.792-2.812 2.22-1.459 4.928-3.783 8.125-6.98 3.19-3.19 6.836-7.532 10.937-13.02 4.095-5.482 8.438-12.461 13.021-20.938-1.667-1.666-6.458-3.958-14.375-6.875-7.917-3.053-15.417-4.791-22.5-5.208-6.53-.417-11.914.768-16.146 3.542-4.238 2.78-8.092 6.738-11.562 11.875-3.477 5.143-6.459 7.5-8.959 7.083-.833-.417-.976-2.149-.416-5.208a51.587 51.587 0 012.916-8.959c5.274-12.083 11.25-21.041 17.917-26.875 6.803-5.97 14.928-8.958 24.375-8.958 6.803 0 15.345 1.667 25.625 5 6.387 2.226 11.875 3.333 16.458 3.333 9.584 0 19.095-3.398 28.542-10.208 2.083 0 2.812 1.354 2.187 4.062-.625 2.709-1.562 5.873-2.812 9.48-1.113 3.613-1.875 6.041-2.292 7.291-2.083 6.53-3.75 10.352-5 11.459-1.25.976-4.863 3.613-10.833 7.916a42.505 42.505 0 00-8.646 8.646c-2.435 3.268-4.896 7.122-7.396 11.563-2.5 4.446-5.696 10.56-9.583 18.333zm0 0"/>
<!-- B -->
<path d="M229.342 300.489c2.084-8.887 4.375-16.667 6.875-23.333 2.5-6.667 4.935-12.188 7.292-16.563 2.363-4.375 4.655-7.74 6.875-10.104 2.226-2.357 4.167-3.75 5.833-4.167v13.75c0 17.5 4.649 29.727 13.959 36.667 5.69 3.75 11.354 5 16.979 3.75s9.928-4.167 12.916-8.75c2.982-4.583 5.521-9.96 7.605-16.146 2.083-6.178 4.095-9.27 6.041-9.27 2.22 0 3.125 2.226 2.709 6.666-.144 7.5-2.468 19.688-6.98 36.563-4.518 16.875-10.625 30.729-18.333 41.562-7.708 10.833-17.05 16.53-28.02 17.083-11.25-.416-20-6.731-26.25-18.958-5.69-12.083-8.19-27.708-7.5-46.875zM286.783 583.04c1.107 4.023 1.075 8.19-.104 12.5-1.185 4.303-3.372 8.815-6.562 13.542-1.947 3.19-3.998 6.074-6.146 8.645a117.59 117.59 0 01-6.771 7.396c-.697.834-1.81 1.875-3.333 3.125a61.927 61.927 0 01-4.792 3.542c-1.667 1.107-3.268 1.907-4.792 2.396-1.53.481-2.643.312-3.333-.521-5-4.03-9.935-8.125-14.792-12.292-4.856-4.166-10.208-8.06-16.041-11.666-.97-.697-1.42-1.355-1.354-1.98.071-.625.455-1.562 1.145-2.812l33.334-50.833c.97-1.394 1.94-2.051 2.916-1.98.97.066 2.012.45 3.125 1.146a102.94 102.94 0 018.75 5.625 69.555 69.555 0 018.125 6.771 47.057 47.057 0 016.563 7.917c1.875 2.845 3.229 6.002 4.062 9.479z"/>
<!-- uzz -->
<path d="M205.592 196.114c-5.976 36.667-11.354 64.03-16.146 82.083-4.791 18.06-9.134 31.843-13.02 41.355-3.894 9.518-8.164 16.77-12.813 21.77-4.655 5-13.197 9.76-25.625 14.271-12.435 4.518-22.812 7.676-31.146 9.48-8.333 1.81-13.541 2.78-15.625 2.916-2.083-.137-3.405-.52-3.958-1.146-.56-.625-.072-2.116 1.458-4.479 2.637-3.75 8.158-8.887 16.563-15.417 8.398-6.523 18.333-14.648 29.791-24.375 11.459-9.72 20.033-18.294 25.73-25.729 5.69-7.428 10.729-17.565 15.104-30.416 4.375-12.846 8.646-26.283 12.812-40.313 4.167-14.023 7.188-23.542 9.063-28.542 1.875-5 3.782-7.428 5.729-7.291 2.22.416 2.917 2.363 2.083 5.833zm14.792-114.375c1.107 4.03 1.074 8.197-.104 12.5-1.185 4.31-3.373 8.822-6.563 13.542-1.946 3.196-3.997 6.08-6.146 8.646a114.072 114.072 0 01-6.77 7.395c-.697.834-1.81 1.875-3.334 3.125a60.279 60.279 0 01-4.791 3.542c-1.667 1.113-3.269 1.914-4.792 2.396-1.53.488-2.643.312-3.333-.521-5-4.023-9.935-8.125-14.792-12.292-4.863-4.166-10.208-8.053-16.042-11.666-.976-.69-1.426-1.354-1.354-1.98.065-.624.45-1.562 1.146-2.812l33.333-50.833c.97-1.387 1.94-2.045 2.917-1.98.97.072 2.012.456 3.125 1.146a102.945 102.945 0 018.75 5.625 70.208 70.208 0 018.125 6.771 47.58 47.58 0 016.562 7.917c1.875 2.851 3.23 6.009 4.063 9.479zm0 0M256.217 250.489c-1.113 13.893-4.238 25.697-9.375 35.417-5.143 9.726-10.976 14.583-17.5 14.583 0-5.273 1.7-21.667 5.104-49.167 3.399-27.5 5.97-47.187 7.709-59.062 1.731-11.875 3.71-24.48 5.937-37.813 2.22-13.333 4.375-24.095 6.459-32.291a574.618 574.618 0 017.604-20.625 683.688 683.688 0 018.437-20.625c1.94-3.054 3.19-2.565 3.75 1.458-3.476 23.06-6.666 46.98-9.583 71.77-2.917 24.793-4.896 42.54-5.938 53.23-1.041 10.697-1.562 16.947-1.562 18.75-.697 11.393-1.042 19.518-1.042 24.375zm0 0"/>
</svg>

After

Width:  |  Height:  |  Size: 6.6 KiB

View File

@@ -0,0 +1,123 @@
# Process this file with automake to produce Makefile.in
# We require automake 1.6 at least.
AUTOMAKE_OPTIONS = 1.6
# This is a blank Makefile.am for using gtk-doc.
# Copy this to your project's API docs directory and modify the variables to
# suit your project. See the GTK+ Makefiles in gtk+/docs/reference for examples
# of using the various options.
# The name of the module, e.g. 'glib'.
DOC_MODULE=harfbuzz
# Uncomment for versioned docs and specify the version of the module, e.g. '2'.
#DOC_MODULE_VERSION=$(HB_VERSION_MAJOR)
# The top-level SGML file. You can change this if you want to.
DOC_MAIN_SGML_FILE=$(DOC_MODULE)-docs.xml
# Directories containing the source code.
# gtk-doc will search all .c and .h files beneath these paths
# for inline comments documenting functions and macros.
# e.g. DOC_SOURCE_DIR=$(top_srcdir)/gtk $(top_srcdir)/gdk
DOC_SOURCE_DIR=$(top_srcdir)/src $(top_builddir)/src
# Extra options to pass to gtkdoc-scangobj. Not normally needed.
SCANGOBJ_OPTIONS=
# Extra options to supply to gtkdoc-scan.
# e.g. SCAN_OPTIONS=--deprecated-guards="GTK_DISABLE_DEPRECATED"
SCAN_OPTIONS=--rebuild-types --deprecated-guards="HB_DISABLE_DEPRECATED" \
--ignore-decorators='HB_EXTERN|HB_DEPRECATED|HB_DEPRECATED_FOR()'
# Header files or dirs to ignore when scanning. Use base file/dir names
# e.g. IGNORE_HFILES=gtkdebug.h gtkintl.h private_code
IGNORE_HFILES=`cd $(top_srcdir)/src; find . -path './*/*.h' | sed 's@^.*/@@'`
IGNORE_HFILES+=hb-gobject.h hb-gobject-enums.h hb-gobject-structs.h
# Extra options to supply to gtkdoc-mkdb.
# e.g. MKDB_OPTIONS=--xml-mode --output-format=xml
MKDB_OPTIONS=--source-suffixes=h,cc --xml-mode --output-format=xml --ignore-files="$(IGNORE_HFILES)"
# Extra options to supply to gtkdoc-mktmpl
# e.g. MKTMPL_OPTIONS=--only-section-tmpl
MKTMPL_OPTIONS=
# Extra options to supply to gtkdoc-mkhtml
MKHTML_OPTIONS=
# Extra options to supply to gtkdoc-fixxref. Not normally needed.
# e.g. FIXXREF_OPTIONS=--extra-dir=../gdk-pixbuf/html --extra-dir=../gdk/html
FIXXREF_OPTIONS=
# Used for dependencies. The docs will be rebuilt if any of these change.
# e.g. HFILE_GLOB=$(top_srcdir)/gtk/*.h
# e.g. CFILE_GLOB=$(top_srcdir)/gtk/*.c
HFILE_GLOB=$(top_srcdir)/src/hb.h $(top_srcdir)/src/hb-*.h
CFILE_GLOB=$(top_srcdir)/src/hb-*.cc
# Extra header to include when scanning, which are not under DOC_SOURCE_DIR
# e.g. EXTRA_HFILES=$(top_srcdir)/contrib/extra.h
# Images to copy into HTML directory.
# e.g. HTML_IMAGES=$(top_srcdir)/gtk/stock-icons/stock_about_24.png
HTML_IMAGES= \
HarfBuzz.png \
HarfBuzz.svg
# Extra SGML files that are included by $(DOC_MAIN_SGML_FILE).
# e.g. content_files=running.sgml building.sgml changes-2.0.sgml
content_files= \
usermanual-what-is-harfbuzz.xml \
usermanual-install-harfbuzz.xml \
usermanual-getting-started.xml \
usermanual-glyph-information.xml \
usermanual-shaping-concepts.xml \
usermanual-object-model.xml \
usermanual-buffers-language-script-and-direction.xml \
usermanual-fonts-and-faces.xml \
usermanual-opentype-features.xml \
usermanual-clusters.xml \
usermanual-utilities.xml \
usermanual-integration.xml \
version.xml
# SGML files where gtk-doc abbreviations (#GtkWidget) are expanded
# These files must be listed here *and* in content_files
# e.g. expand_content_files=running.sgml
expand_content_files=
# CFLAGS and LDFLAGS for compiling gtkdoc-scangobj with your library.
# Only needed if you are using gtkdoc-scangobj to dynamically query widget
# signals and properties.
# e.g. GTKDOC_CFLAGS=-I$(top_srcdir) -I$(top_builddir) $(GTK_DEBUG_FLAGS)
# e.g. GTKDOC_LIBS=$(top_builddir)/gtk/$(gtktargetlib)
GTKDOC_CFLAGS=
GTKDOC_LIBS=$(top_builddir)/src/libharfbuzz.la
if HAVE_GOBJECT
GTKDOC_LIBS+=$(top_builddir)/src/libharfbuzz-gobject.la
endif
# This includes the standard gtk-doc make rules, copied by gtkdocize.
include $(top_srcdir)/gtk-doc.make
# Other files to distribute
# e.g. EXTRA_DIST += version.xml.in
EXTRA_DIST += version.xml.in meson.build
# Files not to distribute
# for --rebuild-types in $(SCAN_OPTIONS), e.g. $(DOC_MODULE).types
# for --rebuild-sections in $(SCAN_OPTIONS) e.g. $(DOC_MODULE)-sections.txt
#DISTCLEANFILES +=
# Comment this out if you don't want 'make check' to test your doc status
# and run some sanity checks
if ENABLE_GTK_DOC
TESTS_ENVIRONMENT = cd $(srcdir) && \
DOC_MODULE=$(DOC_MODULE) DOC_MAIN_SGML_FILE=$(DOC_MAIN_SGML_FILE) \
SRCDIR=$(abs_srcdir) BUILDDIR=$(abs_builddir)
#TESTS = $(GTKDOC_CHECK)
endif
-include $(top_srcdir)/git.mk

View File

@@ -0,0 +1,259 @@
digraph {
graph [outputorder=edgefirst];
node [shape="record", fontname="Noto Sans Mono SemiBold", fontsize=15];
edge [fontname="Verdana", fontsize=12,labeldistance=7.5 ];
fontname="Verdana";
ranksep=0.02; nodesep=0.5;
subgraph {
ranksep="0.02 equally";
preprocessing[style=filled,fillcolor="lightgreen",fontname="Verdana",label="Glyph pre-processing"];
orthographic[style=filled,fillcolor="lightblue",fontname="Verdana",label="Orthographic Unit Shaping"];
reordering[style=filled, fillcolor="lightcoral",fontname="Verdana",label="Reordering group (USE)"];
topographic[style=filled,fillcolor="lightgoldenrod",fontname="Verdana",label="Topographical Features‡"];
typographic[style=filled,fillcolor="lightpink",fontname="Verdana",label="Typographic Presentation"];
positioning[style=filled,fillcolor="lightsalmon",fontname="Verdana",label="Positioning"];
preprocessing->reordering->orthographic->topographic->typographic->positioning;
}
decision1 [shape="diamond", label="Script\ndirection?",fontname="Verdana"];
rvrn->decision1;
ltrfeatures [label="{ltra|ltrm}", fillcolor="lightgreen",style="filled"];
{
rtlfeatures [label="{rtla|rtlm¹}", fillcolor="lightgreen",style="filled"];
}
{
rank=same;
fracfeatures [label="frac²|numr³|dnom⁴", fillcolor="lightpink",style="filled"];
fracnotes [fontname="Verdana",shape=plaintext,label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="left">¹ rtlm is scoped to characters with a Unicode mirroring property</td></tr>
<tr><td align="left">² frac is scoped to numr + the slash + dnom</td></tr>
<tr><td align="left">³ numr is scoped to all decimal numbers before a U+2044 FRACTION SLASH.</td></tr>
<tr><td align="left">⁴ dnom is scoped to all decimal numbers after a U+2044 FRACTION SLASH.</td></tr>
</table>
>];
}
rand [fillcolor="lightpink",style="filled"];
decision1 -> ltrfeatures [label="Left-to-right"];
decision1 -> rtlfeatures [label="Right-to-left"];
decision1 -> fracfeatures [label="Other"];
ltrfeatures -> fracfeatures;
rtlfeatures -> fracfeatures;
fracfeatures->rand;
decision2 [shape="diamond", label="Script?",fontname="Verdana"];
{rank=same; HARF [label="{Harf|HARF}"]; notes;}
rand -> trak -> HARF -> decision2;
commonfeatures [shape=none,label=<<table border="0" cellspacing="0">
<tr>
<td border="1" bgcolor="lightsalmon">abvm</td>
<td border="1" bgcolor="lightsalmon">blwm</td>
<td border="1" bgcolor="lightgreen">ccmp</td>
<td border="1" bgcolor="lightgreen">locl</td>
<td border="1" bgcolor="lightsalmon">mark</td>
<td border="1" bgcolor="lightsalmon">mkmk</td>
<td border="1" bgcolor="lightpink">rlig</td>
</tr>
</table>>
];
decision3 [shape="diamond", label="Script\ndirection?",fontname="Verdana"];
BUZZ [label="{Buzz|BUZZ}"];
BUZZ -> commonfeatures -> decision3;
horizontalfeatures [
shape=none,label=<<table border="0" cellspacing="0">
<tr><td border="1" bgcolor="lightpink">calt <font face="Verdana">(not Hangul)</font></td></tr>
<tr><td border="1" bgcolor="lightpink">clig <font face="Verdana">(not Khmer)</font></td></tr>
<tr><td border="1" bgcolor="lightsalmon">curs</td></tr>
<tr><td border="1" bgcolor="lightsalmon">dist</td></tr>
<tr><td border="1" bgcolor="lightsalmon">kern</td></tr>
<tr><td border="1" bgcolor="lightpink">liga <font face="Verdana">(not Khmer)</font></td></tr>
<tr><td border="1" bgcolor="lightpink">rclt</td></tr>
</table>>
];
vert [label="vert",style=filled,fillcolor="lightpink"];
decision3 -> horizontalfeatures [label="Horizontal"];
decision3 -> vert [label="Vertical"];
discretionary [label="User-selected\ndiscretionary\nfeatures",fontname="Verdana"];
horizontalfeatures -> discretionary;
vert -> discretionary;
decision2->stch;
BUZZ;
subgraph shapers {
subgraph cluster_arabic {
bgcolor="lightyellow"
label="Arabic, Syriac";
stch [ style="filled", fillcolor="lightgreen",label="stch"];
ccmplocl [ style="filled", label="ccmp|locl", fillcolor="lightgreen"];
arabicfeatures [label="isol|fina|fin2|fin3|medi|med2|init", style="filled", fillcolor="lightgoldenrod"];
arabicfeatures2 [label="rclt|calt", style="filled",fillcolor="lightpink"];
rlig[style="filled",fillcolor="lightpink"];
mset [fillcolor="lightpink",style="filled"]
stch->ccmplocl->arabicfeatures->rlig->arabicfeatures2->mset;
}
mset->BUZZ:n;
subgraph cluster_hangul {
bgcolor="lightyellow"
label="Hangul";
hangulfeatures [label="ljmo|vjmo|tjmo", style="filled",fillcolor="lightgoldenrod"]
}
hangulfeatures->BUZZ:n;
subgraph cluster_indic {
label="Indic";
bgcolor="lightyellow"
// Preprocessing
loclccmpindic [label="locl†|ccmp†",style=filled,fillcolor="lightgreen"];
node[style=filled,fillcolor="lightgreen"];
nukt [label="nukt†"];
akhn [label="akhn†"];
loclccmpindic->indic_reorder_1->nukt->akhn;
indic_reorder_1[label="Initial reordering", fontname="Verdana",fillcolor="lightgrey",shape=ellipse,style=filled]
// Orthographic
node[style=filled,fillcolor="lightblue"]
rphf [label="rphf⁵"];
rkpf [label="rkrf†"];
pref [label="pref⁶"];
blwf [label="blwf⁷"];
abvf [label="abvf⁸"];
half [label="half⁹"];
pstf [label="pstf⁸"];
vatu [label="vatu†"];
cjct [label="cjct†"];
akhn ->rphf -> rkpf -> pref -> blwf -> abvf -> half -> pstf -> vatu -> cjct;
// Typographic presentation
indic_typographic[style=filled,fillcolor="lightpink",label="init|pres|abvs|blws|psts|haln"]
indic_reorder_2[label="Final reordering",fillcolor="lightgrey",fontname="Verdana", shape=ellipse,style=filled]
cjct->indic_reorder_2->indic_typographic;
notes2 [fontname="Verdana",shape=plaintext,style="",label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="right">⁵ rphf is scoped to pre-base ra+halant sequences</td></tr>
<tr><td align="right">⁶ pref is scoped to the two glyphs after the base; outputs are reordered</td></tr>
<tr><td align="right">⁷ blwf is usually scoped to the whole syllable, except in Telugu and Kannada where it is post-base</td></tr>
<tr><td align="right">⁸ abvf and pstf are scoped to post-base</td></tr>
<tr><td align="right">⁹ half is scoped to pre-base</td></tr>
</table>
>];
indic_typographic -> notes2 [style=invis];
}
subgraph cluster_khmer {
label="Khmer";
bgcolor="lightyellow"
khmerbasic [style=filled,fillcolor="lightgreen",label="locl†|ccmp†|pref†|blwf†|abvf†|pstf†|cfar†"]
khmerother [style=filled,fillcolor="lightpink",label="pres|abvs|blws|psts"]
khmerbasic -> khmerother -> khmerclig;
khmerclig [label="clig",style=filled,fillcolor="lightpink"];
}
subgraph cluster_myanmar {
label="Myanmar";
bgcolor="lightyellow"
loclccmpmyanmar [label="locl†|ccmp†",style=filled,fillcolor="lightgreen"];
rphfmymr [label="rphf†",style=filled,fillcolor="lightblue"]
prefmymr [label="pref†",style=filled,fillcolor="lightblue"]
blwfmymr [label="blwf†",style=filled,fillcolor="lightblue"]
pstfmymr [label="pstf†",style=filled,fillcolor="lightblue"]
myanmarother [label="pres|abvs|blws|psts",style=filled,fillcolor="lightpink"];
reorder_myanmar[label="Reordering", shape=ellipse,style=filled,fontname="Verdana"]
loclccmpmyanmar -> reorder_myanmar-> rphfmymr -> prefmymr -> blwfmymr -> pstfmymr -> myanmarother;
}
subgraph cluster_use {
label="Universal Shaping Engine"
bgcolor="lightyellow"
use_preprocessing [style=filled, label="locl†|ccmp†|nukt†|akhn†", fillcolor="lightgreen"];
// Reordering
rphfuse [label="rphf¹⁰", style=filled, fillcolor="lightcoral"];
prefuse [label="pref¹¹", style=filled, fillcolor="lightcoral"];
// Orthographic
orthographicuse [label="rkrf†|abvf†|blwf†|half†|pstf†|vatu†|cjct†", style="filled", fillcolor="lightblue"];
topographicaluse [label="isol|init|medi|fina", style="filled", fillcolor="lightgoldenrod"];
typographicaluse [label="abvs|blws|haln|pres|psts", style="filled", fillcolor="lightpink"];
reorder_use[label="Reordering", shape=ellipse,style=filled,fontname="Verdana"]
use_preprocessing -> rphfuse -> prefuse->orthographicuse ->reorder_use -> topographicaluse -> typographicaluse;
notes3 [fontname="Verdana",shape=plaintext,label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="left">¹⁰ Outputs are reordered as category R</td></tr>
<tr><td align="left">¹¹ Outputs are reordered to before base</td></tr>
</table>
>];
typographicaluse -> notes3 [style=invis];
}
}
indic_typographic->BUZZ:n;
typographicaluse->BUZZ:n;
khmerclig -> BUZZ:n;
myanmarother -> BUZZ:n;
decision2->hangulfeatures;
decision2->loclccmpindic;
decision2->khmerbasic;
decision2->loclccmpmyanmar;
decision2->use_preprocessing;
decision2->BUZZ [label=" Hebrew, Thai,\n Lao, other"];
notes [fontname="Verdana",shape=box,label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="left">
<b>Indic</b> scripts are: Bengali, Devanagari,
Gujarati, Gurmukhi, Kannada,
Malayalam, Oriya, Tamil,
Telugu
</td></tr>
<tr><td align="left">
<b>USE</b> scripts are:
Adlam, Ahom, Balinese, Batak, Bhaiksuki, Brahmi, Buginese,
Buhid, Chakma, Cham, Chorasmian, Dives Akuru, Dogra, Duployan,
</td></tr>
<tr><td align="left">
Egyptian hieroglyphs, Elymaic, Grantha, Gunjala Gondi, Hanifi Rohingya,
Hanunoo, Javanese, Kaithi, Kayah Li, Kharoshthi, Khojki,
</td></tr>
<tr><td align="left">
Khudawadi, Lepcha, Limbu, Mahajani, Makasar, Mandaic, Manichaean,
Marchen, Masaram Gondi, Medefaidrin, Meetei Mayek, Miao, Modi,
</td></tr>
<tr><td align="left">
Mongolian, Multani, Nandinagari, Newa, Nko, Nyiakeng Puachue Hmong,
Old Sogdian, Pahawh Hmong, Phags Pa, Psalter Pahlavi, Rejang,
</td></tr>
<tr><td align="left">
Saurashtra, Sharada, Siddham, Sinhala, Sogdian, Soyombo, Sundanese,
Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet,
</td></tr>
<tr><td align="left">
Takri, Tibetan, Tifinagh, Tirhuta, Wancho, Zanabazar Square.
</td></tr>
</table>>]
footnote[fontname="Verdana",label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="left">† Feature is scoped to each syllable</td></tr>
<tr><td align="left">‡ All topographic features are scoped based on topographic position</td></tr>
</table>>];
notes3->footnote[style=invis];
}

View File

@@ -0,0 +1,222 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<book id="index">
<bookinfo>
<title>HarfBuzz Manual</title>
<abstract>
<title>HarfBuzz</title>
<graphic fileref="HarfBuzz.png" format="PNG" align="center"/>
<para>
HarfBuzz is a text shaping library. Using the HarfBuzz library allows
programs to convert a sequence of Unicode input into
properly formatted and positioned glyph output&mdash;for any writing
system and language.
</para>
<para>
The canonical source-code tree is available at
<ulink url="https://github.com/harfbuzz/harfbuzz">github.com/harfbuzz/harfbuzz</ulink>.
See <xref linkend="download" endterm="download.title"/> for
release tarballs.
</para>
</abstract>
</bookinfo>
<part id="user-manual">
<title>User's manual</title>
<xi:include href="usermanual-what-is-harfbuzz.xml"/>
<xi:include href="usermanual-install-harfbuzz.xml"/>
<xi:include href="usermanual-getting-started.xml"/>
<xi:include href="usermanual-shaping-concepts.xml"/>
<xi:include href="usermanual-object-model.xml"/>
<xi:include href="usermanual-buffers-language-script-and-direction.xml"/>
<xi:include href="usermanual-fonts-and-faces.xml"/>
<xi:include href="usermanual-opentype-features.xml"/>
<xi:include href="usermanual-clusters.xml"/>
<xi:include href="usermanual-utilities.xml"/>
<xi:include href="usermanual-integration.xml"/>
</part>
<part id="reference-manual">
<partinfo>
<releaseinfo>
This document is for HarfBuzz &version;.
<!--The latest version of this documentation can be found on-line at
<ulink role="online-location" url="http://[SERVER]/libharfbuzz/index.html">http://[SERVER]/libharfbuzz/</ulink>.-->
</releaseinfo>
</partinfo>
<title>Reference manual</title>
<chapter id="core-api">
<title>Core API</title>
<xi:include href="xml/hb-blob.xml"/>
<xi:include href="xml/hb-buffer.xml"/>
<xi:include href="xml/hb-common.xml"/>
<xi:include href="xml/hb-features.xml"/>
<xi:include href="xml/hb-draw.xml"/>
<xi:include href="xml/hb-paint.xml"/>
<xi:include href="xml/hb-deprecated.xml"/>
<xi:include href="xml/hb-face.xml"/>
<xi:include href="xml/hb-font.xml"/>
<xi:include href="xml/hb-map.xml"/>
<xi:include href="xml/hb-set.xml"/>
<xi:include href="xml/hb-shape-plan.xml"/>
<xi:include href="xml/hb-shape.xml"/>
<xi:include href="xml/hb-unicode.xml"/>
<xi:include href="xml/hb-version.xml"/>
</chapter>
<chapter id="opentype-api">
<title>OpenType API</title>
<xi:include href="xml/hb-ot-color.xml"/>
<xi:include href="xml/hb-ot-font.xml"/>
<xi:include href="xml/hb-ot-layout.xml"/>
<xi:include href="xml/hb-ot-math.xml"/>
<xi:include href="xml/hb-ot-meta.xml"/>
<xi:include href="xml/hb-ot-metrics.xml"/>
<xi:include href="xml/hb-ot-name.xml"/>
<xi:include href="xml/hb-ot-shape.xml"/>
<xi:include href="xml/hb-ot-var.xml"/>
</chapter>
<chapter id="apple-advanced-typography-api">
<title>Apple Advanced Typography API</title>
<xi:include href="xml/hb-aat-layout.xml"/>
</chapter>
<chapter id="integration-api">
<title>Integration API</title>
<xi:include href="xml/hb-coretext.xml"/>
<xi:include href="xml/hb-ft.xml"/>
<xi:include href="xml/hb-glib.xml"/>
<xi:include href="xml/hb-graphite2.xml"/>
<xi:include href="xml/hb-icu.xml"/>
<xi:include href="xml/hb-uniscribe.xml"/>
<xi:include href="xml/hb-gdi.xml"/>
<xi:include href="xml/hb-directwrite.xml"/>
<xi:include href="xml/hb-cairo.xml"/>
</chapter>
<chapter id="style-api">
<title>Style API</title>
<xi:include href="xml/hb-style.xml"/>
</chapter>
<chapter id="subset-api">
<title>Subset API</title>
<xi:include href="xml/hb-subset.xml"/>
</chapter>
<!--chapter id="object-tree">
<title>Object Hierarchy</title>
<xi:include href="xml/tree_index.sgml"/>
</chapter-->
<index id="api-index-full"><title>API Index</title><xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include></index>
<index id="deprecated-api-index"><title>Index of deprecated API</title><xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include></index>
<index id="api-index-8-1-0"><title>Index of new symbols in 8.1.0</title><xi:include href="xml/api-index-8.1.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-8-0-0"><title>Index of new symbols in 8.0.0</title><xi:include href="xml/api-index-8.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-7-3-0"><title>Index of new symbols in 7.3.0</title><xi:include href="xml/api-index-7.3.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-7-1-0"><title>Index of new symbols in 7.1.0</title><xi:include href="xml/api-index-7.1.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-7-0-0"><title>Index of new symbols in 7.0.0</title><xi:include href="xml/api-index-7.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-6-0-0"><title>Index of new symbols in 6.0.0</title><xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-5-3-0"><title>Index of new symbols in 5.3.0</title><xi:include href="xml/api-index-5.3.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-5-0-0"><title>Index of new symbols in 5.0.0</title><xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-4-4-0"><title>Index of new symbols in 4.4.0</title><xi:include href="xml/api-index-4.4.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-4-3-0"><title>Index of new symbols in 4.3.0</title><xi:include href="xml/api-index-4.3.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-4-2-0"><title>Index of new symbols in 4.2.0</title><xi:include href="xml/api-index-4.2.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-4-1-0"><title>Index of new symbols in 4.1.0</title><xi:include href="xml/api-index-4.1.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-4-0-0"><title>Index of new symbols in 4.0.0</title><xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-3-4-0"><title>Index of new symbols in 3.4.0</title><xi:include href="xml/api-index-3.4.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-3-3-0"><title>Index of new symbols in 3.3.0</title><xi:include href="xml/api-index-3.3.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-3-1-0"><title>Index of new symbols in 3.1.0</title><xi:include href="xml/api-index-3.1.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-3-0-0"><title>Index of new symbols in 3.0.0</title><xi:include href="xml/api-index-3.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-9-1"><title>Index of new symbols in 2.9.1</title><xi:include href="xml/api-index-2.9.1.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-9-0"><title>Index of new symbols in 2.9.0</title><xi:include href="xml/api-index-2.9.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-8-2"><title>Index of new symbols in 2.8.2</title><xi:include href="xml/api-index-2.8.2.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-7-3"><title>Index of new symbols in 2.7.3</title><xi:include href="xml/api-index-2.7.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-6-8"><title>Index of new symbols in 2.6.8</title><xi:include href="xml/api-index-2.6.8.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-6-5"><title>Index of new symbols in 2.6.5</title><xi:include href="xml/api-index-2.6.5.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-6-3"><title>Index of new symbols in 2.6.3</title><xi:include href="xml/api-index-2.6.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-6-0"><title>Index of new symbols in 2.6.0</title><xi:include href="xml/api-index-2.6.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-5-0"><title>Index of new symbols in 2.5.0</title><xi:include href="xml/api-index-2.5.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-4-0"><title>Index of new symbols in 2.4.0</title><xi:include href="xml/api-index-2.4.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-3-0"><title>Index of new symbols in 2.3.0</title><xi:include href="xml/api-index-2.3.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-2-0"><title>Index of new symbols in 2.2.0</title><xi:include href="xml/api-index-2.2.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-1-0"><title>Index of new symbols in 2.1.0</title><xi:include href="xml/api-index-2.1.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-2-0-0"><title>Index of new symbols in 2.0.0</title><xi:include href="xml/api-index-2.0.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-9-0"><title>Index of new symbols in 1.9.0</title><xi:include href="xml/api-index-1.9.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-8-6"><title>Index of new symbols in 1.8.6</title><xi:include href="xml/api-index-1.8.6.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-8-5"><title>Index of new symbols in 1.8.5</title><xi:include href="xml/api-index-1.8.5.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-8-1"><title>Index of new symbols in 1.8.1</title><xi:include href="xml/api-index-1.8.1.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-8-0"><title>Index of new symbols in 1.8.0</title><xi:include href="xml/api-index-1.8.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-7-7"><title>Index of new symbols in 1.7.7</title><xi:include href="xml/api-index-1.7.7.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-7-2"><title>Index of new symbols in 1.7.2</title><xi:include href="xml/api-index-1.7.2.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-6-0"><title>Index of new symbols in 1.6.0</title><xi:include href="xml/api-index-1.6.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-5-0"><title>Index of new symbols in 1.5.0</title><xi:include href="xml/api-index-1.5.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-4-3"><title>Index of new symbols in 1.4.3</title><xi:include href="xml/api-index-1.4.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-4-2"><title>Index of new symbols in 1.4.2</title><xi:include href="xml/api-index-1.4.2.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-4-0"><title>Index of new symbols in 1.4.0</title><xi:include href="xml/api-index-1.4.0.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-3-3"><title>Index of new symbols in 1.3.3</title><xi:include href="xml/api-index-1.3.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-2-3"><title>Index of new symbols in 1.2.3</title><xi:include href="xml/api-index-1.2.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-1-3"><title>Index of new symbols in 1.1.3</title><xi:include href="xml/api-index-1.1.3.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-1-2"><title>Index of new symbols in 1.1.2</title><xi:include href="xml/api-index-1.1.2.xml"><xi:fallback /></xi:include></index>
<index id="api-index-1-0-5"><title>Index of new symbols in 1.0.5</title><xi:include href="xml/api-index-1.0.5.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-42"><title>Index of new symbols in 0.9.42</title><xi:include href="xml/api-index-0.9.42.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-41"><title>Index of new symbols in 0.9.41</title><xi:include href="xml/api-index-0.9.41.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-39"><title>Index of new symbols in 0.9.39</title><xi:include href="xml/api-index-0.9.39.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-38"><title>Index of new symbols in 0.9.38</title><xi:include href="xml/api-index-0.9.38.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-33"><title>Index of new symbols in 0.9.33</title><xi:include href="xml/api-index-0.9.33.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-31"><title>Index of new symbols in 0.9.31</title><xi:include href="xml/api-index-0.9.31.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-30"><title>Index of new symbols in 0.9.30</title><xi:include href="xml/api-index-0.9.30.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-28"><title>Index of new symbols in 0.9.28</title><xi:include href="xml/api-index-0.9.28.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-26"><title>Index of new symbols in 0.9.26</title><xi:include href="xml/api-index-0.9.26.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-22"><title>Index of new symbols in 0.9.22</title><xi:include href="xml/api-index-0.9.22.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-21"><title>Index of new symbols in 0.9.21</title><xi:include href="xml/api-index-0.9.21.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-20"><title>Index of new symbols in 0.9.20</title><xi:include href="xml/api-index-0.9.20.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-11"><title>Index of new symbols in 0.9.11</title><xi:include href="xml/api-index-0.9.11.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-10"><title>Index of new symbols in 0.9.10</title><xi:include href="xml/api-index-0.9.10.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-8"><title>Index of new symbols in 0.9.8</title><xi:include href="xml/api-index-0.9.8.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-7"><title>Index of new symbols in 0.9.7</title><xi:include href="xml/api-index-0.9.7.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-5"><title>Index of new symbols in 0.9.5</title><xi:include href="xml/api-index-0.9.5.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-9-2"><title>Index of new symbols in 0.9.2</title><xi:include href="xml/api-index-0.9.2.xml"><xi:fallback /></xi:include></index>
<index id="api-index-0-6-0"><title>Index of new symbols in 0.6.0</title><xi:include href="xml/api-index-0.6.0.xml"><xi:fallback /></xi:include></index>
<xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include>
</part>
<note>
<para>
The current HarfBuzz codebase is stable
and under active maintenance. This is what is used in the latest
versions of Firefox, GNOME, ChromeOS, Chrome, LibreOffice,
XeTeX, Android, and KDE, among other places.
</para>
<para>
Prior to 2012, the original HarfBuzz codebase (which, these days, is
referred to as <emphasis>harfbuzz-old</emphasis>) was derived from code
in <ulink url="http://freetype.org/">FreeType</ulink>,
<ulink url="http://pango.org/">Pango</ulink>, and
<ulink url="http://qt-project.org/">Qt</ulink>.
It is <emphasis>not</emphasis> actively developed or maintained, and is
extremely buggy. All users of harfbuzz-old are encouraged to switch over
to the new HarfBuzz as soon as possible.
</para>
<para>
To make this distinction clearer in discussions, the current HarfBuzz
codebase is sometimes referred to as <emphasis>harfbuzz-ng</emphasis>.
</para>
<para>
For reference purposes, the harfbuzz-old source tree is archived
<ulink url="http://cgit.freedesktop.org/harfbuzz.old/">here</ulink>.
There are no release tarballs of harfbuzz-old whatsoever.
</para>
</note>
</book>

View File

View File

@@ -0,0 +1,920 @@
<SECTION>
<FILE>hb-aat-layout</FILE>
HB_AAT_LAYOUT_NO_SELECTOR_INDEX
hb_aat_layout_feature_type_t
hb_aat_layout_feature_selector_t
hb_aat_layout_feature_selector_info_t
hb_aat_layout_feature_type_get_name_id
hb_aat_layout_feature_type_get_selector_infos
hb_aat_layout_get_feature_types
hb_aat_layout_has_positioning
hb_aat_layout_has_substitution
hb_aat_layout_has_tracking
</SECTION>
<SECTION>
<FILE>hb-blob</FILE>
hb_blob_create
hb_blob_create_or_fail
hb_blob_create_from_file
hb_blob_create_from_file_or_fail
hb_blob_create_sub_blob
hb_blob_copy_writable_or_fail
hb_blob_get_empty
hb_blob_reference
hb_blob_destroy
hb_blob_set_user_data
hb_blob_get_user_data
hb_blob_make_immutable
hb_blob_is_immutable
hb_blob_get_data
hb_blob_get_data_writable
hb_blob_get_length
hb_blob_t
hb_memory_mode_t
</SECTION>
<SECTION>
<FILE>hb-buffer</FILE>
hb_buffer_create
hb_buffer_allocation_successful
hb_buffer_create_similar
hb_buffer_get_empty
hb_buffer_reference
hb_buffer_destroy
hb_buffer_set_user_data
hb_buffer_get_user_data
hb_buffer_reset
hb_buffer_clear_contents
hb_buffer_pre_allocate
hb_buffer_add
hb_buffer_add_codepoints
hb_buffer_add_utf32
hb_buffer_add_utf16
hb_buffer_add_utf8
hb_buffer_add_latin1
hb_buffer_append
hb_buffer_set_content_type
hb_buffer_get_content_type
hb_buffer_set_direction
hb_buffer_get_direction
hb_buffer_set_script
hb_buffer_get_script
hb_buffer_set_language
hb_buffer_get_language
hb_buffer_set_flags
hb_buffer_get_flags
hb_buffer_set_cluster_level
hb_buffer_get_cluster_level
hb_buffer_set_length
hb_buffer_get_length
hb_buffer_set_segment_properties
hb_buffer_get_segment_properties
hb_buffer_guess_segment_properties
hb_buffer_set_unicode_funcs
hb_buffer_get_unicode_funcs
hb_buffer_get_glyph_infos
hb_glyph_info_get_glyph_flags
hb_buffer_get_glyph_positions
hb_buffer_has_positions
hb_buffer_set_invisible_glyph
hb_buffer_get_invisible_glyph
hb_buffer_set_not_found_glyph
hb_buffer_get_not_found_glyph
hb_buffer_set_replacement_codepoint
hb_buffer_get_replacement_codepoint
hb_buffer_normalize_glyphs
hb_buffer_reverse
hb_buffer_reverse_range
hb_buffer_reverse_clusters
hb_buffer_serialize
hb_buffer_serialize_glyphs
hb_buffer_deserialize_glyphs
hb_buffer_serialize_unicode
hb_buffer_deserialize_unicode
hb_buffer_serialize_format_from_string
hb_buffer_serialize_format_to_string
hb_buffer_serialize_list_formats
hb_segment_properties_equal
hb_segment_properties_hash
hb_segment_properties_overlay
hb_buffer_diff
hb_buffer_message_func_t
hb_buffer_set_message_func
HB_SEGMENT_PROPERTIES_DEFAULT
HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT
hb_buffer_t
hb_glyph_info_t
hb_glyph_flags_t
hb_glyph_position_t
hb_buffer_content_type_t
hb_buffer_flags_t
hb_buffer_cluster_level_t
hb_segment_properties_t
hb_buffer_serialize_format_t
hb_buffer_serialize_flags_t
hb_buffer_diff_flags_t
</SECTION>
<SECTION>
<FILE>hb-common</FILE>
HB_TAG
HB_UNTAG
hb_tag_from_string
hb_tag_to_string
hb_direction_from_string
hb_direction_to_string
HB_DIRECTION_REVERSE
HB_DIRECTION_IS_BACKWARD
HB_DIRECTION_IS_FORWARD
HB_DIRECTION_IS_HORIZONTAL
HB_DIRECTION_IS_VALID
HB_DIRECTION_IS_VERTICAL
hb_script_from_iso15924_tag
hb_script_to_iso15924_tag
hb_script_from_string
hb_script_get_horizontal_direction
hb_language_from_string
hb_language_to_string
hb_language_get_default
hb_language_matches
hb_feature_from_string
hb_feature_to_string
hb_variation_from_string
hb_variation_to_string
hb_bool_t
hb_codepoint_t
HB_CODEPOINT_INVALID
hb_destroy_func_t
hb_direction_t
hb_language_t
hb_feature_t
hb_variation_t
hb_mask_t
hb_position_t
hb_tag_t
hb_script_t
hb_user_data_key_t
HB_TAG_NONE
HB_TAG_MAX
HB_TAG_MAX_SIGNED
HB_LANGUAGE_INVALID
HB_FEATURE_GLOBAL_END
HB_FEATURE_GLOBAL_START
<SUBSECTION Private>
HB_BEGIN_DECLS
HB_END_DECLS
hb_var_int_t
hb_var_num_t
int16_t
int32_t
int64_t
int8_t
uint16_t
uint32_t
uint64_t
uint8_t
HB_EXTERN
HB_DEPRECATED
HB_DEPRECATED_FOR
<SUBSECTION Private>
HB_H_IN
HB_OT_H_IN
HB_AAT_H_IN
</SECTION>
<SECTION>
<FILE>hb-features</FILE>
HB_HAS_CAIRO
HB_HAS_CORETEXT
HB_HAS_DIRECTWRITE
HB_HAS_FREETYPE
HB_HAS_GDI
HB_HAS_GLIB
HB_HAS_GOBJECT
HB_HAS_GRAPHITE
HB_HAS_ICU
HB_HAS_UNISCRIBE
HB_HAS_WASM
</SECTION>
<SECTION>
<FILE>hb-draw</FILE>
hb_draw_funcs_create
hb_draw_funcs_get_empty
hb_draw_funcs_reference
hb_draw_funcs_destroy
hb_draw_funcs_set_user_data
hb_draw_funcs_get_user_data
hb_draw_funcs_make_immutable
hb_draw_funcs_is_immutable
hb_draw_move_to_func_t
hb_draw_funcs_set_move_to_func
hb_draw_line_to_func_t
hb_draw_funcs_set_line_to_func
hb_draw_quadratic_to_func_t
hb_draw_funcs_set_quadratic_to_func
hb_draw_cubic_to_func_t
hb_draw_funcs_set_cubic_to_func
hb_draw_close_path_func_t
hb_draw_funcs_set_close_path_func
hb_draw_move_to
hb_draw_line_to
hb_draw_quadratic_to
hb_draw_cubic_to
hb_draw_close_path
HB_DRAW_STATE_DEFAULT
hb_draw_funcs_t
hb_draw_state_t
</SECTION>
<SECTION>
<FILE>hb-paint</FILE>
hb_paint_funcs_t
hb_paint_funcs_create
hb_paint_funcs_get_empty
hb_paint_funcs_reference
hb_paint_funcs_destroy
hb_paint_funcs_set_user_data
hb_paint_funcs_get_user_data
hb_paint_funcs_make_immutable
hb_paint_funcs_is_immutable
hb_paint_push_transform_func_t
hb_paint_funcs_set_push_transform_func
hb_paint_pop_transform_func_t
hb_paint_funcs_set_pop_transform_func
hb_paint_push_clip_glyph_func_t
hb_paint_funcs_set_push_clip_glyph_func
hb_paint_push_clip_rectangle_func_t
hb_paint_funcs_set_push_clip_rectangle_func
hb_paint_pop_clip_func_t
hb_paint_funcs_set_pop_clip_func
hb_paint_color_func_t
hb_paint_funcs_set_color_func
HB_PAINT_IMAGE_FORMAT_PNG
HB_PAINT_IMAGE_FORMAT_SVG
HB_PAINT_IMAGE_FORMAT_BGRA
hb_paint_image_func_t
hb_paint_funcs_set_image_func
hb_color_line_t
hb_color_stop_t
hb_color_line_get_color_stops_func_t
hb_color_line_get_color_stops
hb_paint_extend_t
hb_color_line_get_extend_func_t
hb_color_line_get_extend
hb_paint_linear_gradient_func_t
hb_paint_funcs_set_linear_gradient_func
hb_paint_radial_gradient_func_t
hb_paint_funcs_set_radial_gradient_func
hb_paint_sweep_gradient_func_t
hb_paint_funcs_set_sweep_gradient_func
hb_paint_composite_mode_t
hb_paint_push_group_func_t
hb_paint_funcs_set_push_group_func
hb_paint_pop_group_func_t
hb_paint_funcs_set_pop_group_func
hb_paint_custom_palette_color_func_t
hb_paint_funcs_set_custom_palette_color_func
hb_paint_push_transform
hb_paint_pop_transform
hb_paint_push_clip_glyph
hb_paint_push_clip_rectangle
hb_paint_pop_clip
hb_paint_color
hb_paint_image
hb_paint_linear_gradient
hb_paint_radial_gradient
hb_paint_sweep_gradient
hb_paint_push_group
hb_paint_pop_group
hb_paint_custom_palette_color
</SECTION>
<SECTION>
<FILE>hb-deprecated</FILE>
HB_BUFFER_FLAGS_DEFAULT
HB_BUFFER_SERIALIZE_FLAGS_DEFAULT
HB_SCRIPT_CANADIAN_ABORIGINAL
hb_font_funcs_set_glyph_func
hb_font_get_glyph_func_t
HB_MATH_GLYPH_PART_FLAG_EXTENDER
HB_OT_MATH_SCRIPT
hb_ot_layout_table_choose_script
hb_ot_layout_table_find_script
hb_ot_tag_from_language
hb_ot_tags_from_script
HB_OT_VAR_NO_AXIS_INDEX
hb_ot_var_axis_t
hb_ot_var_find_axis
hb_ot_var_get_axes
hb_unicode_eastasian_width_func_t
hb_unicode_eastasian_width
hb_unicode_funcs_set_eastasian_width_func
HB_UNICODE_MAX_DECOMPOSITION_LEN
hb_unicode_decompose_compatibility_func_t
hb_unicode_decompose_compatibility
hb_unicode_funcs_set_decompose_compatibility_func
HB_UNICODE_COMBINING_CLASS_CCC133
hb_font_funcs_set_glyph_v_kerning_func
hb_font_get_glyph_shape
hb_font_get_glyph_shape_func_t
hb_font_funcs_set_glyph_shape_func
hb_font_get_glyph_v_kerning
hb_font_get_glyph_v_kerning_func_t
</SECTION>
<SECTION>
<FILE>hb-coretext</FILE>
HB_CORETEXT_TAG_KERX
HB_CORETEXT_TAG_MORT
HB_CORETEXT_TAG_MORX
hb_coretext_face_create
hb_coretext_font_create
hb_coretext_face_get_cg_font
hb_coretext_font_get_ct_font
</SECTION>
<SECTION>
<FILE>hb-directwrite</FILE>
hb_directwrite_face_create
hb_directwrite_face_get_font_face
</SECTION>
<SECTION>
<FILE>hb-face</FILE>
hb_face_count
hb_face_t
hb_face_create
hb_face_create_for_tables
hb_face_get_empty
hb_face_reference
hb_face_destroy
hb_face_set_user_data
hb_face_get_user_data
hb_face_make_immutable
hb_face_is_immutable
hb_face_get_table_tags
hb_face_set_glyph_count
hb_face_get_glyph_count
hb_face_set_index
hb_face_get_index
hb_face_set_upem
hb_face_get_upem
hb_face_reference_blob
hb_face_reference_table
hb_face_collect_unicodes
hb_face_collect_nominal_glyph_mapping
hb_face_collect_variation_selectors
hb_face_collect_variation_unicodes
hb_face_builder_create
hb_face_builder_add_table
hb_face_builder_sort_tables
</SECTION>
<SECTION>
<FILE>hb-font</FILE>
hb_font_add_glyph_origin_for_direction
hb_font_create
hb_font_create_sub_font
hb_font_get_empty
hb_font_reference
hb_font_destroy
hb_font_set_user_data
hb_font_get_user_data
hb_font_make_immutable
hb_font_is_immutable
hb_font_set_face
hb_font_get_face
hb_font_get_glyph
hb_font_get_glyph_advance_for_direction
hb_font_get_glyph_advances_for_direction
hb_font_get_glyph_contour_point
hb_font_get_glyph_contour_point_for_origin
hb_font_get_glyph_extents
hb_font_get_glyph_extents_for_origin
hb_font_get_glyph_from_name
hb_font_get_glyph_h_advance
hb_font_get_glyph_v_advance
hb_font_get_glyph_h_advances
hb_font_get_glyph_v_advances
hb_font_get_glyph_h_kerning
hb_font_get_glyph_kerning_for_direction
hb_font_get_glyph_h_origin
hb_font_get_glyph_v_origin
hb_font_get_glyph_origin_for_direction
hb_font_get_glyph_name
hb_font_draw_glyph
hb_font_paint_glyph
hb_font_get_nominal_glyph
hb_font_get_nominal_glyphs
hb_font_get_variation_glyph
hb_font_set_parent
hb_font_get_parent
hb_font_set_ppem
hb_font_get_ppem
hb_font_set_ptem
hb_font_get_ptem
hb_font_set_scale
hb_font_get_scale
hb_font_get_synthetic_bold
hb_font_set_synthetic_bold
hb_font_set_synthetic_slant
hb_font_get_synthetic_slant
hb_font_set_variations
hb_font_set_variation
HB_FONT_NO_VAR_NAMED_INSTANCE
hb_font_set_var_named_instance
hb_font_get_var_named_instance
hb_font_set_var_coords_design
hb_font_get_var_coords_design
hb_font_set_var_coords_normalized
hb_font_get_var_coords_normalized
hb_font_glyph_from_string
hb_font_glyph_to_string
hb_font_get_serial
hb_font_changed
hb_font_set_funcs
hb_font_set_funcs_data
hb_font_subtract_glyph_origin_for_direction
hb_font_funcs_create
hb_font_funcs_get_empty
hb_font_funcs_reference
hb_font_funcs_destroy
hb_font_funcs_set_user_data
hb_font_funcs_get_user_data
hb_font_funcs_make_immutable
hb_font_funcs_is_immutable
hb_font_get_glyph_contour_point_func_t
hb_font_funcs_set_glyph_contour_point_func
hb_font_get_glyph_extents_func_t
hb_font_funcs_set_glyph_extents_func
hb_font_get_glyph_from_name_func_t
hb_font_funcs_set_glyph_from_name_func
hb_font_get_glyph_advance_func_t
hb_font_get_glyph_h_advance_func_t
hb_font_funcs_set_glyph_h_advance_func
hb_font_get_glyph_v_advance_func_t
hb_font_funcs_set_glyph_v_advance_func
hb_font_get_glyph_advances_func_t
hb_font_get_glyph_h_advances_func_t
hb_font_funcs_set_glyph_h_advances_func
hb_font_get_glyph_v_advances_func_t
hb_font_funcs_set_glyph_v_advances_func
hb_font_get_glyph_kerning_func_t
hb_font_get_glyph_h_kerning_func_t
hb_font_funcs_set_glyph_h_kerning_func
hb_font_get_glyph_origin_func_t
hb_font_get_glyph_h_origin_func_t
hb_font_funcs_set_glyph_h_origin_func
hb_font_get_glyph_v_origin_func_t
hb_font_funcs_set_glyph_v_origin_func
hb_font_get_glyph_name_func_t
hb_font_funcs_set_glyph_name_func
hb_font_draw_glyph_func_t
hb_font_funcs_set_draw_glyph_func
hb_font_paint_glyph_func_t
hb_font_funcs_set_paint_glyph_func
hb_font_get_nominal_glyph_func_t
hb_font_funcs_set_nominal_glyph_func
hb_font_get_nominal_glyphs_func_t
hb_font_funcs_set_nominal_glyphs_func
hb_font_get_variation_glyph_func_t
hb_font_funcs_set_variation_glyph_func
hb_font_funcs_t
hb_font_t
hb_reference_table_func_t
hb_font_get_font_extents_func_t
hb_font_get_font_h_extents_func_t
hb_font_funcs_set_font_h_extents_func
hb_font_get_font_v_extents_func_t
hb_font_funcs_set_font_v_extents_func
hb_font_get_h_extents
hb_font_get_v_extents
hb_font_get_extents_for_direction
hb_font_extents_t
hb_glyph_extents_t
</SECTION>
<SECTION>
<FILE>hb-ft</FILE>
hb_ft_face_create
hb_ft_face_create_cached
hb_ft_face_create_referenced
hb_ft_font_create
hb_ft_font_create_referenced
hb_ft_font_changed
hb_ft_font_get_face
hb_ft_font_lock_face
hb_ft_font_unlock_face
hb_ft_font_set_load_flags
hb_ft_font_get_load_flags
hb_ft_font_set_funcs
hb_ft_hb_font_changed
</SECTION>
<SECTION>
<FILE>hb-gdi</FILE>
hb_gdi_face_create
</SECTION>
<SECTION>
<FILE>hb-glib</FILE>
hb_glib_get_unicode_funcs
hb_glib_script_from_script
hb_glib_script_to_script
hb_glib_blob_create
</SECTION>
<SECTION>
<FILE>hb-graphite2</FILE>
HB_GRAPHITE2_TAG_SILF
hb_graphite2_face_get_gr_face
hb_graphite2_font_get_gr_font
</SECTION>
<SECTION>
<FILE>hb-icu</FILE>
hb_icu_get_unicode_funcs
hb_icu_script_from_script
hb_icu_script_to_script
</SECTION>
<SECTION>
<FILE>hb-map</FILE>
hb_map_create
hb_map_allocation_successful
hb_map_copy
hb_map_clear
hb_map_get_empty
hb_map_reference
hb_map_destroy
hb_map_set_user_data
hb_map_get_user_data
hb_map_set
hb_map_get
hb_map_del
hb_map_has
hb_map_get_population
hb_map_is_empty
hb_map_is_equal
hb_map_hash
hb_map_update
hb_map_next
hb_map_keys
hb_map_values
HB_MAP_VALUE_INVALID
hb_map_t
</SECTION>
<SECTION>
<FILE>hb-ot-color</FILE>
HB_COLOR
hb_color_get_alpha
hb_color_get_blue
hb_color_get_green
hb_color_get_red
hb_ot_color_glyph_get_layers
hb_ot_color_glyph_reference_png
hb_ot_color_glyph_reference_svg
hb_ot_color_has_layers
hb_ot_color_has_paint
hb_ot_color_glyph_has_paint
hb_ot_color_has_palettes
hb_ot_color_has_png
hb_ot_color_has_svg
hb_ot_color_palette_color_get_name_id
hb_ot_color_palette_get_colors
hb_ot_color_palette_get_count
hb_ot_color_palette_get_flags
hb_ot_color_palette_get_name_id
hb_color_t
hb_ot_color_layer_t
hb_ot_color_palette_flags_t
</SECTION>
<SECTION>
<FILE>hb-ot-font</FILE>
hb_ot_font_set_funcs
</SECTION>
<SECTION>
<FILE>hb-ot-name</FILE>
hb_ot_name_list_names
hb_ot_name_get_utf16
hb_ot_name_get_utf32
hb_ot_name_get_utf8
hb_ot_name_id_t
hb_ot_name_id_predefined_t
hb_ot_name_entry_t
</SECTION>
<SECTION>
<FILE>hb-ot-layout</FILE>
hb_ot_tag_to_language
hb_ot_tag_to_script
hb_ot_tags_from_script_and_language
hb_ot_tags_to_script_and_language
hb_ot_layout_collect_lookups
hb_ot_layout_collect_features
hb_ot_layout_collect_features_map
hb_ot_layout_feature_get_characters
hb_ot_layout_feature_get_lookups
hb_ot_layout_feature_get_name_ids
hb_ot_layout_feature_with_variations_get_lookups
hb_ot_layout_get_attach_points
hb_ot_layout_get_font_extents
hb_ot_layout_get_font_extents2
hb_ot_layout_get_horizontal_baseline_tag_for_script
hb_ot_layout_get_baseline
hb_ot_layout_get_baseline2
hb_ot_layout_get_baseline_with_fallback
hb_ot_layout_get_baseline_with_fallback2
hb_ot_layout_get_glyph_class
hb_ot_layout_get_glyphs_in_class
hb_ot_layout_get_ligature_carets
hb_ot_layout_get_size_params
hb_ot_layout_has_glyph_classes
hb_ot_layout_has_positioning
hb_ot_layout_has_substitution
hb_ot_layout_language_find_feature
hb_ot_layout_language_get_feature_indexes
hb_ot_layout_language_get_feature_tags
hb_ot_layout_language_get_required_feature
hb_ot_layout_lookup_collect_glyphs
hb_ot_layout_lookup_get_glyph_alternates
hb_ot_layout_lookup_get_optical_bound
hb_ot_layout_lookup_substitute_closure
hb_ot_layout_lookups_substitute_closure
hb_ot_layout_lookup_would_substitute
hb_ot_layout_script_find_language
hb_ot_layout_script_get_language_tags
hb_ot_layout_script_select_language
hb_ot_layout_script_select_language2
hb_ot_layout_table_find_feature_variations
hb_ot_layout_table_get_feature_tags
hb_ot_layout_table_get_script_tags
hb_ot_layout_table_get_lookup_count
hb_ot_layout_table_select_script
hb_ot_shape_plan_collect_lookups
hb_ot_layout_language_get_required_feature_index
HB_OT_MAX_TAGS_PER_LANGUAGE
HB_OT_MAX_TAGS_PER_SCRIPT
HB_OT_TAG_DEFAULT_LANGUAGE
HB_OT_TAG_DEFAULT_SCRIPT
HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX
HB_OT_LAYOUT_NO_FEATURE_INDEX
HB_OT_LAYOUT_NO_SCRIPT_INDEX
HB_OT_LAYOUT_NO_VARIATIONS_INDEX
HB_OT_TAG_BASE
HB_OT_TAG_GDEF
HB_OT_TAG_GPOS
HB_OT_TAG_GSUB
HB_OT_TAG_JSTF
hb_ot_layout_baseline_tag_t
hb_ot_layout_glyph_class_t
</SECTION>
<SECTION>
<FILE>hb-ot-math</FILE>
hb_ot_math_has_data
hb_ot_math_get_constant
hb_ot_math_get_glyph_italics_correction
hb_ot_math_get_glyph_top_accent_attachment
hb_ot_math_get_glyph_kerning
hb_ot_math_get_glyph_kernings
hb_ot_math_is_glyph_extended_shape
hb_ot_math_get_glyph_variants
hb_ot_math_get_min_connector_overlap
hb_ot_math_get_glyph_assembly
HB_OT_TAG_MATH
HB_OT_TAG_MATH_SCRIPT
hb_ot_math_constant_t
hb_ot_math_kern_t
hb_ot_math_kern_entry_t
hb_ot_math_glyph_variant_t
hb_ot_math_glyph_part_flags_t
hb_ot_math_glyph_part_t
</SECTION>
<SECTION>
<FILE>hb-ot-meta</FILE>
hb_ot_meta_get_entry_tags
hb_ot_meta_reference_entry
hb_ot_meta_tag_t
</SECTION>
<SECTION>
<FILE>hb-ot-metrics</FILE>
hb_ot_metrics_get_position
hb_ot_metrics_get_position_with_fallback
hb_ot_metrics_get_variation
hb_ot_metrics_get_x_variation
hb_ot_metrics_get_y_variation
hb_ot_metrics_tag_t
</SECTION>
<SECTION>
<FILE>hb-ot-shape</FILE>
hb_ot_shape_glyphs_closure
</SECTION>
<SECTION>
<FILE>hb-ot-var</FILE>
hb_ot_var_has_data
hb_ot_var_find_axis_info
hb_ot_var_get_axis_count
hb_ot_var_get_axis_infos
hb_ot_var_get_named_instance_count
hb_ot_var_named_instance_get_subfamily_name_id
hb_ot_var_named_instance_get_postscript_name_id
hb_ot_var_named_instance_get_design_coords
hb_ot_var_normalize_variations
hb_ot_var_normalize_coords
HB_OT_TAG_VAR_AXIS_ITALIC
HB_OT_TAG_VAR_AXIS_OPTICAL_SIZE
HB_OT_TAG_VAR_AXIS_SLANT
HB_OT_TAG_VAR_AXIS_WEIGHT
HB_OT_TAG_VAR_AXIS_WIDTH
hb_ot_var_axis_flags_t
hb_ot_var_axis_info_t
</SECTION>
<SECTION>
<FILE>hb-set</FILE>
hb_set_create
hb_set_allocation_successful
hb_set_copy
hb_set_get_empty
hb_set_reference
hb_set_destroy
hb_set_set_user_data
hb_set_get_user_data
hb_set_clear
hb_set_set
hb_set_has
hb_set_add
hb_set_add_range
hb_set_add_sorted_array
hb_set_del
hb_set_del_range
hb_set_get_max
hb_set_get_min
hb_set_get_population
hb_set_is_empty
hb_set_hash
hb_set_subtract
hb_set_intersect
hb_set_union
hb_set_symmetric_difference
hb_set_invert
hb_set_is_inverted
hb_set_is_equal
hb_set_is_subset
hb_set_next
hb_set_next_range
hb_set_next_many
hb_set_previous
hb_set_previous_range
HB_SET_VALUE_INVALID
hb_set_t
</SECTION>
<SECTION>
<FILE>hb-shape</FILE>
hb_shape
hb_shape_full
hb_shape_justify
hb_shape_list_shapers
</SECTION>
<SECTION>
<FILE>hb-shape-plan</FILE>
hb_shape_plan_create
hb_shape_plan_create_cached
hb_shape_plan_create2
hb_shape_plan_create_cached2
hb_shape_plan_get_empty
hb_shape_plan_reference
hb_shape_plan_destroy
hb_shape_plan_set_user_data
hb_shape_plan_get_user_data
hb_shape_plan_execute
hb_shape_plan_get_shaper
hb_shape_plan_t
</SECTION>
<SECTION>
<FILE>hb-unicode</FILE>
hb_unicode_general_category
hb_unicode_combining_class
hb_unicode_mirroring
hb_unicode_script
hb_unicode_compose
hb_unicode_decompose
hb_unicode_funcs_create
hb_unicode_funcs_get_empty
hb_unicode_funcs_reference
hb_unicode_funcs_destroy
hb_unicode_funcs_set_user_data
hb_unicode_funcs_get_user_data
hb_unicode_funcs_make_immutable
hb_unicode_funcs_is_immutable
hb_unicode_funcs_get_default
hb_unicode_funcs_get_parent
hb_unicode_general_category_func_t
hb_unicode_funcs_set_general_category_func
hb_unicode_combining_class_func_t
hb_unicode_funcs_set_combining_class_func
hb_unicode_mirroring_func_t
hb_unicode_funcs_set_mirroring_func
hb_unicode_script_func_t
hb_unicode_funcs_set_script_func
hb_unicode_compose_func_t
hb_unicode_funcs_set_compose_func
hb_unicode_decompose_func_t
hb_unicode_funcs_set_decompose_func
HB_UNICODE_MAX
hb_unicode_combining_class_t
hb_unicode_general_category_t
hb_unicode_funcs_t
</SECTION>
<SECTION>
<FILE>hb-uniscribe</FILE>
hb_uniscribe_font_get_hfont
hb_uniscribe_font_get_logfontw
</SECTION>
<SECTION>
<FILE>hb-version</FILE>
HB_VERSION_ATLEAST
hb_version
hb_version_atleast
hb_version_string
HB_VERSION_MAJOR
HB_VERSION_MICRO
HB_VERSION_MINOR
HB_VERSION_STRING
</SECTION>
<SECTION>
<FILE>hb-style</FILE>
hb_style_tag_t
hb_style_get_value
</SECTION>
<SECTION>
<FILE>hb-subset</FILE>
hb_subset_input_create_or_fail
hb_subset_input_reference
hb_subset_input_destroy
hb_subset_input_set_user_data
hb_subset_input_get_user_data
hb_subset_input_keep_everything
hb_subset_input_set_flags
hb_subset_input_get_flags
hb_subset_input_unicode_set
hb_subset_input_glyph_set
hb_subset_input_set
hb_subset_input_old_to_new_glyph_mapping
hb_subset_input_pin_axis_location
hb_subset_input_pin_axis_to_default
hb_subset_or_fail
hb_subset_plan_create_or_fail
hb_subset_plan_reference
hb_subset_plan_destroy
hb_subset_plan_set_user_data
hb_subset_plan_get_user_data
hb_subset_plan_execute_or_fail
hb_subset_plan_unicode_to_old_glyph_mapping
hb_subset_plan_new_to_old_glyph_mapping
hb_subset_plan_old_to_new_glyph_mapping
hb_subset_preprocess
hb_subset_flags_t
hb_subset_input_t
hb_subset_sets_t
hb_subset_plan_t
<SUBSECTION Private>
hb_link_t
hb_object_t
hb_subset_repack_or_fail
hb_subset_input_override_name_table
hb_subset_input_set_axis_range
</SECTION>
<SECTION>
<FILE>hb-cairo</FILE>
hb_cairo_font_face_create_for_font
hb_cairo_font_face_get_font
hb_cairo_font_face_create_for_face
hb_cairo_font_face_get_face
hb_cairo_font_init_func_t
hb_cairo_font_face_set_font_init_func
hb_cairo_scaled_font_get_font
hb_cairo_font_face_set_scale_factor
hb_cairo_font_face_get_scale_factor
hb_cairo_glyphs_from_buffer
</SECTION>

View File

@@ -0,0 +1,65 @@
# Documentation build setup.
# Skip the rest of this directory when the gtkdoc-scan program cannot be
# found; the 'docs' option is passed as `required:` and so decides whether a
# missing program is tolerated.
if not find_program('gtkdoc-scan', required: get_option('docs')).found()
message('Not building documentation as gtk-doc was not found')
subdir_done()
endif
# Record that gtk-doc is available.
# NOTE(review): `conf` is defined outside this file; presumably it feeds the
# generated config header -- confirm against the top-level build file.
conf.set('HAVE_GTK_DOC', 1)
gnome = import('gnome')
# Generate version.xml from version.xml.in, with HB_VERSION set to the
# project version.
docconf = configuration_data()
docconf.set('HB_VERSION', meson.project_version())
version_xml = configure_file(input: 'version.xml.in',
output: 'version.xml',
configuration: docconf)
# Hand-written user-manual chapters plus the generated version.xml; passed to
# gnome.gtkdoc() as `content_files`.
content_files = [
'usermanual-what-is-harfbuzz.xml',
'usermanual-install-harfbuzz.xml',
'usermanual-getting-started.xml',
'usermanual-glyph-information.xml',
'usermanual-shaping-concepts.xml',
'usermanual-object-model.xml',
'usermanual-buffers-language-script-and-direction.xml',
'usermanual-fonts-and-faces.xml',
'usermanual-opentype-features.xml',
'usermanual-clusters.xml',
'usermanual-utilities.xml',
'usermanual-integration.xml',
version_xml,
]
# Image files passed to gnome.gtkdoc() as `html_assets`.
html_images = [
'HarfBuzz.png',
'HarfBuzz.svg',
]
# Headers passed to gnome.gtkdoc() as `ignore_headers`, i.e. excluded from
# the documentation scan.
ignore_headers = [
'hb-features.h',
'hb-gobject.h',
'hb-gobject-enums.h',
'hb-gobject-enums-tmp.h',
'hb-gobject-structs.h',
'hb-wasm-api.h',
]
# Build and install the 'harfbuzz' gtk-doc module from harfbuzz-docs.xml.
# Sources are taken from ../src in both the source tree and the build tree.
# `check:` is driven by the 'doc_tests' option.
# NOTE(review): `libharfbuzz_dep` is defined outside this file.
gnome.gtkdoc('harfbuzz',
main_sgml: 'harfbuzz-docs.xml',
src_dir: [meson.current_source_dir() / '..' / 'src',
meson.current_build_dir() / '..' / 'src',
],
scan_args: ['--deprecated-guards=HB_DISABLE_DEPRECATED',
'--ignore-decorators=HB_EXTERN|HB_DEPRECATED|HB_DEPRECATED_FOR()',
],
mkdb_args: ['--source-suffixes=h,cc',
'--xml-mode',
'--output-format=xml',
],
content_files: content_files,
html_assets: html_images,
ignore_headers: ignore_headers,
dependencies: [libharfbuzz_dep],
install: true,
check: get_option('doc_tests'),
)

View File

@@ -0,0 +1,294 @@
# Introduction
Several tables in the OpenType format are formed internally by a graph of subtables. Parent nodes
reference their children through the use of positive offsets, which are typically 16 bits wide.
Since offsets are always positive this forms a directed acyclic graph. For storage in the font file
the graph must be given a topological ordering and then the subtables packed in serial according to
that ordering. Since 16 bit offsets have a maximum value of 65,535, if the distance between a parent
subtable and a child is more than 65,535 bytes then it's not possible for the offset to encode that
edge.
For many fonts with complex layout rules (such as Arabic) it's not unusual for the tables containing
layout rules ([GSUB/GPOS](https://docs.microsoft.com/en-us/typography/opentype/spec/gsub)) to be
larger than 65kb. As a result these types of fonts are susceptible to offset overflows when
serializing to the binary font format.
Offset overflows can happen for a variety of reasons and require different strategies to resolve:
* Simple overflows can often be resolved with a different topological ordering.
* If a subtable has many parents this can result in the link from the furthest parent(s)
being at risk for overflows. In these cases it's possible to duplicate the shared subtable which
allows it to be placed closer to its parent.
* If subtables exist which are themselves larger than 65kb it's not possible for any offsets to point
past them. In these cases the subtable can usually be split into two smaller subtables to allow
for more flexibility in the ordering.
* In GSUB/GPOS overflows from Lookup subtables can be resolved by changing the Lookup to an extension
lookup which uses a 32 bit offset instead of 16 bit offset.
In general there isn't a simple solution to produce an optimal topological ordering for a given graph.
Finding an ordering which doesn't overflow is an NP-hard problem. Existing solutions use heuristics
which try a combination of the above strategies to find a non-overflowing configuration.
The harfbuzz subsetting library
[includes a repacking algorithm](https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-repacker.hh)
which is used to resolve offset overflows that are present in the subsetted tables it produces. This
document provides a deep dive into how the harfbuzz repacking algorithm works.
Other implementations exist, such as in
[fontTools](https://github.com/fonttools/fonttools/blob/7af43123d49c188fcef4e540fa94796b3b44e858/Lib/fontTools/ttLib/tables/otBase.py#L72), however these are not covered in this document.
# Foundations
There are five key pieces to the harfbuzz approach:
* Subtable Graph: a table's internal structure is abstracted out into a lightweight graph
representation where each subtable is a node and each offset forms an edge. The nodes only need
to know how many bytes the corresponding subtable occupies. This lightweight representation can
be easily modified to test new orderings and strategies as the repacking algorithm iterates.
* [Topological sorting algorithm](https://en.wikipedia.org/wiki/Topological_sorting): an algorithm
which given a graph gives a linear sorting of the nodes such that all offsets will be positive.
* Overflow check: given a graph and a topological sorting it checks if there will be any overflows
in any of the offsets. If there are overflows it returns a list of (parent, child) tuples that
will overflow. Since the graph has information on the size of each subtable it's straightforward
to calculate the final position of each subtable and then check if any offsets to it will
overflow.
* Content Aware Preprocessing: if the overflow resolver is aware of the format of the underlying
tables (e.g. GSUB, GPOS) then in some cases preprocessing can be done to increase the chance of
successfully packing the graph. For example for GSUB and GPOS we can preprocess the graph and
promote lookups to extension lookups (upgrades a 16 bit offset to 32 bits) or split large lookup
subtables into two or more pieces.
* Offset resolution strategies: given a particular occurrence of an overflow these strategies
modify the graph to attempt to resolve the overflow.
# High Level Algorithm
```
def repack(graph):
  graph.topological_sort()

  if (graph.will_overflow()):
    preprocess(graph)
    assign_spaces(graph)
    graph.topological_sort()

  while (overflows = graph.will_overflow()):
    for overflow in overflows:
      apply_offset_resolution_strategy (overflow, graph)
    graph.topological_sort()
```
The actual code for this processing loop can be found in the function hb_resolve_overflows () of
[hb-repacker.hh](https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-repacker.hh).
# Topological Sorting Algorithms
The harfbuzz repacker uses two different algorithms for topological sorting:
* [Kahn's Algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm)
* Sorting by shortest distance
Kahn's algorithm is approximately twice as fast as the shortest distance sort so that is attempted
first (only on the first topological sort). If it fails to eliminate overflows then shortest distance
sort will be used for all subsequent topological sorting operations.
## Shortest Distance Sort
This algorithm orders the nodes based on total distance to each node. Nodes with a shorter distance
are ordered first.
The "weight" of an edge is the sum of the size of the sub-table being pointed to plus 2^16 for a 16 bit
offset and 2^32 for a 32 bit offset.
The distance of a node is the sum of all weights along the shortest path from the root to that node
plus a priority modifier (used to change where nodes are placed by increasing or
decreasing the effective distance). Ties between nodes with the same distance are broken based
on the order of the offset in the sub table bytes.
The shortest distance to each node is determined using
[Dijkstra's algorithm](https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm). Then the topological
ordering is produced by applying a modified version of Kahn's algorithm that uses a priority queue
based on the shortest distance to each node.
## Optimizing the Sorting
The topological sorting operation is the core of the repacker and is run on each iteration so it needs
to be as fast as possible. There are a few things that are done to speed up subsequent sorting
operations:
* The number of incoming edges to each node is cached. This is required by the Kahn's algorithm
portion of both sorts. Where possible when the graph is modified we manually update the cached
edge counts of affected nodes.
* The distance to each node is cached. Where possible when the graph is modified we manually update
the cached distances of any affected nodes.
Caching these values allows the repacker to avoid recalculating them for the full graph on each
iteration.
The other important factor to speed is a fast priority queue which is a core data structure to
the topological sorting algorithm. Currently a basic heap based queue is used. Heap based queues
don't support fast priority decreases, but that can be worked around by just adding redundant entries
to the priority queue and filtering the older ones out when popping off entries. This is based
on the recommendations in
[a study of the practical performance of priority queues in Dijkstra's algorithm](https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf)
## Special Handling of 32 bit Offsets
If a graph contains multiple 32 bit offsets then the shortest distance sorting will likely be
suboptimal. For example consider the case where a graph contains two 32 bit offsets that each point
to a subgraph which are not connected to each other. The shortest distance sort will interleave the
subtables of the two subgraphs, potentially resulting in overflows. Since each of these subgraphs are
independent of each other, and 32 bit offsets can point extremely long distances a better strategy is
to pack the first subgraph in its entirety and then have the second subgraph packed after with the 32
bit offset pointing over the first subgraph. For example given the graph:
```
a--- b -- d -- f
\
\_ c -- e -- g
```
Where the links from a to b and a to c are 32 bit offsets, the shortest distance sort would be:
```
a, b, c, d, e, f, g
```
If nodes d and e have a combined size greater than 65kb then the offset from d to f will overflow.
A better ordering is:
```
a, b, d, f, c, e, g
```
The ability for 32 bit offsets to point long distances is utilized to jump over the subgraph of
b which gives the remaining 16 bit offsets a better chance of not overflowing.
The above is an ideal situation where the subgraphs are disconnected from each other, in practice
this is often not the case. So this idea can be generalized as follows:
If there is a subgraph that is only reachable from one or more 32 bit offsets, then:
* That subgraph can be treated as an independent unit and all nodes of the subgraph packed in isolation
from the rest of the graph.
* In a table that occupies less than 4gb of space (in practice all fonts), that packed independent
subgraph can be placed anywhere after the parent nodes without overflowing the 32 bit offsets from
the parent nodes.
The sorting algorithm incorporates this via a "space" modifier that can be applied to nodes in the
graph. By default all nodes are treated as being in space zero. If a node is given a non-zero space, n,
then the computed distance to the node will be modified by adding `n * 2^32`. This will cause that
node and its descendants to be packed between all nodes in space n-1 and space n+1, resulting in a
topological sort like:
```
| space 0 subtables | space 1 subtables | .... | space n subtables |
```
The assign_spaces() step in the high level algorithm is responsible for identifying independent
subgraphs and assigning unique spaces to each one. More information on the space assignment can be
found in the next section.
# Graph Preprocessing
For certain table types we can preprocess and modify the graph structure to reduce the occurrences
of overflows. Currently the repacker implements preprocessing only for GPOS and GSUB tables.
## GSUB/GPOS Table Splitting
The GSUB/GPOS preprocessor scans each lookup subtable and determines if the subtable's children are
so large that no overflow resolution is possible (for example a single subtable that exceeds 65kb
cannot be pointed over). When such cases are detected table splitting is invoked:
* The subtable is first analyzed to determine the smallest number of split points that will allow
for successful offset overflow resolution.
* Then the subtable in the graph representation is modified to actually perform the split at the
previously computed split points. At a high level splits are done by inserting new subtables
which contain a subset of the data of the original subtable and then shrinking the original subtable.
Table splitting must be aware of the underlying format of each subtable type and thus needs custom
code for each subtable type. Currently subtable splitting is only supported for GPOS subtable types.
## GSUB/GPOS Extension Lookup Promotion
In GSUB/GPOS tables lookups can be regular lookups which use 16 bit offsets to the children subtables
or extension lookups which use 32 bit offsets to the children subtables. If the sub graph of all
regular lookups is too large then it can be difficult to find an overflow free configuration. This
can be remedied by promoting one or more regular lookups to extension lookups.
During preprocessing the graph is scanned to determine the size of the subgraph of regular lookups.
If the graph is found to be too big then the analysis finds a set of lookups to promote to reduce
the subgraph size. Lastly the graph is modified to convert those lookups to extension lookups.
# Offset Resolution Strategies
## Space Assignment
The goal of space assignment is to find connected subgraphs that are only reachable via 32 bit offsets
and then assign each such subgraph to a unique non-zero space. The algorithm is roughly:
1. Collect the set, `S`, of nodes that are children of 32 bit offsets.
2. Do a directed traversal from each node in `S` and collect all encountered nodes into set `T`.
Mark all nodes in the graph that are not in `T` as being in space 0.
3. Set `next_space = 1`.
4. While set `S` is not empty:
a. Pick a node `n` in set `S` then perform an undirected graph traversal and find the set `Q` of
nodes that are reachable from `n`.
b. During traversal if a node, `m`, has an edge to a node in space 0 then `m` must be duplicated
to disconnect it from space 0.
c. Remove all nodes in `Q` from `S` and assign all nodes in `Q` to `next_space`.
d. Increment `next_space` by one.
## Manual Iterative Resolutions
For each overflow in each iteration the algorithm will attempt to apply offset overflow resolution
strategies to eliminate the overflow. The type of strategy applied is dependent on the characteristics
of the overflowing link:
* If the overflowing offset is inside a space other than space 0 and the subgraph space has more
than one 32 bit offset pointing into the subgraph then subdivide the space by moving subgraph
from one of the 32 bit offsets into a new space via the duplication of shared nodes.
* If the overflowing offset is pointing to a subtable with more than one incoming edge: duplicate
the node so that the overflowing offset is pointing at its own copy of that node.
* Otherwise, attempt to move the child subtable closer to its parent. This is accomplished by
raising the priority of all children of the parent. Next time the topological sort is run the
children will be ordered closer to the parent.
# Test Cases
The harfbuzz repacker has tests defined using generic graphs: https://github.com/harfbuzz/harfbuzz/blob/main/src/test-repacker.cc
# Future Improvements
Currently for GPOS tables the repacker implementation is sufficient to handle both subsetting and the
general case of font compilation repacking. However for GSUB the repacker is only sufficient for
subsetting related overflows. To enable general case repacking of GSUB, support for splitting of
GSUB subtables will need to be added. Other table types such as COLRv1 shouldn't require table
splitting due to the wide use of 24 bit offsets throughout the table.
Beyond subtable splitting there are a couple of "nice to have" improvements, but these are not required
to support the general case:
* Extension demotion: currently extension promotion is supported but in some cases if the non-extension
subgraph is underfilled then packed size can be reduced by demoting extension lookups back to regular
lookups.
* Currently only children nodes are moved to resolve offsets. However, in many cases moving a parent
node closer to its children will have less impact on the size of other offsets. Thus the algorithm
should use a heuristic (based on parent and child subtable sizes) to decide if the children's
priority should be increased or the parent's priority decreased.

View File

@@ -0,0 +1,178 @@
# Introduction
In hb-subset serialization is the process of writing the subsetted font
tables out to actual bytes in the final format. All serialization works
through an object called the serialize context
([hb_serialize_context_t](https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-serialize.hh)).
Internally the serialize context holds a fixed size memory buffer. For simple
tables the final bytes are written into the buffer sequentially to produce
the final serialized bytes.
## Simple Tables
Simple tables are tables that do not use offset graphs.
To write a struct into the serialization context, first you call an
allocation method on the context which requests a writable array of bytes of
a fixed size. If the requested array will not exceed the bounds of the fixed
buffer the serializer will return a pointer to the next unwritten portion
of the buffer. Then the struct is cast onto the returned pointer and values
are written to the struct's fields.
Internally the serialization context ends up looking like:
```
+-------+-------+-----+-------+--------------+
| Obj 1 | Obj 2 | ... | Obj N | Unused Space |
+-------+-------+-----+-------+--------------+
```
Here Obj N is the object currently being written.
## Complex Tables
Complex tables are made up of graphs of objects, where offsets are used
to form the edges of the graphs. Each object is a continuous slice of bytes
that contains zero or more offsets pointing to more objects.
In this case the serialization buffer has a different layout:
```
|- in progress objects -| |--- packed objects --|
+-----------+-----------+--------------+-------+-----+-------+
| Obj n+2 | Obj n+1 | Unused Space | Obj n | ... | Obj 0 |
+-----------+-----------+--------------+-------+-----+-------+
|-----------------------> <---------------------|
```
The buffer holds two stacks:
1. In progress objects are held in a stack starting from the start of buffer
that grows towards the end of the buffer.
2. Packed objects are held in a stack that starts at the end of the buffer
and grows towards the start of the buffer.
Once the object on the top of the in progress stack is finished being written
its bytes are popped from the in progress stack and copied to the top of
the packed objects stack. In the example above, finalizing Obj n+1
would result in the following state:
```
+---------+--------------+---------+-------+-----+-------+
| Obj n+2 | Unused Space | Obj n+1 | Obj n | ... | Obj 0 |
+---------+--------------+---------+-------+-----+-------+
```
Each packed object is associated with an ID: its zero-based position in the packed
objects stack. In this example Obj 0 would have an ID of 0.
During serialization offsets that link from one object to another are stored
using object ids. The serialize context maintains a list of links between
objects. Each link records the parent object id, the child object id, the position
of the offset field within the parent object, and the width of the offset.
Links are always added to the current in progress object and you can only link to
objects that have been packed and thus have an ID.
### Object De-duplication
An important optimization in packing offset graphs is de-duplicating equivalent objects. If you
have two or more parent objects that point to child objects that are equivalent then you only need
to encode the child once and can have the parents point to the same child. This can significantly
reduce the final size of a serialized graph.
During packing of an in progress object the serialization context checks if any existing packed
objects are equivalent to the object being packed. Here equivalence means the object has the
exact same bytes and all of its links are equivalent. If an equivalent object is found the
in progress object is discarded and not copied to the packed object stack. The object id of
the equivalent object is instead returned. Thus parent objects will then link to the existing
equivalent object.
To find equivalent objects the serialization context maintains a hashmap from object to the canonical
object id.
### Link Resolution
Once all objects have been packed the next step is to assign actual values to all of the offset
fields. Prior to this point all links in the graph have been recorded using object ids. For each
link the resolver computes the offset between the parent and child and writes the offset into
the serialization buffer at the appropriate location.
### Offset Overflow Resolution
If during link resolution the resolver finds that an offset's value would exceed what can be encoded
in that offset field, link resolution is aborted and the offset overflow resolver is invoked.
That process is documented [here](repacker.md).
### Example of Complex Serialization
If we wanted to serialize the following graph:
```
a--b--d
\ /
c
```
Serializer would be called like this:
```c++
hb_serialize_context_t ctx;
struct root {
char name;
Offset16To<child> child_1;
Offset16To<child> child_2;
}
struct child {
char name;
Offset16To<char> leaf;
}
// Object A.
ctx->push();
root* a = ctx->start_embed<root> ();
ctx->extend_min (a);
a->name = 'a';
// Object B.
ctx->push();
child* b = ctx->start_embed<child> ();
ctx->extend_min (b);
b->name = 'b';
// Object D.
ctx->push();
*ctx->allocate_size<char> (1) = 'd';
unsigned d_id = ctx->pop_pack ();
ctx->add_link (b->leaf, d_id);
unsigned b_id = ctx->pop_pack ();
// Object C
ctx->push();
child* c = ctx->start_embed<child> ();
ctx->extend_min (c);
c->name = 'c';
// Object D.
ctx->push();
*ctx->allocate_size<char> (1) = 'd';
d_id = ctx->pop_pack (); // Serializer will automatically de-dup this with the previous 'd'
ctx->add_link (c->leaf, d_id);
unsigned c_id = ctx->pop_pack ();
// Object A's links:
ctx->add_link (a->child_1, b_id);
ctx->add_link (a->child_2, c_id);
ctx->pop_pack ();
ctx->end_serialize ();
```

View File

@@ -0,0 +1,228 @@
# Introduction
Subset preprocessing is a mechanism which can significantly speed up font subsetting operations.
It works by prepopulating data structures from the source font which can be used in later subsetting
operations to more quickly produce the subset. Preprocessing is useful in cases where multiple subsets
will be cut from the same source font.
# Usage
```c++
hb_face_t* preprocessed = hb_subset_preprocess (source_face);
...
hb_face_t* subset = hb_subset_or_fail (preprocessed, subset_input);
```
# Additional Details
* A subset produced from a preprocessed face should be identical to a subset produced from only the
original face. The preprocessor does not change the functionality of the subsetter, just speeds
things up.
* The preprocessing operation may take longer than the time it takes to produce a subset from the
source font. Thus the main performance gains are made when a preprocessed face is reused for
multiple subsetting operations.
* Currently the largest performance gains are seen when using a preprocessed face for CFF subsetting.
* The preprocessed face may contain references to the memory backing the source face. If this memory
is fully owned by a harfbuzz hb_blob_t* then it will automatically be kept alive for the lifetime
of the preprocessed face. However, if this memory is not fully owned by a harfbuzz hb_blob_t* then
it is necessary to ensure that the memory is kept alive for the lifetime of the preprocessed face.
# Performance Improvements
Here is the performance difference of producing a subset with a preprocessed face vs producing
a subset with the source face:
Benchmark | Delta Time (%)
----------|-----------------
BM_subset/subset_glyphs/Roboto-Regular.ttf/10_median|-56%
BM_subset/subset_glyphs/Roboto-Regular.ttf/64_median|-33%
BM_subset/subset_glyphs/Roboto-Regular.ttf/512_median|-28%
BM_subset/subset_glyphs/Roboto-Regular.ttf/1000_median|-11%
BM_subset/subset_glyphs/Roboto-Regular.ttf/nohinting/10_median|-56%
BM_subset/subset_glyphs/Roboto-Regular.ttf/nohinting/64_median|-33%
BM_subset/subset_glyphs/Roboto-Regular.ttf/nohinting/512_median|-21%
BM_subset/subset_glyphs/Roboto-Regular.ttf/nohinting/1000_median|-9%
BM_subset/subset_glyphs/Amiri-Regular.ttf/10_median|-67%
BM_subset/subset_glyphs/Amiri-Regular.ttf/64_median|-48%
BM_subset/subset_glyphs/Amiri-Regular.ttf/512_median|-21%
BM_subset/subset_glyphs/Amiri-Regular.ttf/4096_median|-9%
BM_subset/subset_glyphs/Amiri-Regular.ttf/nohinting/10_median|-66%
BM_subset/subset_glyphs/Amiri-Regular.ttf/nohinting/64_median|-50%
BM_subset/subset_glyphs/Amiri-Regular.ttf/nohinting/512_median|-8%
BM_subset/subset_glyphs/Amiri-Regular.ttf/nohinting/4096_median|-9%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/10_median|-85%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/64_median|-71%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/512_median|-3%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/1400_median|4%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/nohinting/10_median|-84%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/nohinting/64_median|-72%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/nohinting/512_median|0%
BM_subset/subset_glyphs/NotoNastaliqUrdu-Regular.ttf/nohinting/1400_median|0%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/10_median|-30%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/64_median|-24%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/512_median|-3%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/1000_median|-3%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/nohinting/10_median|-30%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/nohinting/64_median|-24%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/nohinting/512_median|-3%
BM_subset/subset_glyphs/NotoSansDevanagari-Regular.ttf/nohinting/1000_median|-5%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/10_median|-96%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/64_median|-90%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/512_median|-74%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/4096_median|-25%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/10000_median|-23%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/nohinting/10_median|-95%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/nohinting/64_median|-90%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/nohinting/512_median|-73%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/nohinting/4096_median|-24%
BM_subset/subset_glyphs/Mplus1p-Regular.ttf/nohinting/10000_median|-11%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/10_median|-84%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/64_median|-77%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/512_median|-70%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/4096_median|-80%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/10000_median|-86%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/nohinting/10_median|-84%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/nohinting/64_median|-78%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/nohinting/512_median|-71%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/nohinting/4096_median|-86%
BM_subset/subset_glyphs/SourceHanSans-Regular_subset.otf/nohinting/10000_median|-88%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/10_median|-59%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/64_median|-55%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/512_median|-67%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/2000_median|-68%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/nohinting/10_median|-60%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/nohinting/64_median|-58%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/nohinting/512_median|-72%
BM_subset/subset_glyphs/SourceSansPro-Regular.otf/nohinting/2000_median|-71%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/10_median|-70%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/64_median|-64%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/300_median|-73%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/nohinting/10_median|-71%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/nohinting/64_median|-68%
BM_subset/subset_glyphs/AdobeVFPrototype.otf/nohinting/300_median|-72%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/10_median|-90%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/64_median|-82%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/512_median|-31%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/4096_median|-9%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/6000_median|-22%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/nohinting/10_median|-88%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/nohinting/64_median|-83%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/nohinting/512_median|-31%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/nohinting/4096_median|-16%
BM_subset/subset_glyphs/MPLUS1-Variable.ttf/nohinting/6000_median|-18%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/10_median|-44%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/64_median|-18%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/512_median|-2%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/900_median|-6%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/nohinting/10_median|-45%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/nohinting/64_median|-17%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/nohinting/512_median|-15%
BM_subset/subset_glyphs/RobotoFlex-Variable.ttf/nohinting/900_median|-3%
BM_subset/subset_codepoints/Roboto-Regular.ttf/10_median|-20%
BM_subset/subset_codepoints/Roboto-Regular.ttf/64_median|-16%
BM_subset/subset_codepoints/Roboto-Regular.ttf/512_median|-12%
BM_subset/subset_codepoints/Roboto-Regular.ttf/1000_median|-10%
BM_subset/subset_codepoints/Roboto-Regular.ttf/nohinting/10_median|-24%
BM_subset/subset_codepoints/Roboto-Regular.ttf/nohinting/64_median|-14%
BM_subset/subset_codepoints/Roboto-Regular.ttf/nohinting/512_median|-15%
BM_subset/subset_codepoints/Roboto-Regular.ttf/nohinting/1000_median|-9%
BM_subset/subset_codepoints/Amiri-Regular.ttf/10_median|-51%
BM_subset/subset_codepoints/Amiri-Regular.ttf/64_median|-37%
BM_subset/subset_codepoints/Amiri-Regular.ttf/512_median|-12%
BM_subset/subset_codepoints/Amiri-Regular.ttf/4096_median|-1%
BM_subset/subset_codepoints/Amiri-Regular.ttf/nohinting/10_median|-49%
BM_subset/subset_codepoints/Amiri-Regular.ttf/nohinting/64_median|-35%
BM_subset/subset_codepoints/Amiri-Regular.ttf/nohinting/512_median|-6%
BM_subset/subset_codepoints/Amiri-Regular.ttf/nohinting/4096_median|-1%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/10_median|-82%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/64_median|-9%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/512_median|0%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/1400_median|0%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/nohinting/10_median|-82%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/nohinting/64_median|-13%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/nohinting/512_median|-3%
BM_subset/subset_codepoints/NotoNastaliqUrdu-Regular.ttf/nohinting/1400_median|2%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/10_median|-40%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/64_median|-26%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/512_median|-5%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/1000_median|3%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/nohinting/10_median|-43%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/nohinting/64_median|-24%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/nohinting/512_median|-2%
BM_subset/subset_codepoints/NotoSansDevanagari-Regular.ttf/nohinting/1000_median|2%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/10_median|-83%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/64_median|-67%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/512_median|-39%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/4096_median|-20%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/10000_median|-25%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/nohinting/10_median|-83%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/nohinting/64_median|-65%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/nohinting/512_median|-42%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/nohinting/4096_median|-34%
BM_subset/subset_codepoints/Mplus1p-Regular.ttf/nohinting/10000_median|-21%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/10_median|-69%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/64_median|-69%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/512_median|-70%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/4096_median|-84%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/10000_median|-83%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/nohinting/10_median|-71%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/nohinting/64_median|-68%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/nohinting/512_median|-70%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/nohinting/4096_median|-86%
BM_subset/subset_codepoints/SourceHanSans-Regular_subset.otf/nohinting/10000_median|-88%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/10_median|-45%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/64_median|-48%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/512_median|-57%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/2000_median|-66%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/nohinting/10_median|-43%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/nohinting/64_median|-50%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/nohinting/512_median|-63%
BM_subset/subset_codepoints/SourceSansPro-Regular.otf/nohinting/2000_median|-72%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/10_median|-69%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/64_median|-66%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/300_median|-74%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/nohinting/10_median|-70%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/nohinting/64_median|-71%
BM_subset/subset_codepoints/AdobeVFPrototype.otf/nohinting/300_median|-75%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/10_median|-66%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/64_median|-46%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/512_median|-15%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/4096_median|-5%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/6000_median|-16%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/nohinting/10_median|-66%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/nohinting/64_median|-45%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/nohinting/512_median|-14%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/nohinting/4096_median|-11%
BM_subset/subset_codepoints/MPLUS1-Variable.ttf/nohinting/6000_median|-27%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/10_median|-38%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/64_median|-9%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/512_median|-3%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/900_median|-16%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/nohinting/10_median|-39%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/nohinting/64_median|-12%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/nohinting/512_median|-4%
BM_subset/subset_codepoints/RobotoFlex-Variable.ttf/nohinting/900_median|-2%
BM_subset/instance/MPLUS1-Variable.ttf/10_median|-68%
BM_subset/instance/MPLUS1-Variable.ttf/64_median|-45%
BM_subset/instance/MPLUS1-Variable.ttf/512_median|-18%
BM_subset/instance/MPLUS1-Variable.ttf/4096_median|-2%
BM_subset/instance/MPLUS1-Variable.ttf/6000_median|4%
BM_subset/instance/MPLUS1-Variable.ttf/nohinting/10_median|-69%
BM_subset/instance/MPLUS1-Variable.ttf/nohinting/64_median|-46%
BM_subset/instance/MPLUS1-Variable.ttf/nohinting/512_median|-11%
BM_subset/instance/MPLUS1-Variable.ttf/nohinting/4096_median|4%
BM_subset/instance/MPLUS1-Variable.ttf/nohinting/6000_median|-5%
BM_subset/instance/RobotoFlex-Variable.ttf/10_median|-34%
BM_subset/instance/RobotoFlex-Variable.ttf/64_median|-12%
BM_subset/instance/RobotoFlex-Variable.ttf/512_median|6%
BM_subset/instance/RobotoFlex-Variable.ttf/900_median|-6%
BM_subset/instance/RobotoFlex-Variable.ttf/nohinting/10_median|-33%
BM_subset/instance/RobotoFlex-Variable.ttf/nohinting/64_median|-11%
BM_subset/instance/RobotoFlex-Variable.ttf/nohinting/512_median|3%
BM_subset/instance/RobotoFlex-Variable.ttf/nohinting/900_median|0%

View File

@@ -0,0 +1,412 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="buffers-language-script-and-direction">
<title>Buffers, language, script and direction</title>
<para>
The input to the HarfBuzz shaper is a series of Unicode characters, stored in a
buffer. In this chapter, we'll look at how to set up a buffer with
the text that we want and how to customize the properties of the
buffer. We'll also look at a piece of lower-level machinery that
you will need to understand before proceeding: the functions that
HarfBuzz uses to retrieve Unicode information.
</para>
<para>
After shaping is complete, HarfBuzz puts its output back
into the buffer. But getting that output requires setting up a
face and a font first, so we will look at that in the next chapter
instead of here.
</para>
<section id="creating-and-destroying-buffers">
<title>Creating and destroying buffers</title>
<para>
As we saw in our <emphasis>Getting Started</emphasis> example, a
buffer is created and
initialized with <function>hb_buffer_create()</function>. This
produces a new, empty buffer object, instantiated with some
default values and ready to accept your Unicode strings.
</para>
<para>
HarfBuzz manages the memory of objects (such as buffers) that it
creates, so you don't have to. When you have finished working on
a buffer, you can call <function>hb_buffer_destroy()</function>:
</para>
<programlisting language="C">
hb_buffer_t *buf = hb_buffer_create();
...
hb_buffer_destroy(buf);
</programlisting>
<para>
This will destroy the object and free its associated memory -
unless some other part of the program holds a reference to this
buffer. If you acquire a HarfBuzz buffer from another subsystem
and want to ensure that it is not garbage collected by someone
else destroying it, you should increase its reference count:
</para>
<programlisting language="C">
void somefunc(hb_buffer_t *buf) {
buf = hb_buffer_reference(buf);
...
</programlisting>
<para>
And then decrease it once you're done with it:
</para>
<programlisting language="C">
hb_buffer_destroy(buf);
}
</programlisting>
<para>
While we are on the subject of reference-counting buffers, it is
worth noting that an individual buffer can only meaningfully be
used by one thread at a time.
</para>
<para>
To throw away all the data in your buffer and start from scratch,
call <function>hb_buffer_reset(buf)</function>. If you want to
throw away the string in the buffer but keep the options, you can
instead call <function>hb_buffer_clear_contents(buf)</function>.
</para>
</section>
<section id="adding-text-to-the-buffer">
<title>Adding text to the buffer</title>
<para>
Now we have a brand new HarfBuzz buffer. Let's start filling it
with text! From HarfBuzz's perspective, a buffer is just a stream
of Unicode code points, but your input string is probably in one of
the standard Unicode character encodings (UTF-8, UTF-16, or
UTF-32). HarfBuzz provides convenience functions that accept
each of these encodings:
<function>hb_buffer_add_utf8()</function>,
<function>hb_buffer_add_utf16()</function>, and
<function>hb_buffer_add_utf32()</function>. Other than the
character encoding they accept, they function identically.
</para>
<para>
You can add UTF-8 text to a buffer by passing in the text array,
the array's length, an offset into the array for the first
character to add, and the length of the segment to add:
</para>
<programlisting language="C">
hb_buffer_add_utf8 (hb_buffer_t *buf,
const char *text,
int text_length,
unsigned int item_offset,
int item_length)
</programlisting>
<para>
So, in practice, you can say:
</para>
<programlisting language="C">
hb_buffer_add_utf8(buf, text, strlen(text), 0, strlen(text));
</programlisting>
<para>
This will append your new characters to
<parameter>buf</parameter>, not replace its existing
contents. Also, note that you can use <literal>-1</literal> in
place of the first instance of <function>strlen(text)</function>
if your text array is null-terminated. Similarly, you can also use
<literal>-1</literal> as the final argument if you want to add its
full contents.
</para>
<para>
Whatever <parameter>item_offset</parameter> and
<parameter>item_length</parameter> you provide, HarfBuzz will also
attempt to grab the five characters <emphasis>before</emphasis>
the offset point and the five characters
<emphasis>after</emphasis> the designated end. These are the
before and after "context" segments, which are used internally
for HarfBuzz to make shaping decisions. They will not be part of
the final output, but they ensure that HarfBuzz's
script-specific shaping operations are correct. If there are
fewer than five characters available for the before or after
contexts, HarfBuzz will just grab what is there.
</para>
<para>
For longer text runs, such as full paragraphs, it might be
tempting to only add smaller sub-segments to a buffer and
shape them in piecemeal fashion. Generally, this is not a good
idea, however, because a lot of shaping decisions are
dependent on this context information. For example, in Arabic
and other connected scripts, HarfBuzz needs to know the code
points before and after each character in order to correctly
determine which glyph to return.
</para>
<para>
The safest approach is to add all of the text available (even
if your text contains a mix of scripts, directions, languages
and fonts), then use <parameter>item_offset</parameter> and
<parameter>item_length</parameter> to indicate which characters you
want shaped (which must all have the same script, direction,
language and font), so that HarfBuzz has access to any context.
</para>
<para>
You can also add Unicode code points directly with
<function>hb_buffer_add_codepoints()</function>. The arguments
to this function are the same as those for the UTF
encodings. But it is particularly important to note that
HarfBuzz does not do validity checking on the text that is added
to a buffer. Invalid code points will be replaced, but it is up
to you to do any deep-sanity checking necessary.
</para>
</section>
<section id="setting-buffer-properties">
<title>Setting buffer properties</title>
<para>
Buffers containing input characters still need several
properties set before HarfBuzz can shape their text correctly.
</para>
<para>
Initially, all buffers are set to the
<literal>HB_BUFFER_CONTENT_TYPE_INVALID</literal> content
type. After adding text, the buffer should be set to
<literal>HB_BUFFER_CONTENT_TYPE_UNICODE</literal> instead, which
indicates that it contains un-shaped input
characters. After shaping, the buffer will have the
<literal>HB_BUFFER_CONTENT_TYPE_GLYPHS</literal> content type.
</para>
<para>
<function>hb_buffer_add_utf8()</function> and the
other UTF functions set the content type of their buffer
automatically. But if you are reusing a buffer you may want to
check its state with
<function>hb_buffer_get_content_type(buffer)</function>. If
necessary you can set the content type with
</para>
<programlisting language="C">
hb_buffer_set_content_type(buf, HB_BUFFER_CONTENT_TYPE_UNICODE);
</programlisting>
<para>
to prepare for shaping.
</para>
<para>
Buffers also need to carry information about the script,
language, and text direction of their contents. You can set
these properties individually:
</para>
<programlisting language="C">
hb_buffer_set_direction(buf, HB_DIRECTION_LTR);
hb_buffer_set_script(buf, HB_SCRIPT_LATIN);
hb_buffer_set_language(buf, hb_language_from_string("en", -1));
</programlisting>
<para>
However, since these properties are often repeated for
multiple text runs, you can also save them in a
<literal>hb_segment_properties_t</literal> for reuse:
</para>
<programlisting language="C">
hb_segment_properties_t savedprops;
hb_buffer_get_segment_properties (buf, &amp;savedprops);
...
hb_buffer_set_segment_properties (buf2, &amp;savedprops);
</programlisting>
<para>
HarfBuzz also provides getter functions to retrieve a buffer's
direction, script, and language properties individually.
</para>
<para>
HarfBuzz recognizes four text directions in
<type>hb_direction_t</type>: left-to-right
(<literal>HB_DIRECTION_LTR</literal>), right-to-left (<literal>HB_DIRECTION_RTL</literal>),
top-to-bottom (<literal>HB_DIRECTION_TTB</literal>), and
bottom-to-top (<literal>HB_DIRECTION_BTT</literal>). For the
script property, HarfBuzz uses identifiers based on the
<ulink
url="https://unicode.org/iso15924/">ISO 15924
standard</ulink>. For languages, HarfBuzz uses tags based on the
<ulink url="https://tools.ietf.org/html/bcp47">IETF BCP 47</ulink> standard.
</para>
<para>
Helper functions are provided to convert character strings into
the necessary script and language tag types.
</para>
<para>
Two additional buffer properties to be aware of are the
"invisible glyph" and the replacement code point. The
replacement code point is inserted into buffer output in place of
any invalid code points encountered in the input. By default, it
is the Unicode <literal>REPLACEMENT CHARACTER</literal> code
point, <literal>U+FFFD</literal> "&#xFFFD;". You can change this with
</para>
<programlisting language="C">
hb_buffer_set_replacement_codepoint(buf, replacement);
</programlisting>
<para>
passing in the replacement Unicode code point as the
<parameter>replacement</parameter> parameter.
</para>
<para>
The invisible glyph is used to replace all output glyphs that
are invisible. By default, the standard space character
<literal>U+0020</literal> is used; you can replace this (for
example, when using a font that provides script-specific
spaces) with
</para>
<programlisting language="C">
hb_buffer_set_invisible_glyph(buf, replacement_glyph);
</programlisting>
<para>
Do note that in the <parameter>replacement_glyph</parameter>
parameter, you must provide the glyph ID of the replacement you
wish to use, not the Unicode code point.
</para>
<para>
HarfBuzz supports a few additional flags you might want to set
on your buffer under certain circumstances. The
<literal>HB_BUFFER_FLAG_BOT</literal> and
<literal>HB_BUFFER_FLAG_EOT</literal> flags tell HarfBuzz
that the buffer represents the beginning or end (respectively)
of a text element (such as a paragraph or other block). Knowing
this allows HarfBuzz to apply certain contextual font features
when shaping, such as initial or final variants in connected
scripts.
</para>
<para>
<literal>HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES</literal>
tells HarfBuzz not to hide glyphs with the
<literal>Default_Ignorable</literal> property in Unicode. This
property designates control characters and other non-printing
code points, such as joiners and variation selectors. Normally
HarfBuzz replaces them in the output buffer with zero-width
space glyphs (using the "invisible glyph" property discussed
above); setting this flag causes them to be printed, which can
be helpful for troubleshooting.
</para>
<para>
Conversely, setting the
<literal>HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES</literal> flag
tells HarfBuzz to remove <literal>Default_Ignorable</literal>
glyphs from the output buffer entirely. Finally, setting the
<literal>HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE</literal>
flag tells HarfBuzz not to insert the dotted-circle glyph
(<literal>U+25CC</literal>, "&#x25CC;"), which is normally
inserted into buffer output when broken character sequences are
encountered (such as combining marks that are not attached to a
base character).
</para>
</section>
<section id="customizing-unicode-functions">
<title>Customizing Unicode functions</title>
<para>
HarfBuzz requires some simple functions for accessing
information from the Unicode Character Database (such as the
<literal>General_Category</literal> (gc) and
<literal>Script</literal> (sc) properties) that is useful
for shaping, as well as some useful operations like composing and
decomposing code points.
</para>
<para>
HarfBuzz includes its own internal, lightweight set of Unicode
functions. At build time, it is also possible to compile support
for some other options, such as the Unicode functions provided
by GLib or the International Components for Unicode (ICU)
library. Generally, this option is only of interest for client
programs that have specific integration requirements or that do
a significant amount of customization.
</para>
<para>
If your program has access to other Unicode functions, however,
such as through a system library or application framework, you
might prefer to use those instead of the built-in
options. HarfBuzz supports this by implementing its Unicode
functions as a set of virtual methods that you can replace —
without otherwise affecting HarfBuzz's functionality.
</para>
<para>
The Unicode functions are specified in a structure called
<literal>unicode_funcs</literal> which is attached to each
buffer. But even though <literal>unicode_funcs</literal> is
associated with a <type>hb_buffer_t</type>, the functions
themselves are called by other HarfBuzz APIs that access
buffers, so it would be unwise for you to hook different
functions into different buffers.
</para>
<para>
In addition, you can mark your <literal>unicode_funcs</literal>
as immutable by calling
<function>hb_unicode_funcs_make_immutable (ufuncs)</function>.
This is especially useful if your code is a
library or framework that will have its own client programs. By
marking your Unicode function choices as immutable, you prevent
your own client programs from changing the
<literal>unicode_funcs</literal> configuration and introducing
inconsistencies and errors downstream.
</para>
<para>
You can retrieve the Unicode-functions configuration for
your buffer by calling <function>hb_buffer_get_unicode_funcs()</function>:
</para>
<programlisting language="C">
hb_unicode_funcs_t *ufunctions;
ufunctions = hb_buffer_get_unicode_funcs(buf);
</programlisting>
<para>
The current version of <literal>unicode_funcs</literal> uses six functions:
</para>
<itemizedlist>
<listitem>
<para>
<function>hb_unicode_combining_class_func_t</function>:
returns the Canonical Combining Class of a code point.
</para>
</listitem>
<listitem>
<para>
<function>hb_unicode_general_category_func_t</function>:
returns the General Category (gc) of a code point.
</para>
</listitem>
<listitem>
<para>
<function>hb_unicode_mirroring_func_t</function>: returns
the Mirroring Glyph code point (for bi-directional
replacement) of a code point.
</para>
</listitem>
<listitem>
<para>
<function>hb_unicode_script_func_t</function>: returns the
Script (sc) property of a code point.
</para>
</listitem>
<listitem>
<para>
<function>hb_unicode_compose_func_t</function>: returns the
canonical composition of a sequence of two code points.
</para>
</listitem>
<listitem>
<para>
<function>hb_unicode_decompose_func_t</function>: returns
the canonical decomposition of a code point.
</para>
</listitem>
</itemizedlist>
<para>
Note, however, that future HarfBuzz releases may alter this set.
</para>
<para>
Each Unicode function has a corresponding setter, with which you
can assign a callback to your replacement function. For example,
to replace
<function>hb_unicode_general_category_func_t</function>, you can call
</para>
<programlisting language="C">
hb_unicode_funcs_set_general_category_func (ufuncs, func, user_data, destroy);
</programlisting>
<para>
Virtualizing this set of Unicode functions is primarily intended
to improve portability. There is no need for every client
program to make the effort to replace the default options, so if
you are unsure, do not feel any pressure to customize
<literal>unicode_funcs</literal>.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,701 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="clusters">
<title>Clusters</title>
<section id="clusters-and-shaping">
<title>Clusters and shaping</title>
<para>
In text shaping, a <emphasis>cluster</emphasis> is a sequence of
characters that needs to be treated as a single, indivisible
unit. A single letter or symbol can be a cluster of its
own. Other clusters correspond to longer subsequences of the
input code points &mdash; such as a ligature or conjunct form
&mdash; and require the shaper to ensure that the cluster is not
broken during the shaping process.
</para>
<para>
A cluster is distinct from a <emphasis>grapheme</emphasis>,
which is the smallest unit of meaning in a writing system or
script.
</para>
<para>
The definitions of the two terms are similar. However, clusters
are only relevant for script shaping and glyph layout. In
contrast, graphemes are a property of the underlying script, and
are of interest when client programs implement orthographic
or linguistic functionality.
</para>
<para>
For example, two individual letters are often two separate
graphemes. When two letters form a ligature, however, they
combine into a single glyph. They are then part of the same
cluster and are treated as a unit by the shaping engine &mdash;
even though the two original, underlying letters remain separate
graphemes.
</para>
<para>
HarfBuzz is concerned with clusters, <emphasis>not</emphasis>
with graphemes &mdash; although client programs using HarfBuzz
may still care about graphemes for other reasons from time to time.
</para>
<para>
During the shaping process, there are several shaping operations
that may merge adjacent characters (for example, when two code
points form a ligature or a conjunct form and are replaced by a
single glyph) or split one character into several (for example,
when decomposing a code point through the
<literal>ccmp</literal> feature). Operations like these alter
clusters; HarfBuzz tracks the changes to ensure that no clusters
get lost or broken during shaping.
</para>
<para>
HarfBuzz records cluster information independently from how
shaping operations affect the individual glyphs returned in an
output buffer. Consequently, a client program using HarfBuzz can
utilize the cluster information to implement features such as:
</para>
<itemizedlist>
<listitem>
<para>
Correctly positioning the cursor within a shaped text run,
even when characters have formed ligatures, composed or
decomposed, reordered, or undergone other shaping operations.
</para>
</listitem>
<listitem>
<para>
Correctly highlighting a text selection that includes some,
but not all, of the characters in a word.
</para>
</listitem>
<listitem>
<para>
Applying text attributes (such as color or underlining) to
part, but not all, of a word.
</para>
</listitem>
<listitem>
<para>
Generating output document formats (such as PDF) with
embedded text that can be fully extracted.
</para>
</listitem>
<listitem>
<para>
Determining the mapping between input characters and output
glyphs, such as which glyphs are ligatures.
</para>
</listitem>
<listitem>
<para>
Performing line-breaking, justification, and other
line-level or paragraph-level operations that must be done
after shaping is complete, but which require examining
character-level properties.
</para>
</listitem>
</itemizedlist>
</section>
<section id="working-with-harfbuzz-clusters">
<title>Working with HarfBuzz clusters</title>
<para>
When you add text to a HarfBuzz buffer, each code point must be
assigned a <emphasis>cluster value</emphasis>.
</para>
<para>
This cluster value is an arbitrary number; HarfBuzz uses it only
to distinguish between clusters. Many client programs will use
the index of each code point in the input text stream as the
cluster value. This is for the sake of convenience; the actual
value does not matter.
</para>
<para>
Some of the shaping operations performed by HarfBuzz &mdash;
such as reordering, composition, decomposition, and substitution
&mdash; may alter the cluster values of some characters. The
final cluster values in the buffer at the end of the shaping
process will indicate to client programs which subsequences of
glyphs represent a cluster and, therefore, must not be
separated.
</para>
<para>
In addition, client programs can query the final cluster values
to discern other potentially important information about the
glyphs in the output buffer (such as whether or not a ligature
was formed).
</para>
<para>
For example, if the initial sequence of cluster values was:
</para>
<programlisting>
0,1,2,3,4
</programlisting>
<para>
and the final sequence of cluster values is:
</para>
<programlisting>
0,0,3,3
</programlisting>
<para>
then there are two clusters in the output buffer: the first
cluster includes the first two glyphs, and the second cluster
includes the third and fourth glyphs. It is also evident that a
ligature or conjunct has been formed, because there are fewer
glyphs in the output buffer (four) than there were code points
in the input buffer (five).
</para>
<para>
Although client programs using HarfBuzz are free to assign
initial cluster values in any manner they choose to, HarfBuzz
does offer some useful guarantees if the cluster values are
assigned in a monotonic (either non-decreasing or non-increasing)
order.
</para>
<para>
For buffers in the left-to-right (LTR)
or top-to-bottom (TTB) text flow direction,
HarfBuzz will preserve the monotonic property: client programs
are guaranteed that monotonically increasing initial cluster
values will be returned as monotonically increasing final
cluster values.
</para>
<para>
For buffers in the right-to-left (RTL)
or bottom-to-top (BTT) text flow direction,
the directionality of the buffer itself is reversed for final
output as a matter of design. Therefore, HarfBuzz inverts the
monotonic property: client programs are guaranteed that
monotonically increasing initial cluster values will be
returned as monotonically <emphasis>decreasing</emphasis> final
cluster values.
</para>
<para>
Client programs can adjust how HarfBuzz handles clusters during
shaping by setting the
<literal>cluster_level</literal> of the
buffer. HarfBuzz offers three <emphasis>levels</emphasis> of
clustering support for this property:
</para>
<itemizedlist>
<listitem>
<para><emphasis>Level 0</emphasis> is the default.
</para>
<para>
The distinguishing feature of level 0 behavior is that, at
the beginning of processing the buffer, all code points that
are categorized as <emphasis>marks</emphasis>,
<emphasis>modifier symbols</emphasis>, or
<emphasis>Emoji extended pictographic</emphasis> modifiers,
as well as the <emphasis>Zero Width Joiner</emphasis> and
<emphasis>Zero Width Non-Joiner</emphasis> code points, are
assigned the cluster value of the closest preceding code
point from a <emphasis>different</emphasis> category.
</para>
<para>
In essence, whenever a base character is followed by a mark
character or a sequence of mark characters, those marks are
reassigned to the same initial cluster value as the base
character. This reassignment is referred to as
"merging" the affected clusters. This behavior is based on
the Grapheme Cluster Boundary specification in <ulink
url="https://www.unicode.org/reports/tr29/#Regex_Definitions">Unicode
Technical Report 29</ulink>.
</para>
<para>
This cluster level is suitable for code that likes to use
HarfBuzz cluster values as an approximation of the Unicode
Grapheme Cluster Boundaries as well.
</para>
<para>
Client programs can specify level 0 behavior for a buffer by
setting its <literal>cluster_level</literal> to
<literal>HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES</literal>.
</para>
</listitem>
<listitem>
<para>
<emphasis>Level 1</emphasis> tweaks the old behavior
slightly to produce better results. Therefore, level 1
clustering is recommended for code that is not required to
implement backward compatibility with the old HarfBuzz.
</para>
<para>
<emphasis>Level 1</emphasis> differs from level 0 by not merging the
clusters of marks and other modifier code points with the
preceding "base" code point's cluster. By preserving the
separate cluster values of these marks and modifier code
points, script shapers can perform additional operations
that might lead to improved results (for example, coloring
mark glyphs differently than their base).
</para>
<para>
Client programs can specify level 1 behavior for a buffer by
setting its <literal>cluster_level</literal> to
<literal>HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS</literal>.
</para>
</listitem>
<listitem>
<para>
<emphasis>Level 2</emphasis> differs significantly in how it
treats cluster values. In level 2, HarfBuzz never merges
clusters.
</para>
<para>
This difference can be seen most clearly when HarfBuzz processes
ligature substitutions and glyph decompositions. In level 0
and level 1, ligatures and glyph decomposition both involve
merging clusters; in level 2, neither of these operations
triggers a merge.
</para>
<para>
Client programs can specify level 2 behavior for a buffer by
setting its <literal>cluster_level</literal> to
<literal>HB_BUFFER_CLUSTER_LEVEL_CHARACTERS</literal>.
</para>
</listitem>
</itemizedlist>
<para>
As mentioned earlier, client programs using HarfBuzz often
assign initial cluster values in a buffer by reusing the indices
of the code points in the input text. This gives a sequence of
cluster values that is monotonically increasing (for example,
0,1,2,3,4).
</para>
<para>
It is not <emphasis>required</emphasis> that the cluster values
in a buffer be monotonically increasing. However, if the initial
cluster values in a buffer are monotonic and the buffer is
configured to use cluster level 0 or 1, then HarfBuzz
guarantees that the final cluster values in the shaped buffer
will also be monotonic. No such guarantee is made for cluster
level 2.
</para>
<para>
In levels 0 and 1, HarfBuzz implements the following conceptual
model for cluster values:
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
If the sequence of input cluster values is monotonic, the
sequence of cluster values will remain monotonic.
</para>
</listitem>
<listitem>
<para>
Each cluster value represents a single cluster.
</para>
</listitem>
<listitem>
<para>
Each cluster contains one or more glyphs and one or more
characters.
</para>
</listitem>
</itemizedlist>
<para>
In practice, this model offers several benefits. Assuming that
the initial cluster values were monotonically increasing
and distinct before shaping began, then, in the final output:
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
All adjacent glyphs having the same final cluster
value belong to the same cluster.
</para>
</listitem>
<listitem>
<para>
Each character belongs to the cluster that has the highest
cluster value <emphasis>not larger than</emphasis> its
initial cluster value.
</para>
</listitem>
</itemizedlist>
</section>
<section id="a-clustering-example-for-levels-0-and-1">
<title>A clustering example for levels 0 and 1</title>
<para>
The basic shaping operations affect clusters in a predictable
manner when using level 0 or level 1:
</para>
<itemizedlist>
<listitem>
<para>
When two or more clusters <emphasis>merge</emphasis>, the
resulting merged cluster takes as its cluster value the
<emphasis>minimum</emphasis> of the incoming cluster values.
</para>
</listitem>
<listitem>
<para>
When a cluster <emphasis>decomposes</emphasis>, all of the
resulting child clusters inherit as their cluster value the
cluster value of the parent cluster.
</para>
</listitem>
<listitem>
<para>
When a character is <emphasis>reordered</emphasis>, the
reordered character and all clusters that the character
moves past as part of the reordering are merged into one cluster.
</para>
</listitem>
</itemizedlist>
<para>
The functionality, guarantees, and benefits of level 0 and level
1 behavior can be seen with some examples. First, let us examine
what happens with cluster values when shaping involves cluster
merging with ligatures and decomposition.
</para>
<para>
Let's say we start with the following character sequence (top row) and
initial cluster values (bottom row):
</para>
<programlisting>
A,B,C,D,E
0,1,2,3,4
</programlisting>
<para>
During shaping, HarfBuzz maps these characters to glyphs from
the font. For simplicity, let us assume that each character maps
to the corresponding, identical-looking glyph:
</para>
<programlisting>
A,B,C,D,E
0,1,2,3,4
</programlisting>
<para>
Now if, for example, <literal>B</literal> and <literal>C</literal>
form a ligature, then the clusters to which they belong
&quot;merge&quot;. This merged cluster takes for its cluster
value the minimum of all the cluster values of the clusters that
went into the ligature. In this case, we get:
</para>
<programlisting>
A,BC,D,E
0,1 ,3,4
</programlisting>
<para>
because 1 is the minimum of the set {1,2}, which were the
cluster values of <literal>B</literal> and
<literal>C</literal>.
</para>
<para>
Next, let us say that the <literal>BC</literal> ligature glyph
decomposes into three components, and <literal>D</literal> also
decomposes into two components. Whenever a cluster decomposes,
its components each inherit the cluster value of their parent:
</para>
<programlisting>
A,BC0,BC1,BC2,D0,D1,E
0,1 ,1 ,1 ,3 ,3 ,4
</programlisting>
<para>
Next, if <literal>BC2</literal> and <literal>D0</literal> form a
ligature, then their clusters (cluster values 1 and 3) merge into
<literal>min(1,3) = 1</literal>:
</para>
<programlisting>
A,BC0,BC1,BC2D0,D1,E
0,1 ,1 ,1 ,1 ,4
</programlisting>
<para>
Note that the entirety of cluster 3 merges into cluster 1, not
just the <literal>D0</literal> glyph. This reflects the fact
that the cluster <emphasis>must</emphasis> be treated as an
indivisible unit.
</para>
<para>
At this point, cluster 1 means: the character sequence
<literal>BCD</literal> is represented by glyphs
<literal>BC0,BC1,BC2D0,D1</literal> and cannot be broken down any
further.
</para>
</section>
<section id="reordering-in-levels-0-and-1">
<title>Reordering in levels 0 and 1</title>
<para>
Another common operation in some shapers is glyph
reordering. In order to maintain a monotonic cluster sequence
when glyph reordering takes place, HarfBuzz merges the clusters
of everything in the reordering sequence.
</para>
<para>
For example, let us again start with the character sequence (top
row) and initial cluster values (bottom row):
</para>
<programlisting>
A,B,C,D,E
0,1,2,3,4
</programlisting>
<para>
If <literal>D</literal> is reordered to the position immediately
before <literal>B</literal>, then HarfBuzz merges the
<literal>B</literal>, <literal>C</literal>, and
<literal>D</literal> clusters &mdash; all the clusters between
the final position of the reordered glyph and its original
position. This means that we get:
</para>
<programlisting>
A,D,B,C,E
0,1,1,1,4
</programlisting>
<para>
as the final cluster sequence.
</para>
<para>
Merging this many clusters is not ideal, but it is the only
sensible way for HarfBuzz to maintain the guarantee that the
sequence of cluster values remains monotonic and to retain the
true relationship between glyphs and characters.
</para>
</section>
<section id="the-distinction-between-levels-0-and-1">
<title>The distinction between levels 0 and 1</title>
<para>
The preceding examples demonstrate the main effects of using
cluster levels 0 and 1. The only difference between the two
levels is this: in level 0, at the very beginning of the shaping
process, HarfBuzz merges the cluster of each base character
with the clusters of all Unicode marks (combining or not) and
modifiers that follow it.
</para>
<para>
For example, let us start with the following character sequence
(top row) and accompanying initial cluster values (bottom row):
</para>
<programlisting>
A,acute,B
0,1 ,2
</programlisting>
<para>
The <literal>acute</literal> is a Unicode mark. If HarfBuzz is
using cluster level 0 on this sequence, then the
<literal>A</literal> and <literal>acute</literal> clusters will
merge, and the result will become:
</para>
<programlisting>
A,acute,B
0,0 ,2
</programlisting>
<para>
This merger is performed before any other script-shaping
steps.
</para>
<para>
This initial cluster merging is the default behavior of the
Windows shaping engine, and the old HarfBuzz codebase copied
that behavior to maintain compatibility. Consequently, it has
remained the default behavior in the new HarfBuzz codebase.
</para>
<para>
But this initial cluster-merging behavior makes it impossible
for client programs to implement some features (such as to
color diacritic marks differently from their base
characters). That is why, in level 1, HarfBuzz does not perform
the initial merging step.
</para>
<para>
For client programs that rely on HarfBuzz cluster values to
perform cursor positioning, level 0 is more convenient. But
relying on cluster boundaries for cursor positioning is wrong: cursor
positions should be determined based on Unicode grapheme
boundaries, not on shaping-cluster boundaries. As such, using
level 1 clustering behavior is recommended.
</para>
<para>
One final facet of levels 0 and 1 is worth noting. HarfBuzz
currently does not allow any
<emphasis>multiple-substitution</emphasis> GSUB lookups to
replace a glyph with zero glyphs (in other words, to delete a
glyph).
</para>
<para>
But, in some other situations, glyphs can be deleted. In
those cases, if the glyph being deleted is the last glyph of its
cluster, HarfBuzz makes sure to merge the deleted glyph's
cluster with a neighboring cluster.
</para>
<para>
This is done primarily to make sure that the starting cluster of the
text always has the cluster index pointing to the start of the text
for the run; more than one client program currently relies on this
guarantee.
</para>
<para>
Incidentally, Apple's CoreText does something different to
maintain the same promise: it inserts a glyph with id 65535 at
the beginning of the glyph string if the glyph corresponding to
the first character in the run was deleted. HarfBuzz might do
something similar in the future.
</para>
</section>
<section id="level-2">
<title>Level 2</title>
<para>
HarfBuzz's level 2 cluster behavior uses a significantly
different model than that of level 0 and level 1.
</para>
<para>
The level 2 behavior is easy to describe, but it may be
difficult to understand in practical terms. In brief, level 2
performs no merging of clusters whatsoever.
</para>
<para>
This means that there is no initial base-and-mark merging step
(as is done in level 0), and it means that reordering moves and
ligature substitutions do not trigger a cluster merge.
</para>
<para>
Only one shaping operation directly affects clusters when using
level 2:
</para>
<itemizedlist>
<listitem>
<para>
When a cluster <emphasis>decomposes</emphasis>, all of the
resulting child clusters inherit as their cluster value the
cluster value of the parent cluster.
</para>
</listitem>
</itemizedlist>
<para>
When glyphs do form a ligature (or when some other feature
substitutes multiple glyphs with one glyph) the cluster value
of the first glyph is retained as the cluster value for the
resulting ligature.
</para>
<para>
This occurrence sounds similar to a cluster merge, but it is
different. In particular, no subsequent characters &mdash;
including marks and modifiers &mdash; are affected. They retain
their previous cluster values.
</para>
<para>
Level 2 cluster behavior is ultimately less complex than level 0
or level 1, but there are several cases for which processing
cluster values produced at level 2 may be tricky.
</para>
<section id="ligatures-with-combining-marks-in-level-2">
<title>Ligatures with combining marks in level 2</title>
<para>
The first example of how HarfBuzz's level 2 cluster behavior
can be tricky is when the text to be shaped includes combining
marks attached to ligatures.
</para>
<para>
Let us start with an input sequence with the following
characters (top row) and initial cluster values (bottom row):
</para>
<programlisting>
A,acute,B,breve,C,circumflex
0,1 ,2,3 ,4,5
</programlisting>
<para>
If the sequence <literal>A,B,C</literal> forms a ligature,
then these are the cluster values HarfBuzz will return under
the various cluster levels:
</para>
<para>
Level 0:
</para>
<programlisting>
ABC,acute,breve,circumflex
0 ,0 ,0 ,0
</programlisting>
<para>
Level 1:
</para>
<programlisting>
ABC,acute,breve,circumflex
0 ,0 ,0 ,5
</programlisting>
<para>
Level 2:
</para>
<programlisting>
ABC,acute,breve,circumflex
0 ,1 ,3 ,5
</programlisting>
<para>
Making sense of the level 2 result is the hardest for a client
program, because there is nothing in the cluster values that
indicates that <literal>B</literal> and <literal>C</literal>
formed a ligature with <literal>A</literal>.
</para>
<para>
In contrast, the "merged" cluster values of the mark glyphs
that are seen in the level 0 and level 1 output are evidence
that a ligature substitution took place.
</para>
</section>
<section id="reordering-in-level-2">
<title>Reordering in level 2</title>
<para>
Another example of how HarfBuzz's level 2 cluster behavior
can be tricky is when glyphs reorder. Consider an input sequence
with the following characters (top row) and initial cluster
values (bottom row):
</para>
<programlisting>
A,B,C,D,E
0,1,2,3,4
</programlisting>
<para>
Now imagine <literal>D</literal> moves before
<literal>B</literal> in a reordering operation. The cluster
values will then be:
</para>
<programlisting>
A,D,B,C,E
0,3,1,2,4
</programlisting>
<para>
Next, if <literal>D</literal> forms a ligature with
<literal>B</literal>, the output is:
</para>
<programlisting>
A,DB,C,E
0,3 ,2,4
</programlisting>
<para>
However, in a different scenario, in which the shaping rules
of the script instead caused <literal>A</literal> and
<literal>B</literal> to form a ligature
<emphasis>before</emphasis> the <literal>D</literal> reordered, the
result would be:
</para>
<programlisting>
AB,D,C,E
0 ,3,2,4
</programlisting>
<para>
There is no way for a client program to differentiate between
these two scenarios based on the cluster values
alone. Consequently, client programs that use level 2 might
need to undertake additional work in order to manage cursor
positioning, text attributes, or other desired features.
</para>
</section>
<section id="other-considerations-in-level-2">
<title>Other considerations in level 2</title>
<para>
There may be other problems encountered with ligatures under
level 2, such as if the direction of the text is forced to
the opposite of its natural direction (for example, Arabic text
that is forced into left-to-right directionality). But,
generally speaking, these other scenarios are minor corner
cases that are too obscure for most client programs to need to
worry about.
</para>
</section>
</section>
</chapter>

View File

@@ -0,0 +1,518 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="fonts-and-faces">
<title>Fonts, faces, and output</title>
<para>
In the previous chapter, we saw how to set up a buffer and fill
it with text as Unicode code points. In order to shape this
buffer text with HarfBuzz, you will also need a font
object.
</para>
<para>
HarfBuzz provides abstractions to help you cache and reuse the
heavier parts of working with binary fonts, so we will look at
how to do that. We will also look at how to work with the
FreeType font-rendering library and at how you can customize
HarfBuzz to work with other libraries.
</para>
<para>
Finally, we will look at how to work with OpenType variable
fonts, the latest update to the OpenType font format, and at
some other recent additions to OpenType.
</para>
<section id="fonts-and-faces-objects">
<title>Font and face objects</title>
<para>
The outcome of shaping a run of text depends on the contents of
a specific font file (such as the substitutions and positioning
moves in the 'GSUB' and 'GPOS' tables), so HarfBuzz makes
accessing those internals fast.
</para>
<para>
An <type>hb_face_t</type> represents a <emphasis>face</emphasis>
in HarfBuzz. This data type is a wrapper around an
<type>hb_blob_t</type> blob that holds the contents of a binary
font file. Since HarfBuzz supports TrueType Collections and
OpenType Collections (each of which can include multiple
typefaces), a HarfBuzz face also requires an index number
specifying which typeface in the file you want to use. Most of
the font files you will encounter in the wild include just a
single face, however, so most of the time you would pass in
<literal>0</literal> as the index when you create a face:
</para>
<programlisting language="C">
hb_blob_t* blob = hb_blob_create_from_file(file);
...
hb_face_t* face = hb_face_create(blob, 0);
</programlisting>
<para>
On its own, a face object is not quite ready to use for
shaping. The typeface must be set to a specific point size in
order for some details (such as hinting) to work. In addition,
if the font file in question is an OpenType Variable Font, then
you may need to specify one or more variation-axis settings (or a
named instance) in order to get the output you need.
</para>
<para>
In HarfBuzz, you do this by creating a <emphasis>font</emphasis>
object from your face.
</para>
<para>
Font objects also have the advantage of being considerably
lighter-weight than face objects (remember that a face contains
the contents of a binary font file mapped into memory). As a
result, you can cache and reuse a font object, but you could
also create a new one for each additional size you needed.
Creating new fonts incurs some additional overhead, of course,
but whether or not it is excessive is your call in the end. In
contrast, face objects are substantially larger, and you really
should cache them and reuse them whenever possible.
</para>
<para>
You can create a font object from a face object:
</para>
<programlisting language="C">
hb_font_t* hb_font = hb_font_create(hb_face);
</programlisting>
<para>
After creating a font, there are a few properties you should
set. Many fonts enable and disable hints based on the size they
are used at, so setting the size is important for font
objects. <function>hb_font_set_ppem(font, x_ppem,
y_ppem)</function> sets the pixels-per-EM value of the font. You
can also set the point size of the font with
<function>hb_font_set_ptem(font, ptem)</function>. HarfBuzz uses the
industry standard 72 points per inch.
</para>
<para>
HarfBuzz lets you specify the degree of subpixel precision you want
through a scaling factor. You can set horizontal and
vertical scaling factors on the
font by calling <function>hb_font_set_scale(font, x_scale,
y_scale)</function>.
</para>
<para>
There may be times when you are handed a font object and need to
access the face object that it comes from. For that, you can call
</para>
<programlisting language="C">
hb_face = hb_font_get_face(hb_font);
</programlisting>
<para>
You can also create a font object from an existing font object
using the <function>hb_font_create_sub_font()</function>
function. This creates a child font object that is initialized
with the same attributes as its parent; it can be used to
quickly set up a new font for the purpose of overriding a specific
font-functions method.
</para>
<para>
All face objects and font objects are lifecycle-managed by
HarfBuzz. After creating a face, you increase its reference
count with <function>hb_face_reference(face)</function> and
decrease it with
<function>hb_face_destroy(face)</function>. Likewise, you
increase the reference count on a font with
<function>hb_font_reference(font)</function> and decrease it
with <function>hb_font_destroy(font)</function>.
</para>
<para>
You can also attach user data to face objects and font objects.
</para>
</section>
<section id="fonts-and-faces-custom-functions">
<title>Customizing font functions</title>
<para>
During shaping, HarfBuzz frequently needs to query font objects
to get at the contents and parameters of the glyphs in a font
file. It includes a built-in set of functions that is tailored
to working with OpenType fonts. However, as was the case with
Unicode functions in the buffers chapter, HarfBuzz also wants to
make it easy for you to assign a substitute set of font
functions if you are developing a program to work with a library
or platform that provides its own font functions.
</para>
<para>
Therefore, the HarfBuzz API defines a set of virtual
methods for accessing font-object properties, and you can
replace the defaults with your own selections without
interfering with the shaping process. Each font object in
HarfBuzz includes a structure called
<literal>font_funcs</literal> that serves as a vtable for the
font object. The virtual methods in
<literal>font_funcs</literal> are:
</para>
<itemizedlist>
<listitem>
<para>
<function>hb_font_get_font_h_extents_func_t</function>: returns
the extents of the font for horizontal text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_font_v_extents_func_t</function>: returns
the extents of the font for vertical text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_nominal_glyph_func_t</function>: returns
the font's nominal glyph for a given code point.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_variation_glyph_func_t</function>: returns
the font's glyph for a given code point when it is followed by a
given Variation Selector.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_nominal_glyphs_func_t</function>: returns
the font's nominal glyphs for a series of code points.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_advance_func_t</function>: returns
the advance for a glyph.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_h_advance_func_t</function>: returns
the advance for a glyph for horizontal text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_v_advance_func_t</function>: returns
the advance for a glyph for vertical text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_advances_func_t</function>: returns
the advances for a series of glyphs.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_h_advances_func_t</function>: returns
the advances for a series of glyphs for horizontal text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_v_advances_func_t</function>: returns
the advances for a series of glyphs for vertical text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_origin_func_t</function>: returns
the origin coordinates of a glyph.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_h_origin_func_t</function>: returns
the origin coordinates of a glyph for horizontal text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_v_origin_func_t</function>: returns
the origin coordinates of a glyph for vertical text.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_extents_func_t</function>: returns
the extents for a glyph.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_contour_point_func_t</function>:
returns the coordinates of a specific contour point from a glyph.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_name_func_t</function>: returns the
name of a glyph (from its glyph index).
</para>
</listitem>
<listitem>
<para>
<function>hb_font_get_glyph_from_name_func_t</function>: returns
the glyph index that corresponds to a given glyph name.
</para>
</listitem>
<listitem>
<para>
<function>hb_font_draw_glyph_func_t</function>: gets the outlines
of a glyph (by calling #hb_draw_funcs_t callbacks).
</para>
</listitem>
<listitem>
<para>
<function>hb_font_paint_glyph_func_t</function>: paints a glyph
(by calling #hb_paint_funcs_t callbacks).
</para>
</listitem>
</itemizedlist>
<para>
You can create new font-functions by calling
<function>hb_font_funcs_create()</function>:
</para>
<programlisting language="C">
hb_font_funcs_t *ffunctions = hb_font_funcs_create ();
hb_font_set_funcs (font, ffunctions, font_data, destroy);
</programlisting>
<para>
The individual methods can each be set with their own setter
function, such as
<function>hb_font_funcs_set_nominal_glyph_func(ffunctions,
func, user_data, destroy)</function>.
</para>
<para>
Font-functions structures can be reused for multiple font
objects, and can be reference counted with
<function>hb_font_funcs_reference()</function> and
<function>hb_font_funcs_destroy()</function>. Just like other
objects in HarfBuzz, you can set user-data for each
font-functions structure and assign a destroy callback for
it.
</para>
<para>
You can also mark a font-functions structure as immutable,
with <function>hb_font_funcs_make_immutable()</function>. This
is especially useful if your code is a library or framework that
will have its own client programs. By marking your
font-functions structures as immutable, you prevent your client
programs from changing the configuration and introducing
inconsistencies and errors downstream.
</para>
<para>
To override only some functions while using the default implementation
for the others, you will need to create a sub-font. By default, the
sub-font uses the font functions of its parent except for the functions
that were explicitly set. The following code will override only the
<function>hb_font_get_nominal_glyph_func_t</function> for the sub-font:
</para>
<programlisting language="C">
hb_font_t *subfont = hb_font_create_sub_font (font);
hb_font_funcs_t *ffunctions = hb_font_funcs_create ();
hb_font_funcs_set_nominal_glyph_func (ffunctions, func, user_data, destroy);
hb_font_set_funcs (subfont, ffunctions, font_data, destroy);
hb_font_funcs_destroy (ffunctions);
</programlisting>
</section>
<section id="fonts-and-faces-native-opentype">
<title>Font objects and HarfBuzz's native OpenType implementation</title>
<para>
By default, whenever HarfBuzz creates a font object, it will
configure the font to use a built-in set of font functions that
supports contemporary OpenType font internals. If you want to
work with OpenType or TrueType fonts, you should be able to use
these functions without difficulty.
</para>
<para>
Many of the methods in the font-functions structure deal with
the fundamental properties of glyphs that are required for
shaping text: extents (the maximums and minimums on each axis),
origins (the <literal>(0,0)</literal> coordinate point which
glyphs are drawn in reference to), and advances (the amount that
the cursor needs to be moved after drawing each glyph, including
any empty space for the glyph's side bearings).
</para>
<para>
As you can see in the list of functions, there are separate "horizontal"
and "vertical" variants depending on whether the text is set in
the horizontal or vertical direction. For some scripts, fonts
that are designed to support text set horizontally or vertically (for
example, in Japanese) may include metrics for both text
directions. When fonts don't include this information, HarfBuzz
does its best to transform what the font provides.
</para>
<para>
In addition to the direction-specific functions, HarfBuzz
provides some higher-level functions for fetching information
like extents and advances for a glyph. If you call
</para>
<programlisting language="C">
hb_font_get_glyph_advance_for_direction(font, glyph, direction, &amp;x, &amp;y);
</programlisting>
<para>
then you can provide any <type>hb_direction_t</type> as the
<parameter>direction</parameter> parameter, and HarfBuzz will
use the correct function variant for the text direction. There
are similar higher-level versions of the functions for fetching
extents, origin coordinates, and contour-point
coordinates. There are also addition and subtraction functions
for moving points with respect to the origin.
</para>
<para>
There are also methods for fetching the glyph ID that
corresponds to a Unicode code point (possibly when followed by a
variation-selector code point), fetching the glyph name from the
font, and fetching the glyph ID that corresponds to a glyph name
you already have.
</para>
<para>
HarfBuzz also provides functions for converting between glyph
names and string
variables. <function>hb_font_glyph_to_string(font, glyph, s,
size)</function> retrieves the name for the glyph ID
<parameter>glyph</parameter> from the font object. It generates a
generic name of the form <literal>gidDDD</literal> (where DDD is
the glyph index) if there is no name for the glyph in the
font. <function>hb_font_glyph_from_string(font, s, len,
glyph)</function> takes an input string <parameter>s</parameter>
and looks for a glyph with that name in the font, returning its
glyph ID in the <parameter>glyph</parameter>
output parameter. It automatically parses
<literal>gidDDD</literal> and <literal>uniUUUU</literal> strings.
</para>
</section>
<section id="fonts-and-faces-variable">
<title>Working with OpenType Variable Fonts</title>
<para>
If you are working with OpenType Variable Fonts, there are a few
additional functions you should use to specify the
variation-axis settings of your font object. Without doing so,
your variable font's font object can still be used, but only at
the default setting for every axis (which, of course, is
sometimes what you want, but does not cover general usage).
</para>
<para>
HarfBuzz manages variation settings in the
<type>hb_variation_t</type> data type, which holds a <property>tag</property> for the
variation-axis identifier tag and a <property>value</property> for its
setting. You can retrieve the list of variation axes in a font
binary from the face object (not from a font object, notably) by
calling <function>hb_ot_var_get_axis_count(face)</function> to
find the number of axes, then using
<function>hb_ot_var_get_axis_infos()</function> to collect the
axis structures:
</para>
<programlisting language="C">
axes = hb_ot_var_get_axis_count(face);
...
hb_ot_var_get_axis_infos(face, 0, axes, axes_array);
</programlisting>
<para>
For each axis returned in the array, you can access the
identifier in its <property>tag</property>. HarfBuzz also has
tag definitions predefined for the five standard axes specified
in OpenType (<literal>ital</literal> for italic,
<literal>opsz</literal> for optical size,
<literal>slnt</literal> for slant, <literal>wdth</literal> for
width, and <literal>wght</literal> for weight). Each axis also
has a <property>min_value</property>, a
<property>default_value</property>, and a <property>max_value</property>.
</para>
<para>
To set your font object's variation settings, you call the
<function>hb_font_set_variations()</function> function with an
array of <type>hb_variation_t</type> variation settings. Let's
say our font has weight and width axes. We need to specify each
of the axes by tag and assign a value on the axis:
</para>
<programlisting language="C">
unsigned int variation_count = 2;
hb_variation_t variation_data[variation_count];
variation_data[0].tag = HB_OT_TAG_VAR_AXIS_WIDTH;
variation_data[1].tag = HB_OT_TAG_VAR_AXIS_WEIGHT;
variation_data[0].value = 80;
variation_data[1].value = 750;
...
hb_font_set_variations(font, variation_data, variation_count);
</programlisting>
<para>
That should give us a slightly condensed font ("normal" on the
<literal>wdth</literal> axis is 100) at a noticeably bolder
weight ("regular" is 400 on the <literal>wght</literal> axis).
</para>
<para>
In practice, though, you should always check that the value you
want to set on the axis is within the
[<property>min_value</property>,<property>max_value</property>]
range actually implemented in the font's variation axis. After
all, a font might only provide lighter-than-regular weights, and
setting a heavier value on the <literal>wght</literal> axis will
not change that.
</para>
<para>
Once your variation settings are specified on your font object,
however, shaping with a variable font is just like shaping a
static font.
</para>
<para>
In addition to providing the variation axes themselves, fonts may also
pre-define certain variation coordinates as named instances. HarfBuzz
makes these coordinates (and their associated names) available via
<function>hb_ot_var_named_instance_get_design_coords()</function> and
<function>hb_ot_var_named_instance_get_subfamily_name_id()</function>.
</para>
<para>
Applications should treat named instances like multiple independent,
static fonts.
</para>
</section>
<section id="glyphs-and-rendering">
<title>Glyphs and rendering</title>
<para>
The main purpose of HarfBuzz is shaping, which creates a list of positioned
glyphs as output. The remaining task for text layout is to convert this list
into rendered output. While HarfBuzz does not handle rasterization of glyphs
per se, it does have APIs that provide access to the font data that is needed
to perform this task.
</para>
<para>
Traditionally, the shapes of glyphs in scalable fonts are provided as quadratic
or cubic Bézier curves defining outlines to be filled. To obtain the outlines
for a glyph, call <function>hb_font_draw_glyph()</function> and pass a
<type>hb_draw_funcs_t</type> struct. The callbacks in that struct will be called
for each segment of the outline. Note that this API provides access to outlines
as they are defined in the font, without applying hinting to fit the curves
to the pixel grid.
</para>
<para>
Fonts may provide pre-rendered images for glyphs instead of or in addition to
outlines. This is most common for fonts that contain colored glyphs, such as
Emoji. To access these images, use <function>hb_ot_color_reference_png()</function>
or <function>hb_ot_color_reference_svg()</function>.
</para>
<para>
Another way in which fonts provide colored glyphs is via paint graphs that
combine glyph outlines with gradients and allow for transformations and
compositing. In its simplest form, this can be presented as a series of
layers that are rendered on top of each other, each with its own color.
HarfBuzz has the <function>hb_ot_color_glyph_get_layers()</function> function to
access glyph data in this form.
</para>
<para>
In the general case, you have to use <function>hb_font_paint_glyph()</function>
and pass a <type>hb_paint_funcs_t</type> struct with callbacks to obtain paint
graphs for glyphs that have them. The <function>hb_font_paint_glyph()</function>
API can handle outline and image glyphs as well, so it provides a unified API for
access to glyph rendering information.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,316 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="getting-started">
<title>Getting started with HarfBuzz</title>
<section id="an-overview-of-the-harfbuzz-shaping-api">
<title>An overview of the HarfBuzz shaping API</title>
<para>
The core of the HarfBuzz shaping API is the function
<function>hb_shape()</function>. This function takes a font, a
buffer containing a string of Unicode codepoints and
(optionally) a list of font features as its input. It replaces
the codepoints in the buffer with the corresponding glyphs from
the font, correctly ordered and positioned, and with any of the
optional font features applied.
</para>
<para>
In addition to holding the pre-shaping input (the Unicode
codepoints that comprise the input string) and the post-shaping
output (the glyphs and positions), a HarfBuzz buffer has several
properties that affect shaping. The most important are the
text-flow direction (e.g., left-to-right, right-to-left,
top-to-bottom, or bottom-to-top), the script tag, and the
language tag.
</para>
<para>
For input string buffers, flags are available to denote when the
buffer represents the beginning or end of a paragraph, to
indicate whether or not to visibly render Unicode <literal>Default
Ignorable</literal> codepoints, and to modify the cluster-merging
behavior for the buffer. For shaped output buffers, the
individual X and Y offsets and <literal>advances</literal>
(the logical dimensions) of each glyph are
accessible. HarfBuzz also flags glyphs as
<literal>UNSAFE_TO_BREAK</literal> if breaking the string at
that glyph (e.g., in a line-breaking or hyphenation process)
would require re-shaping the text.
</para>
<para>
HarfBuzz also provides methods to compare the contents of
buffers, join buffers, normalize buffer contents, and handle
invalid codepoints, as well as to determine the state of a
buffer (e.g., input codepoints or output glyphs). Buffer
lifecycles are managed and all buffers are reference-counted.
</para>
<para>
Although the default <function>hb_shape()</function> function is
sufficient for most use cases, a variant is also provided that
lets you specify which of HarfBuzz's shapers to use on a buffer.
</para>
<para>
HarfBuzz can read TrueType fonts, TrueType collections, OpenType
fonts, and OpenType collections. Functions are provided to query
font objects about metrics, Unicode coverage, available tables and
features, and variation selectors. Individual glyphs can also be
queried for metrics, variations, and glyph names. OpenType
variable fonts are supported, and HarfBuzz allows you to set
variation-axis coordinates on font objects.
</para>
<para>
HarfBuzz provides glue code to integrate with various other
libraries, including FreeType, GObject, and CoreText. Support
for integrating with Uniscribe and DirectWrite is experimental
at present.
</para>
</section>
<section id="terminology">
<title>Terminology</title>
<para>
</para>
<variablelist>
<?dbfo list-presentation="blocks"?>
<varlistentry>
<term>script</term>
<listitem>
<para>
In text shaping, a <emphasis>script</emphasis> is a
writing system: a set of symbols, rules, and conventions
that is used to represent a language or multiple
languages.
</para>
<para>
In general computing lingo, the word "script" can also
be used to mean an executable program (usually one
written in a human-readable programming language). For
the sake of clarity, HarfBuzz documents will always use
more specific terminology when referring to this
meaning, such as "Python script" or "shell script." In
all other instances, "script" refers to a writing system.
</para>
<para>
For developers using HarfBuzz, it is important to note
the distinction between a script and a language. Most
scripts are used to write a variety of different
languages, and many languages may be written in more
than one script.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>shaper</term>
<listitem>
<para>
In HarfBuzz, a <emphasis>shaper</emphasis> is a
handler for a specific script-shaping model. HarfBuzz
implements separate shapers for Indic, Arabic, Thai and
Lao, Khmer, Myanmar, Tibetan, Hangul, Hebrew, the
Universal Shaping Engine (USE), and a default shaper for
scripts with no script-specific shaping model.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>cluster</term>
<listitem>
<para>
In text shaping, a <emphasis>cluster</emphasis> is a
sequence of codepoints that must be treated as an
indivisible unit. Clusters can include code-point
sequences that form a ligature or base-and-mark
sequences. Tracking and preserving clusters is important
when shaping operations might separate or reorder
code points.
</para>
<para>
HarfBuzz provides three cluster
<emphasis>levels</emphasis> that implement different
approaches to the problem of preserving clusters during
shaping operations.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>grapheme</term>
<listitem>
<para>
In linguistics, a <emphasis>grapheme</emphasis> is one
of the indivisible units that make up a writing system or
script. Often, graphemes are individual symbols (letters,
numbers, punctuation marks, logograms, etc.) but,
depending on the writing system, a particular grapheme
might correspond to a sequence of several Unicode code
points.
</para>
<para>
In practice, HarfBuzz and other text-shaping engines
are not generally concerned with graphemes. However, it
is important for developers using HarfBuzz to recognize
that there is a difference between graphemes and shaping
clusters (see above). The two concepts may overlap
frequently, but there is no guarantee that they will be
identical.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>syllable</term>
<listitem>
<para>
In linguistics, a <emphasis>syllable</emphasis> is
a sequence of sounds that makes up a building block of a
particular language. Every language has its own set of
rules describing what constitutes a valid syllable.
</para>
<para>
For text-shaping purposes, the various definitions of
"syllable" are important because script-specific shaping
operations may be applied at the syllable level. For
example, a reordering rule might specify that a vowel
mark be reordered to the beginning of the syllable.
</para>
<para>
Syllables will consist of one or more Unicode code
points. The definition of a syllable for a particular
writing system might correspond to how HarfBuzz
identifies clusters (see above) for the same writing
system. However, it is important for developers using
HarfBuzz to recognize that there is a difference between
syllables and shaping clusters. The two concepts may
overlap frequently, but there is no guarantee that they
will be identical.
</para>
</listitem>
</varlistentry>
</variablelist>
</section>
<section id="a-simple-shaping-example">
<title>A simple shaping example</title>
<para>
Below is the simplest HarfBuzz shaping example possible.
</para>
<orderedlist numeration="arabic">
<listitem>
<para>
Create a buffer and put your text in it.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
#include &lt;hb.h&gt;
hb_buffer_t *buf;
buf = hb_buffer_create();
hb_buffer_add_utf8(buf, text, -1, 0, -1);
</programlisting>
<orderedlist numeration="arabic">
<listitem override="2">
<para>
Set the script, language and direction of the buffer.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
// If you know the direction, script, and language
hb_buffer_set_direction(buf, HB_DIRECTION_LTR);
hb_buffer_set_script(buf, HB_SCRIPT_LATIN);
hb_buffer_set_language(buf, hb_language_from_string("en", -1));
// If you don't know the direction, script, and language
hb_buffer_guess_segment_properties(buf);
</programlisting>
<orderedlist numeration="arabic">
<listitem override="3">
<para>
Create a face and a font from a font file.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
hb_blob_t *blob = hb_blob_create_from_file(filename); /* or hb_blob_create_from_file_or_fail() */
hb_face_t *face = hb_face_create(blob, 0);
hb_font_t *font = hb_font_create(face);
</programlisting>
<orderedlist numeration="arabic">
<listitem override="4">
<para>
Shape!
</para>
</listitem>
</orderedlist>
<programlisting language="C">
hb_shape(font, buf, NULL, 0);
</programlisting>
<orderedlist numeration="arabic">
<listitem override="5">
<para>
Get the glyph and position information.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
unsigned int glyph_count;
hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(buf, &amp;glyph_count);
hb_glyph_position_t *glyph_pos = hb_buffer_get_glyph_positions(buf, &amp;glyph_count);
</programlisting>
<orderedlist numeration="arabic">
<listitem override="6">
<para>
Iterate over each glyph.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
hb_position_t cursor_x = 0;
hb_position_t cursor_y = 0;
for (unsigned int i = 0; i &lt; glyph_count; i++) {
hb_codepoint_t glyphid = glyph_info[i].codepoint;
hb_position_t x_offset = glyph_pos[i].x_offset;
hb_position_t y_offset = glyph_pos[i].y_offset;
hb_position_t x_advance = glyph_pos[i].x_advance;
hb_position_t y_advance = glyph_pos[i].y_advance;
/* draw_glyph(glyphid, cursor_x + x_offset, cursor_y + y_offset); */
cursor_x += x_advance;
cursor_y += y_advance;
}
</programlisting>
<orderedlist numeration="arabic">
<listitem override="7">
<para>
Tidy up.
</para>
</listitem>
</orderedlist>
<programlisting language="C">
hb_buffer_destroy(buf);
hb_font_destroy(font);
hb_face_destroy(face);
hb_blob_destroy(blob);
</programlisting>
<para>
This example shows enough to get us started using HarfBuzz. In
the sections that follow, we will use the remainder of
HarfBuzz's API to refine and extend the example and improve its
text-shaping capabilities.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<sect1 id="glyph-information">
<title>Glyph information</title>
<sect2 id="names-and-numbers">
<title>Names and numbers</title>
<para>
</para>
</sect2>
</sect1>

View File

@@ -0,0 +1,349 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="install-harfbuzz">
<title>Installing HarfBuzz</title>
<section id="download">
<title id="download.title">Downloading HarfBuzz</title>
<para>
The HarfBuzz source code is hosted at <ulink
url="https://github.com/harfbuzz/harfbuzz">github.com/harfbuzz/harfbuzz</ulink>.
</para>
<para>
Tarball releases and Win32 binary bundles (which include the
libharfbuzz DLL, hb-view.exe, hb-shape.exe, and all
dependencies) of HarfBuzz can be downloaded from <ulink
url="https://github.com/harfbuzz/harfbuzz/releases">github.com/harfbuzz/harfbuzz/releases</ulink>.
</para>
<para>
Release notes are posted with each new release to provide an
overview of the changes. The project <ulink url="https://github.com/harfbuzz/harfbuzz/issues">tracks bug
reports and other issues</ulink> on GitHub. Discussion and
questions are welcome on <ulink
url="https://github.com/harfbuzz/harfbuzz/discussions">GitHub</ulink> as well.
</para>
<para>
The API included in the <filename
class='headerfile'>hb.h</filename> file will not change in a
compatibility-breaking way in any release. However, other,
peripheral headers are more likely to go through minor
modifications. We will do our best to never change APIs in an
incompatible way. We will <emphasis>never</emphasis> break the ABI.
</para>
</section>
<section id="building">
<title>Building HarfBuzz</title>
<section id="building.linux">
<title>Building on Linux</title>
<para>
<emphasis>(1)</emphasis> To build HarfBuzz on Linux, you must first install the
development packages for FreeType, Cairo, and GLib. The exact
commands required for this step will vary depending on
the Linux distribution you use.
</para>
<para>
For example, on an Ubuntu or Debian system, you would run:
<programlisting><command>sudo apt install</command> <package>gcc g++ libfreetype6-dev libglib2.0-dev libcairo2-dev</package></programlisting>
On Fedora, RHEL, CentOS, or other Red-Hat&ndash;based systems, you would run:
<programlisting><command>sudo yum install</command> <package>gcc gcc-c++ freetype-devel glib2-devel cairo-devel</package></programlisting>
</para>
<para>
<emphasis>(2)</emphasis> The next step depends on whether you
are building from the source in a downloaded release tarball or
from the source directly from the git repository.
</para>
<para>
<emphasis>(2)(a)</emphasis> If you downloaded the HarfBuzz
source code in a tarball, you can now extract the source.
</para>
<para>
From a shell in the top-level directory of the extracted source
code, you can run <command>meson build</command> followed by
<command>meson compile -C build</command> as with any other standard package.
</para>
<para>
This should leave you with a shared
library in the <filename>src/</filename> directory, and a few
utility programs including <command>hb-view</command> and
<command>hb-shape</command> under the <filename>util/</filename>
directory.
</para>
<para>
<emphasis>(2)(b)</emphasis> If you are building from the source in the HarfBuzz git
repository, rather than installing from a downloaded tarball
release, then you must install a few more auxiliary tools before you
can build for the first time, including <package>pkg-config</package>.
</para>
<para>
On Ubuntu or Debian, run:
<programlisting><command>sudo apt-get install</command> <package>meson pkg-config gtk-doc-tools</package></programlisting>
On Fedora, RHEL, CentOS, run:
<programlisting><command>sudo yum install</command> <package>meson pkgconfig gtk-doc</package></programlisting>
</para>
<para>
With <package>pkg-config</package> installed, you can now run
<command>meson build</command> then
<command>meson compile -C build</command> to build HarfBuzz.
</para>
</section>
<section id="building.windows">
<title>Building on Windows</title>
<para>
<ulink url="https://mesonbuild.com/Getting-meson.html">Install meson</ulink>
and run (from the console) <command>meson build</command> (by default
bundled dependencies are not built, <command>--wrap-mode=default</command>
overrides this), then <command>meson compile -C build</command> to
build HarfBuzz.
</para>
</section>
<section id="building.macos">
<title>Building on macOS</title>
<para>
There are two ways to build HarfBuzz on Mac systems: MacPorts
and Homebrew. The process is similar to the process used on a
Linux system.
</para>
<para>
<emphasis>(1)</emphasis> You must first install the
development packages for FreeType, Cairo, and GLib. If you are
using MacPorts, you should run:
<programlisting><command>sudo port install</command> <package>freetype glib2 cairo</package></programlisting>
</para>
<para>
If you are using Homebrew, you should run:
<programlisting><command>brew install</command> <package>freetype glib cairo</package></programlisting>
</para>
<para>
<emphasis>(2)</emphasis> The next step depends on whether you are building from the
source in a downloaded release tarball or from the source directly
from the git repository.
</para>
<para>
<emphasis>(2)(a)</emphasis> If you are installing HarfBuzz
from a downloaded tarball release, extract the tarball and
open a Terminal in the extracted source-code directory. Run:
<programlisting><command>meson build</command></programlisting>
followed by:
<programlisting><command>meson compile -C build</command></programlisting>
to build HarfBuzz.
</para>
<para>
<emphasis>(2)(b)</emphasis> Alternatively, if you are building
HarfBuzz from the source in the HarfBuzz git repository, then
you must install several build-time dependencies before
proceeding.
</para>
<para>If you are
using MacPorts, you should run:
<programlisting><command>sudo port install</command> <package>meson pkgconfig gtk-doc</package></programlisting>
to install the build dependencies.
</para>
<para>If you are using Homebrew, you should run:
<programlisting><command>brew install</command> <package>meson pkgconfig gtk-doc</package></programlisting>
Finally, you can run:
<programlisting><command>meson build</command></programlisting>
</para>
<para>
<emphasis>(3)</emphasis> You can now build HarfBuzz (on either
a MacPorts or a Homebrew system) by running:
<programlisting><command>meson build</command></programlisting>
followed by:
<programlisting><command>meson compile -C build</command></programlisting>
</para>
<para>
This should leave you with a shared
library in the <filename>src/</filename> directory, and a few
utility programs including <command>hb-view</command> and
<command>hb-shape</command> under the <filename>util/</filename>
directory.
</para>
</section>
<section id="configuration">
<title>Configuration options</title>
<para>
The instructions in the "Building HarfBuzz" section will build
the source code under its default configuration. If needed,
the following additional configuration options are available.
</para>
<variablelist>
<?dbfo list-presentation="blocks"?>
<varlistentry>
<term><command>-Dglib=enabled</command></term>
<listitem>
<para>
Use <ulink url="https://developer.gnome.org/glib/">GLib</ulink>. <emphasis>(Default = auto)</emphasis>
</para>
<para>
This option enables or disables usage of the GLib
library. The default setting is to check for the
presence of GLib and, if it is found, build with
GLib support. GLib is native to GNU/Linux systems but is
available on other operating systems as well.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dgobject=enabled</command></term>
<listitem>
<para>
Use <ulink url="https://developer.gnome.org/gobject/stable/">GObject</ulink>. <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables or disables usage of the GObject
library. The default setting is to check for the
presence of GObject and, if it is found, build with
GObject support. GObject is native to GNU/Linux systems but is
available on other operating systems as well.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dcairo=enabled</command></term>
<listitem>
<para>
Use <ulink url="https://cairographics.org/">Cairo</ulink>. <emphasis>(Default = auto)</emphasis>
</para>
<para>
This option enables or disables usage of the Cairo
graphics-rendering library. The default setting is to
check for the presence of Cairo and, if it is found,
build with Cairo support.
</para>
<para>
Note: Cairo is used only by the HarfBuzz
command-line utilities, and not by the HarfBuzz library.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dicu=enabled</command></term>
<listitem>
<para>
Use the <ulink url="http://site.icu-project.org/home">ICU</ulink> library. <emphasis>(Default = auto)</emphasis>
</para>
<para>
This option enables or disables usage of the
<emphasis>International Components for
Unicode</emphasis> (ICU) library, which provides access
to Unicode Character Database (UCD) properties as well
as normalization and conversion functions. The default
setting is to check for the presence of ICU and, if it
is found, build with ICU support.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dgraphite=enabled</command></term>
<listitem>
<para>
Use the <ulink url="http://graphite.sil.org/">Graphite2</ulink> library. <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables or disables usage of the Graphite2
library, which provides support for the Graphite shaping
model.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dfreetype=enabled</command></term>
<listitem>
<para>
Use the <ulink url="https://www.freetype.org/">FreeType</ulink> library. <emphasis>(Default = auto)</emphasis>
</para>
<para>
This option enables or disables usage of the FreeType
font-rendering library. The default setting is to check for the
presence of FreeType and, if it is found, build with
FreeType support.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dgdi=enabled</command></term>
<listitem>
<para>
Use the <ulink
url="https://docs.microsoft.com/en-us/windows/desktop/intl/uniscribe">Uniscribe</ulink>
library (experimental). <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables or disables usage of the Uniscribe
font-rendering library. Uniscribe is available on
Windows systems. Uniscribe support is used only for
testing purposes and does not need to be enabled for
HarfBuzz to run on Windows systems.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Ddirectwrite=enabled</command></term>
<listitem>
<para>
Use the <ulink url="https://docs.microsoft.com/en-us/windows/desktop/directwrite/direct-write-portal">DirectWrite</ulink> library (experimental). <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables or disables usage of the DirectWrite
font-rendering library. DirectWrite is available on
Windows systems. DirectWrite support is used only for
testing purposes and does not need to be enabled for
HarfBuzz to run on Windows systems.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Dcoretext=enabled</command></term>
<listitem>
<para>
Use the <ulink url="https://developer.apple.com/documentation/coretext">CoreText</ulink> library. <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables or disables usage of the CoreText
library. CoreText is available on macOS and iOS systems.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>-Ddocs=enabled</command></term>
<listitem>
<para>
Use <ulink url="https://github.com/GNOME/gtk-doc">GTK-Doc</ulink>. <emphasis>(Default = no)</emphasis>
</para>
<para>
This option enables the building of the documentation.
</para>
</listitem>
</varlistentry>
</variablelist>
</section>
</section>
</chapter>

View File

@@ -0,0 +1,647 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="integration">
<title>Platform Integration Guide</title>
<para>
HarfBuzz was first developed for use with the GNOME and GTK
software stack commonly found in desktop Linux
distributions. Nevertheless, it can be used on other operating
systems and platforms, from iOS and macOS to Windows. It can also
be used with other application frameworks and components, such as
Android, Qt, or application-specific widget libraries.
</para>
<para>
This chapter will look at how HarfBuzz fits into a typical
text-rendering pipeline, and will discuss the APIs available to
integrate HarfBuzz with contemporary Linux, Mac, and Windows
software. It will also show how HarfBuzz integrates with popular
external libraries like FreeType and International Components for
Unicode (ICU) and describe the HarfBuzz language bindings for
Python.
</para>
<para>
On a GNOME system, HarfBuzz is designed to tie in with several
other common system libraries. The most common architecture uses
Pango at the layer directly "above" HarfBuzz; Pango is responsible
for text segmentation and for ensuring that each input
<type>hb_buffer_t</type> passed to HarfBuzz for shaping contains
Unicode code points that share the same segment properties
(namely, direction, language, and script, but also higher-level
properties like the active font, font style, and so on).
</para>
<para>
The layer directly "below" HarfBuzz is typically FreeType, which
is used to rasterize glyph outlines at the necessary optical size,
hinting settings, and pixel resolution. FreeType provides APIs for
accessing font and face information, so HarfBuzz includes
functions to create <type>hb_face_t</type> and
<type>hb_font_t</type> objects directly from FreeType
objects. HarfBuzz can use FreeType's built-in functions for the
<structfield>font_funcs</structfield> vtable in an <type>hb_font_t</type>.
</para>
<para>
FreeType's output is bitmaps of the rasterized glyphs; on a
typical Linux system these will then be drawn by a graphics
library like Cairo, but those details are beyond HarfBuzz's
control. On the other hand, at the top end of the stack, Pango is
part of the larger GNOME framework, and HarfBuzz does include APIs
for working with key components of GNOME's higher-level libraries
&mdash; most notably GLib.
</para>
<para>
For other operating systems or application frameworks, the
critical integration points are where HarfBuzz gets font and face
information about the font used for shaping and where HarfBuzz
gets Unicode data about the input-buffer code points.
</para>
<para>
The font and face information is necessary for text shaping
because HarfBuzz needs to retrieve the glyph indices for
particular code points, and to know the extents and advances of
glyphs. Note that, in an OpenType variable font, both of those
types of information can change with different variation-axis
settings.
</para>
<para>
The Unicode information is necessary for shaping because the
properties of a code point (such as its General Category (gc),
Canonical Combining Class (ccc), and decomposition) can directly
impact the shaping moves that HarfBuzz performs.
</para>
<section id="integration-glib">
<title>GNOME integration, GLib, and GObject</title>
<para>
As mentioned in the preceding section, HarfBuzz offers
integration APIs to help client programs using the
GNOME and GTK framework commonly found in desktop Linux
distributions.
</para>
<para>
GLib is the main utility library for GNOME applications. It
provides basic data types and conversions, file abstractions,
string manipulation, and macros, as well as facilities like
memory allocation and the main event loop.
</para>
<para>
Where text shaping is concerned, GLib provides several utilities
that HarfBuzz can take advantage of, including a set of
Unicode-data functions and a data type for script
information. Both are useful when working with HarfBuzz
buffers. To make use of them, you will need to include the
<filename>hb-glib.h</filename> header file.
</para>
<para>
GLib's <ulink
url="https://developer.gnome.org/glib/stable/glib-Unicode-Manipulation.html">Unicode
manipulation API</ulink> includes all the functionality
necessary to retrieve Unicode data for the
<structfield>unicode_funcs</structfield> structure of a HarfBuzz
<type>hb_buffer_t</type>.
</para>
<para>
The function <function>hb_glib_get_unicode_funcs()</function>
sets up a <type>hb_unicode_funcs_t</type> structure configured
with the GLib Unicode functions and returns a pointer to it.
</para>
<para>
You can attach this Unicode-functions structure to your buffer,
and it will be ready for use with GLib:
</para>
<programlisting language="C">
#include &lt;hb-glib.h&gt;
...
hb_unicode_funcs_t *glibufunctions;
glibufunctions = hb_glib_get_unicode_funcs();
hb_buffer_set_unicode_funcs(buf, glibufunctions);
</programlisting>
<para>
For script information, GLib uses the
<type>GUnicodeScript</type> type. Like HarfBuzz's own
<type>hb_script_t</type>, this data type is an enumeration
of Unicode scripts, but text segments passed in from GLib code
will be tagged with a <type>GUnicodeScript</type>. Therefore,
when setting the script property on a <type>hb_buffer_t</type>,
you will need to convert between the <type>GUnicodeScript</type>
of the input provided by GLib and HarfBuzz's
<type>hb_script_t</type> type.
</para>
<para>
The <function>hb_glib_script_to_script()</function> function
takes a <type>GUnicodeScript</type> script identifier as its
sole argument and returns the corresponding <type>hb_script_t</type>.
The <function>hb_glib_script_from_script()</function> function does the
reverse, taking an <type>hb_script_t</type> and returning the
<type>GUnicodeScript</type> identifier for GLib.
</para>
<para>
Finally, GLib also provides a reference-counted object type called <ulink
url="https://developer.gnome.org/glib/stable/glib-Byte-Arrays.html#GBytes"><type>GBytes</type></ulink>
that is used for accessing raw memory segments with the benefits
of GLib's lifecycle management. HarfBuzz provides a
<function>hb_glib_blob_create()</function> function that lets
you create an <type>hb_blob_t</type> directly from a
<type>GBytes</type> object. This function takes only the
<type>GBytes</type> object as its input; HarfBuzz registers the
GLib <function>destroy</function> callback automatically.
</para>
<para>
The GNOME platform also features an object system called
GObject. For HarfBuzz, the main advantage of GObject is a
feature called <ulink
url="https://gi.readthedocs.io/en/latest/">GObject
Introspection</ulink>. This is a middleware facility that can be
used to generate language bindings for C libraries. HarfBuzz uses it
to build its Python bindings, which we will look at in a separate section.
</para>
</section>
<section id="integration-freetype">
<title>FreeType integration</title>
<para>
FreeType is the free-software font-rendering engine included in
desktop Linux distributions, Android, ChromeOS, iOS, and multiple Unix
operating systems, and used by cross-platform programs like
Chrome, Java, and GhostScript. Used together with FreeType, HarfBuzz can
perform shaping on Unicode text segments, outputting the glyph
IDs that FreeType should rasterize from the active font as well
as the positions at which those glyphs should be drawn.
</para>
<para>
HarfBuzz provides integration points with FreeType at the
face-object and font-object level and for the font-functions
virtual-method structure of a font object. These functions
make it easy for clients that use FreeType for rasterization
or font-loading, to use HarfBuzz for shaping. To use the
FreeType-integration API, include the
<filename>hb-ft.h</filename> header.
</para>
<para>
In a typical client program, you will create your
<type>hb_face_t</type> face object and <type>hb_font_t</type>
font object from a FreeType <type>FT_Face</type>. HarfBuzz
provides a suite of functions for doing this.
</para>
<para>
In the most common case, you will want to use
<function>hb_ft_font_create_referenced()</function>, which
creates both an <type>hb_face_t</type> face object and
<type>hb_font_t</type> font object (linked to that face object),
and provides lifecycle management.
</para>
<para>
It is important to note,
though, that while HarfBuzz makes a distinction between its face and
font objects, FreeType's <type>FT_Face</type> does not. After
you create your <type>FT_Face</type>, you must set its size
parameter using <function>FT_Set_Char_Size()</function>, because
an <type>hb_font_t</type> is defined as an instance of an
<type>hb_face_t</type> with size specified.
</para>
<programlisting language="C">
#include &lt;hb-ft.h&gt;
...
FT_New_Face(ft_library, font_path, index, &amp;face);
FT_Set_Char_Size(face, 0, 1000, 0, 0);
hb_font_t *font = hb_ft_font_create_referenced(face);
</programlisting>
<para>
<function>hb_ft_font_create_referenced()</function> is
the recommended function for creating an <type>hb_font_t</type> font
object. This function calls <function>FT_Reference_Face()</function>
before using the <type>FT_Face</type> and calls
<function>FT_Done_Face()</function> when it is finished using the
<type>FT_Face</type>. Consequently, your client program does not need
to worry about destroying the <type>FT_Face</type> while HarfBuzz
is still using it.
</para>
<para>
Although <function>hb_ft_font_create_referenced()</function> is
the recommended function, there is another variant for client code
where special circumstances make it necessary. The simpler
version of the function is <function>hb_ft_font_create()</function>,
which takes an <type>FT_Face</type> and an optional destroy callback
as its arguments. Because <function>hb_ft_font_create()</function>
does not offer lifecycle management, however, your client code will
be responsible for tracking references to the <type>FT_Face</type>
objects and destroying them when they are no longer needed. If you
do not have a valid reason for doing this, use
<function>hb_ft_font_create_referenced()</function>.
</para>
<para>
After you have created your font object from your
<type>FT_Face</type>, you can set or retrieve the
<structfield>load_flags</structfield> of the
<type>FT_Face</type> through the <type>hb_font_t</type>
object. HarfBuzz provides
<function>hb_ft_font_set_load_flags()</function> and
<function>hb_ft_font_get_load_flags()</function> for this
purpose. The ability to set the
<structfield>load_flags</structfield> through the font object
could be useful for enabling or disabling hinting, for example,
or to activate vertical layout.
</para>
<para>
HarfBuzz also provides a utility function called
<function>hb_ft_font_has_changed()</function> that you should
call whenever you have altered the properties of your underlying
<type>FT_Face</type>, as well as a
<function>hb_ft_get_face()</function> that you can call on an
<type>hb_font_t</type> font object to fetch its underlying <type>FT_Face</type>.
</para>
<para>
With an <type>hb_face_t</type> and <type>hb_font_t</type> both linked
to your <type>FT_Face</type>, you will typically also want to
use FreeType for the <structfield>font_funcs</structfield>
vtable of your <type>hb_font_t</type>. As a reminder, this
font-functions structure is the set of methods that HarfBuzz
will use to fetch important information from the font, such as
the advances and extents of individual glyphs.
</para>
<para>
All you need to do is call
</para>
<programlisting language="C">
hb_ft_font_set_funcs(font);
</programlisting>
<para>
and HarfBuzz will use FreeType for the font-functions in
<literal>font</literal>.
</para>
<para>
As we noted above, an <type>hb_font_t</type> is derived from an
<type>hb_face_t</type> with size (and, perhaps, other
parameters, such as variation-axis coordinates)
specified. Consequently, you can reuse an <type>hb_face_t</type>
with several <type>hb_font_t</type> objects, and HarfBuzz
provides functions to simplify this.
</para>
<para>
The <function>hb_ft_face_create_referenced()</function>
function creates just an <type>hb_face_t</type> from a FreeType
<type>FT_Face</type> and, as with
<function>hb_ft_font_create_referenced()</function> above,
provides lifecycle management for the <type>FT_Face</type>.
</para>
<para>
Similarly, there is an <function>hb_ft_face_create()</function>
function variant that does not provide the lifecycle-management
feature. As with the font-object case, if you use this version
of the function, it will be your client code's responsibility
to track usage of the <type>FT_Face</type> objects.
</para>
<para>
A third variant of this function is
<function>hb_ft_face_create_cached()</function>, which is the
same as <function>hb_ft_face_create()</function> except that it
also uses the <structfield>generic</structfield> field of the
<type>FT_Face</type> structure to save a pointer to the newly
created <type>hb_face_t</type>. Subsequently, function calls
that pass the same <type>FT_Face</type> will get the same
<type>hb_face_t</type> returned &mdash; and the
<type>hb_face_t</type> will be correctly reference
counted. Still, as with
<function>hb_ft_face_create()</function>, your client code must
track references to the <type>FT_Face</type> itself, and destroy
it when it is unneeded.
</para>
</section>
<section id="integration-cairo">
<title>Cairo integration</title>
<para>
Cairo is a 2D graphics library that is frequently used together
with GTK and Pango. Cairo supports rendering text using FreeType, or
by using callback-based 'user fonts'.
</para>
<para>
HarfBuzz provides integration points with Cairo for fonts as well as
for buffers. To use the Cairo-integration API, link against libharfbuzz-cairo,
and include the <filename>hb-cairo.h</filename> header. For easy buildsystem
integration, HarfBuzz comes with a <filename>harfbuzz-cairo.pc</filename>
pkg-config file.
</para>
<para>
To create a <type>cairo_scaled_font_t</type> font from a HarfBuzz
<type>hb_font_t</type>, you can use <function>hb_cairo_font_face_create_for_font()</function>
or <function>hb_cairo_font_face_create_for_face()</function>. The former API
applies variations and synthetic slant from the <type>hb_font_t</type> when
rendering, the latter takes them from the <type>cairo_font_options_t</type>
that were passed when creating the <type>cairo_scaled_font_t</type>.
</para>
<para>
The Cairo fonts created in this way make use of Cairo's user-font facilities.
They can be used to render on any Cairo context, and provide full support for
font rendering features, including color. One current limitation of the
implementation is that it does not support hinting for glyph outlines.
</para>
<para>
When using color fonts with this API, the color palette index is taken from
the <type>cairo_font_options_t</type> (with new enough Cairo), and the foreground
color is extracted from the source of the Cairo context.
</para>
<para>
To render the results of shaping a piece of text, use
<function>hb_cairo_glyphs_from_buffer()</function> to obtain the glyphs in
a form that can be passed to <function>cairo_show_text_glyphs()</function> or
<function>cairo_show_glyphs()</function>.
</para>
</section>
<section id="integration-uniscribe">
<title>Uniscribe integration</title>
<para>
If your client program is running on Windows, HarfBuzz offers
an additional API that can help integrate with Microsoft's
Uniscribe engine and the Windows GDI.
</para>
<para>
Overall, the Uniscribe API covers a broader set of typographic
layout functions than HarfBuzz implements, but HarfBuzz's
shaping API can serve as a drop-in replacement for Uniscribe's shaping
functionality. In fact, one of HarfBuzz's design goals is to
accurately reproduce the same output for shaping a given text
segment that Uniscribe produces &mdash; even to the point of
duplicating known shaping bugs or deviations from the
specification &mdash; so you can be confident that your users'
documents with their existing fonts will not be affected adversely by
switching to HarfBuzz.
</para>
<para>
At a basic level, HarfBuzz's <function>hb_shape()</function>
function replaces both the <ulink url="https://docs.microsoft.com/en-us/windows/desktop/api/Usp10/nf-usp10-scriptshape"><function>ScriptShape()</function></ulink>
and <ulink
url="https://docs.microsoft.com/en-us/windows/desktop/api/Usp10/nf-usp10-scriptplace"><function>ScriptPlace()</function></ulink>
functions from Uniscribe.
</para>
<para>
However, whereas <function>ScriptShape()</function> returns the
glyphs and clusters for a shaped sequence and
<function>ScriptPlace()</function> returns the advances and
offsets for those glyphs, <function>hb_shape()</function>
handles both. After <function>hb_shape()</function> shapes a
buffer, the output glyph IDs and cluster IDs are returned as
an array of <structname>hb_glyph_info_t</structname> structures, and the
glyph advances and offsets are returned as an array of
<structname>hb_glyph_position_t</structname> structures.
</para>
<para>
Your client program only needs to ensure that it converts
correctly between HarfBuzz's low-level data types (such as
<type>hb_position_t</type>) and Windows's corresponding types
(such as <type>GOFFSET</type> and <type>ABC</type>). Be sure you
read the <xref linkend="buffers-language-script-and-direction"
/>
chapter for a full explanation of how HarfBuzz input buffers are
used, and see <xref linkend="shaping-buffer-output" /> for the
details of what <function>hb_shape()</function> returns in the
output buffer when shaping is complete.
</para>
<para>
Although <function>hb_shape()</function> itself is functionally
equivalent to Uniscribe's shaping routines, there are two
additional HarfBuzz functions you may want to use to integrate
the libraries in your code. Both are used to link HarfBuzz font
objects to the equivalent Windows structures.
</para>
<para>
The <function>hb_uniscribe_font_get_logfontw()</function>
function takes a <type>hb_font_t</type> font object and returns
a pointer to the <ulink
url="https://docs.microsoft.com/en-us/windows/desktop/api/wingdi/ns-wingdi-logfontw"><type>LOGFONTW</type></ulink>
"logical font" that corresponds to it. A <type>LOGFONTW</type>
structure holds font-wide attributes, including metrics, size,
and style information.
</para>
<!--
<para>
In Uniscribe's model, the <type>SCRIPT_CACHE</type> holds the
device context, including the logical font that the shaping
functions apply.
https://docs.microsoft.com/en-us/windows/desktop/Intl/script-cache
</para>
-->
<para>
The <function>hb_uniscribe_font_get_hfont()</function> function
also takes a <type>hb_font_t</type> font object, but it returns
an <type>HFONT</type> &mdash; a handle to the underlying logical
font &mdash; instead.
</para>
<para>
<type>LOGFONTW</type>s and <type>HFONT</type>s are both needed
by other Uniscribe functions.
</para>
<para>
As a final note, you may notice a reference to an optional
<literal>uniscribe</literal> shaper back-end in the <xref
linkend="configuration" /> section of the HarfBuzz manual. This
option is not a Uniscribe-integration facility.
</para>
<para>
Instead, it is an internal code path used in the
<command>hb-shape</command> command-line utility, which hands
shaping functionality over to Uniscribe entirely, when run on a
Windows system. That allows testing HarfBuzz's native output
against the Uniscribe engine, for tracking compatibility and
debugging.
</para>
<para>
Because this back-end is only used when testing HarfBuzz
functionality, it is disabled by default when building the
HarfBuzz binaries.
</para>
</section>
<section id="integration-coretext">
<title>Core Text integration</title>
<para>
If your client program is running on macOS or iOS, HarfBuzz offers
an additional API that can help integrate with Apple's
Core Text engine and the underlying Core Graphics
framework. HarfBuzz does not attempt to offer the same
drop-in-replacement functionality for Core Text that it strives
for with Uniscribe on Windows, but you can still use HarfBuzz
to perform text shaping in native macOS and iOS applications.
</para>
<para>
Note, though, that if your interest is just in using fonts that
contain Apple Advanced Typography (AAT) features, then you do
not need to add Core Text integration. HarfBuzz natively
supports AAT features and will shape AAT fonts (on any platform)
automatically, without requiring additional work on your
part. This includes support for AAT-specific TrueType tables
such as <literal>mort</literal>, <literal>morx</literal>, and
<literal>kerx</literal>, which AAT fonts use instead of
<literal>GSUB</literal> and <literal>GPOS</literal>.
</para>
<para>
On a macOS or iOS system, the primary integration points offered
by HarfBuzz are for face objects and font objects.
</para>
<para>
The Apple APIs offer a pair of data structures that map well to
HarfBuzz's face and font objects. The Core Graphics API, which
is slightly lower-level than Core Text, provides
<ulink url="https://developer.apple.com/documentation/coregraphics/cgfontref"><type>CGFontRef</type></ulink>, which enables access to typeface
properties, but does not include size information. Core Text's
<ulink url="https://developer.apple.com/documentation/coretext/ctfont-q6r"><type>CTFontRef</type></ulink> is analogous to a HarfBuzz font object,
with all of the properties required to render text at a specific
size and configuration.
Consequently, a HarfBuzz <type>hb_font_t</type> font object can
be hooked up to a Core Text <type>CTFontRef</type>, and a HarfBuzz
<type>hb_face_t</type> face object can be hooked up to a
<type>CGFontRef</type>.
</para>
<para>
You can create a <type>hb_face_t</type> from a
<type>CGFontRef</type> by using the
<function>hb_coretext_face_create()</function> function. Subsequently,
you can retrieve the <type>CGFontRef</type> from a
<type>hb_face_t</type> with <function>hb_coretext_face_get_cg_font()</function>.
</para>
<para>
Likewise, you create a <type>hb_font_t</type> from a
<type>CTFontRef</type> by calling
<function>hb_coretext_font_create()</function>, and you can
fetch the associated <type>CTFontRef</type> from a
<type>hb_font_t</type> font object with
<function>hb_coretext_font_get_ct_font()</function>.
</para>
<para>
HarfBuzz also offers a <function>hb_font_set_ptem()</function>
function that you can use to set the nominal point size on any
<type>hb_font_t</type> font object. Core Text uses this value to
implement optical scaling.
</para>
<para>
When integrating your client code with Core Text, it is
important to recognize that Core Text <literal>points</literal>
are not typographic points (standardized at 72 per inch) as the
term is used elsewhere in OpenType. Instead, Core Text points
are CSS pixels, which are standardized at 96 per inch.
</para>
<para>
HarfBuzz's font functions take this distinction into account,
but it can be an easy detail to miss in cross-platform
code.
</para>
<para>
As a final note, you may notice a reference to an optional
<literal>coretext</literal> shaper back-end in the <xref
linkend="configuration" /> section of the HarfBuzz manual. This
option is not a Core Text-integration facility.
</para>
<para>
Instead, it is an internal code path used in the
<command>hb-shape</command> command-line utility, which hands
shaping functionality over to Core Text entirely, when run on a
macOS system. That allows testing HarfBuzz's native output
against the Core Text engine, for tracking compatibility and debugging.
</para>
<para>
Because this back-end is only used when testing HarfBuzz
functionality, it is disabled by default when building the
HarfBuzz binaries.
</para>
</section>
<section id="integration-icu">
<title>ICU integration</title>
<para>
Although HarfBuzz includes its own Unicode-data functions, it
also provides integration APIs for using the International
Components for Unicode (ICU) library as a source of Unicode data
on any supported platform.
</para>
<para>
The principal integration point with ICU is the
<type>hb_unicode_funcs_t</type> Unicode-functions structure
attached to a buffer. This structure holds the virtual methods
used for retrieving Unicode character properties, such as
General Category, Script, Combining Class, decomposition
mappings, and mirroring information.
</para>
<para>
To use ICU in your client program, you need to call
<function>hb_icu_get_unicode_funcs()</function>, which creates a
Unicode-functions structure populated with the ICU function for
each included method. Subsequently, you can attach the
Unicode-functions structure to your buffer:
</para>
<programlisting language="C">
hb_unicode_funcs_t *icufunctions;
icufunctions = hb_icu_get_unicode_funcs();
hb_buffer_set_unicode_funcs(buf, icufunctions);
</programlisting>
<para>
and ICU will be used for Unicode-data access.
</para>
<para>
HarfBuzz also supplies a pair of functions
(<function>hb_icu_script_from_script()</function> and
<function>hb_icu_script_to_script()</function>) for converting
between ICU's and HarfBuzz's internal enumerations of Unicode
scripts. The <function>hb_icu_script_from_script()</function>
function converts from a HarfBuzz <type>hb_script_t</type> to an
ICU <type>UScriptCode</type>. The
<function>hb_icu_script_to_script()</function> function does the
reverse: converting from a <type>UScriptCode</type> identifier
to a <type>hb_script_t</type>.
</para>
<para>
By default, HarfBuzz's ICU support is built as a separate shared
library (<filename class="libraryfile">libharfbuzz-icu.so</filename>)
when compiling HarfBuzz from source. This allows client programs
that do not need ICU to link against HarfBuzz without unnecessarily
adding ICU as a dependency. You can also build HarfBuzz with ICU
support built directly into the main HarfBuzz shared library
(<filename class="libraryfile">libharfbuzz.so</filename>),
by specifying the <literal>--with-icu=builtin</literal>
compile-time option.
</para>
</section>
<section id="integration-python">
<title>Python bindings</title>
<para>
As noted in the <xref linkend="integration-glib" /> section,
HarfBuzz uses a feature called <ulink
url="https://wiki.gnome.org/Projects/GObjectIntrospection">GObject
Introspection</ulink> (GI) to provide bindings for Python.
</para>
<para>
At compile time, the GI scanner analyzes the HarfBuzz C source
and builds metadata objects connecting the language bindings to
the C library. Your Python code can then use the HarfBuzz binary
through its Python interface.
</para>
<para>
HarfBuzz's Python bindings support Python 2 and Python 3. To use
them, you will need to have the <literal>pygobject</literal>
package installed. Then you should import
<literal>HarfBuzz</literal> from
<literal>gi.repository</literal>:
</para>
<programlisting language="Python">
from gi.repository import HarfBuzz
</programlisting>
<para>
and you can call HarfBuzz functions from Python. Sample code can
be found in the <filename>sample.py</filename> script in the
HarfBuzz <filename>src</filename> directory.
</para>
<para>
Do note, however, that the Python API is subject to change
without advance notice. GI allows the bindings to be
automatically updated, which is one of its advantages, but you
may need to update your Python code.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,266 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="object-model">
<title>The HarfBuzz object model</title>
<section id="object-model-intro">
<title>An overview of data types in HarfBuzz</title>
<para>
HarfBuzz features two kinds of data types: non-opaque,
pass-by-value types and opaque, heap-allocated types. This kind
of separation is common in C libraries that have to provide
API/ABI compatibility (almost) indefinitely.
</para>
<para>
<emphasis>Value types:</emphasis> The non-opaque, pass-by-value
types include integer types, enums, and small structs. Exposing
a struct in the public API makes it impossible to expand the
struct in the future. As such, exposing structs is reserved for
cases where it's extremely inefficient to do otherwise.
</para>
<para>
In HarfBuzz, several structs, like <literal>hb_glyph_info_t</literal> and
<literal>hb_glyph_position_t</literal>, fall into that efficiency-sensitive
category and are non-opaque.
</para>
<para>
For all non-opaque structs where future extensibility may be
necessary, reserved members are included to hold space for
possible future members. As such, it's important to provide
<function>equal()</function> and <function>hash()</function>
methods for such structs, allowing users of the API to
effectively deal with the type without having to
adapt their code to future changes.
</para>
<para>
Important value types provided by HarfBuzz include the structs
for working with Unicode code points, glyphs, and tags for font
tables and features, as well as the enums for many Unicode and
OpenType properties.
</para>
</section>
<section id="object-model-object-types">
<title>Objects in HarfBuzz</title>
<para>
<emphasis>Object types:</emphasis> Opaque struct types are used
for what HarfBuzz loosely calls "objects." This doesn't have
much to do with the terminology from object-oriented programming
(OOP), although some of the concepts are similar.
</para>
<para>
In HarfBuzz, all object types provide certain
lifecycle-management APIs. Objects are reference-counted, and
constructed with various <function>create()</function> methods, referenced via
<function>reference()</function> and dereferenced using
<function>destroy()</function>.
</para>
<para>
For example,
the <literal>hb_buffer_t</literal> object has
<function>hb_buffer_create()</function> as its constructor,
<function>hb_buffer_reference()</function> to reference, and
<function>hb_buffer_destroy()</function> to dereference.
</para>
<para>
After construction, each object's properties are accessible only
through the setter and getter functions described in the API
Reference manual.
</para>
<para>
Note that many object types can be marked as read-only or immutable,
facilitating their use in multi-threaded environments.
</para>
<para>
Key object types provided by HarfBuzz include:
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
<emphasis>blobs</emphasis>, which act as low-level wrappers around binary
data. Blobs are typically used to hold the contents of a
binary font file.
</para>
</listitem>
<listitem>
<para>
<emphasis>faces</emphasis>, which represent typefaces from a
font file, but without specific parameters (such as size) set.
</para>
</listitem>
<listitem>
<para>
<emphasis>fonts</emphasis>, which represent instances of a
face with all of their parameters specified.
</para>
</listitem>
<listitem>
<para>
<emphasis>buffers</emphasis>, which hold Unicode code points
for characters (before shaping) and the shaped glyph output
(after shaping).
</para>
</listitem>
<listitem>
<para>
<emphasis>shape plans</emphasis>, which store the settings
that HarfBuzz will use when shaping a particular text
segment. Shape plans are not generally used by client
programs directly, but as we will see in a later chapter,
they are still valuable to understand.
</para>
</listitem>
</itemizedlist>
</section>
<section id="object-model-lifecycle">
<title>Object lifecycle management</title>
<para>
Each object type in HarfBuzz provides a
<function>create()</function> method. Some object types provide
additional variants of <function>create()</function> to handle
special cases or to speed up common tasks; those variants are
documented in the API reference. For example,
<function>hb_blob_create_from_file()</function> constructs a new
blob directly from the contents of a file.
</para>
<para>
All objects are created with an initial reference count of
<literal>1</literal>. Client programs can increase the reference
count on an object by calling its
<function>reference()</function> method. Whenever a client
program is finished with an object, it should call its
corresponding <function>destroy()</function> method. The destroy
method will decrease the reference count on the object and,
whenever the reference count reaches zero, it will also destroy
the object and free all of the associated memory.
</para>
<para>
All of HarfBuzz's object-lifecycle-management APIs are
thread-safe (unless you compiled HarfBuzz from source with the
<literal>HB_NO_MT</literal> configuration flag), even when the
object as a whole is not thread-safe.
It is also permissible to <function>reference()</function> or to
<function>destroy()</function> the <literal>NULL</literal>
value.
</para>
<para>
Some objects are thread-safe after they have been constructed
and set up. The general pattern is to
<function>create()</function> the object, make a few
<function>set_*()</function> calls to set up the
object, and then use it without further modification.
</para>
<para>
To ensure that such an object is not modified, client programs
can explicitly mark an object as immutable. HarfBuzz provides
<function>make_immutable()</function> methods to mark an object
as immutable and <function>is_immutable()</function> methods to
test whether or not an object is immutable. Attempts to use
setter functions on immutable objects will fail silently; see the API
Reference manual for specifics.
</para>
<para>
Note also that there are no "make mutable" methods. If client
programs need to alter an object previously marked as immutable,
they will need to make a duplicate of the original.
</para>
<para>
Finally, object constructors (and, indeed, as much of the
shaping API as possible) will never return
<literal>NULL</literal>. Instead, if there is an allocation
error, each constructor will return an “empty” object
singleton.
</para>
<para>
These empty-object singletons are inert and safe (although
typically useless) to pass around. This design choice avoids
having to check for <literal>NULL</literal> pointers all
throughout the code.
</para>
<para>
In addition, this “empty” object singleton can also be accessed
using the <function>get_empty()</function> method of the object
type in question.
</para>
</section>
<section id="object-model-user-data">
<title>User data</title>
<para>
To better integrate with client programs, HarfBuzz's objects
offer a "user data" mechanism that can be used to attach
arbitrary data to the object. User-data attachment can be
useful for tying the lifecycles of various pieces of data
together, or for creating language bindings.
</para>
<para>
Each object type has a <function>set_user_data()</function>
method and a <function>get_user_data()</function> method. The
<function>set_user_data()</function> methods take a client-provided
<literal>key</literal> and a pointer,
<literal>user_data</literal>, pointing to the data itself. Once
the key-data pair has been attached to the object, the
<function>get_user_data()</function> method can be called with
the key, returning the <function>user_data</function> pointer.
</para>
<para>
The <function>set_user_data()</function> methods also support an
optional <function>destroy</function> callback. Client programs
can set the <function>destroy</function> callback and receive
notification from HarfBuzz whenever the object is destructed.
</para>
<para>
Finally, each <function>set_user_data()</function> method allows
the client program to set a <literal>replace</literal> Boolean
indicating whether or not the function call should replace any
existing <literal>user_data</literal>
associated with the specified key.
</para>
</section>
<section id="object-model-blobs">
<title>Blobs</title>
<para>
While most of HarfBuzz's object types are specific to the
shaping process, <emphasis>blobs</emphasis> are somewhat
different.
</para>
<para>
Blobs are an abstraction designed to negotiate lifecycle and
permissions for raw pieces of data. For example, when you load
the raw font data into memory and want to pass it to HarfBuzz,
you do so in a <literal>hb_blob_t</literal> wrapper.
</para>
<para>
This allows you to take advantage of HarfBuzz's
reference-counting and <function>destroy</function>
callbacks. If you allocated the memory for the data using
<function>malloc()</function>, you would create the blob using
</para>
<programlisting language="C">
hb_blob_create (data, length, HB_MEMORY_MODE_WRITABLE, data, free)
</programlisting>
<para>
That way, HarfBuzz will call <function>free()</function> on the
allocated memory whenever the blob drops its last reference and
is deconstructed. Consequently, the user code can stop worrying
about freeing memory and let the reference-counting machinery
take care of that.
</para>
<para>
Most of the time, blobs are read-only, facilitating their use in
immutable objects.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,336 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="shaping-and-shape-plans">
<title>Shaping and shape plans</title>
<para>
Once you have your face and font objects configured as desired and
your input buffer is filled with the characters you need to shape,
all you need to do is call <function>hb_shape()</function>.
</para>
<para>
HarfBuzz will return the shaped version of the text in the same
buffer that you provided, but it will be in output mode. At that
point, you can iterate through the glyphs in the buffer, drawing
each one at the specified position or handing them off to the
appropriate graphics library.
</para>
<para>
For the most part, HarfBuzz's shaping step is straightforward from
the outside. But that doesn't mean there will never be cases where
you want to look under the hood and see what is happening on the
inside. HarfBuzz provides facilities for doing that, too.
</para>
<section id="shaping-buffer-output">
<title>Shaping and buffer output</title>
<para>
The <function>hb_shape()</function> function call takes four arguments: the font
object to use, the buffer of characters to shape, an array of
user-specified features to apply, and the length of that feature
array. The feature array can be NULL, so for the sake of
simplicity we will start with that case.
</para>
<para>
Internally, HarfBuzz looks at the tables of the font file to
determine where glyph classes, substitutions, and positioning
are defined, using that information to decide which
<emphasis>shaper</emphasis> to use (<literal>ot</literal> for
OpenType fonts, <literal>aat</literal> for Apple Advanced
Typography fonts, and so on). It also looks at the direction,
script, and language properties of the segment to figure out
which script-specific shaping model is needed (at least, in
shapers that support multiple options).
</para>
<para>
If a font has a GDEF table, then that is used for
glyph classes; if not, HarfBuzz will fall back to Unicode
categorization by code point. If a font has an AAT <literal>morx</literal> table,
then it is used for substitutions; if not, but there is a GSUB
table, then the GSUB table is used. If the font has an AAT
<literal>kerx</literal> table, then it is used for positioning; if not, but
there is a GPOS table, then the GPOS table is used. If neither
table is found, but there is a <literal>kern</literal> table, then HarfBuzz will
use the <literal>kern</literal> table. If there is no <literal>kerx</literal>, no GPOS, and no
<literal>kern</literal>, HarfBuzz will fall back to positioning marks itself.
</para>
<para>
With a well-behaved OpenType font, you expect GDEF, GSUB, and
GPOS tables to all be applied. HarfBuzz implements the
script-specific shaping models in internal functions, rather
than in the public API.
</para>
<para>
The algorithms
used for shaping can be quite involved; HarfBuzz tries
to be compatible with the OpenType Layout specification
and, wherever there is any ambiguity, HarfBuzz attempts to replicate the
output of Microsoft's Uniscribe engine, to the extent that is feasible and desirable. See the <ulink
url="https://docs.microsoft.com/en-us/typography/script-development/standard">Microsoft
Typography pages</ulink> for more detail.
</para>
<para>
In general, though, all that you need to know is that
<function>hb_shape()</function> returns the results of shaping
in the same buffer that you provided. The buffer's content type
will now be set to
<literal>HB_BUFFER_CONTENT_TYPE_GLYPHS</literal>, indicating
that it contains shaped output, rather than input text. You can
now extract the glyph information and positioning arrays:
</para>
<programlisting language="C">
hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(buf, &amp;glyph_count);
hb_glyph_position_t *glyph_pos = hb_buffer_get_glyph_positions(buf, &amp;glyph_count);
</programlisting>
<para>
The glyph information array holds a <type>hb_glyph_info_t</type>
for each output glyph, which has two fields:
<parameter>codepoint</parameter> and
<parameter>cluster</parameter>. Whereas, in the input buffer,
the <parameter>codepoint</parameter> field contained the Unicode
code point, it now contains the glyph ID of the corresponding
glyph in the font. The <parameter>cluster</parameter> field is
an integer that you can use to help identify when shaping has
reordered, split, or combined code points; we will say more
about that in the next chapter.
</para>
<para>
The glyph positions array holds a corresponding
<type>hb_glyph_position_t</type> for each output glyph,
containing four fields: <parameter>x_advance</parameter>,
<parameter>y_advance</parameter>,
<parameter>x_offset</parameter>, and
<parameter>y_offset</parameter>. The advances tell you how far
you need to move the drawing point after drawing this glyph,
depending on whether you are setting horizontal text (in which
case you will have x advances) or vertical text (for which you
will have y advances). The x and y offsets tell you where to
move to start drawing the glyph; usually you will have both an
x and a y offset, regardless of the text direction.
</para>
<para>
Most of the time, you will rely on a font-rendering library or
other graphics library to do the actual drawing of glyphs, so
you will need to iterate through the glyphs in the buffer and
pass the corresponding values off.
</para>
</section>
<section id="shaping-opentype-features">
<title>OpenType features</title>
<para>
OpenType features enable fonts to include smart behavior,
implemented as "lookup" rules stored in the GSUB and GPOS
tables. The OpenType specification defines a long list of
standard features that fonts can use for these behaviors; each
feature has a four-character reserved name and a well-defined
semantic meaning.
</para>
<para>
Some OpenType features are defined for the purpose of supporting
script-specific shaping, and are automatically activated, but
only when a buffer's script property is set to a script that the
feature supports.
</para>
<para>
Other features are more generic and can apply to several (or
any) script, and shaping engines are expected to implement
them. By default, HarfBuzz activates several of these features
on every text run. They include <literal>abvm</literal>,
<literal>blwm</literal>, <literal>ccmp</literal>,
<literal>locl</literal>, <literal>mark</literal>,
<literal>mkmk</literal>, and <literal>rlig</literal>.
</para>
<para>
In addition, if the text direction is horizontal, HarfBuzz
also applies the <literal>calt</literal>,
<literal>clig</literal>, <literal>curs</literal>,
<literal>dist</literal>, <literal>kern</literal>,
<literal>liga</literal>, and <literal>rclt</literal> features.
</para>
<para>
Additionally, when HarfBuzz encounters a fraction slash
(<literal>U+2044</literal>), it looks backward and forward for decimal
digits (Unicode General Category = Nd), and enables features
<literal>numr</literal> on the sequence before the fraction slash,
<literal>dnom</literal> on the sequence after the fraction slash,
and <literal>frac</literal> on the whole sequence including the fraction
slash.
</para>
<para>
Some script-specific shaping models
(see <xref linkend="opentype-shaping-models" />) disable some of the
features listed above:
</para>
<itemizedlist>
<listitem>
<para>
Hangul: <literal>calt</literal>
</para>
</listitem>
<listitem>
<para>
Indic: <literal>liga</literal>
</para>
</listitem>
<listitem>
<para>
Khmer: <literal>liga</literal>
</para>
</listitem>
</itemizedlist>
<para>
If the text direction is vertical, HarfBuzz applies
the <literal>vert</literal> feature by default.
</para>
<para>
Still other features are designed to be purely optional and left
up to the application or the end user to enable or disable as desired.
</para>
<para>
You can adjust the set of features that HarfBuzz applies to a
buffer by supplying an array of <type>hb_feature_t</type>
features as the third argument to
<function>hb_shape()</function>. For a simple case, let's just
enable the <literal>dlig</literal> feature, which turns on any
"discretionary" ligatures in the font:
</para>
<programlisting language="C">
hb_feature_t userfeatures[1];
userfeatures[0].tag = HB_TAG('d','l','i','g');
userfeatures[0].value = 1;
userfeatures[0].start = HB_FEATURE_GLOBAL_START;
userfeatures[0].end = HB_FEATURE_GLOBAL_END;
</programlisting>
<para>
<literal>HB_FEATURE_GLOBAL_START</literal> and
<literal>HB_FEATURE_GLOBAL_END</literal> are macros we can use
to indicate that the features will be applied to the entire
buffer. We could also have used a literal <literal>0</literal>
for the start and a <literal>-1</literal> to indicate the end of
the buffer (or have selected other start and end positions, if needed).
</para>
<para>
When we pass the <varname>userfeatures</varname> array to
<function>hb_shape()</function>, any discretionary ligature
substitutions from our font that match the text in our buffer
will get performed:
</para>
<programlisting language="C">
hb_shape(font, buf, userfeatures, num_features);
</programlisting>
<para>
Just like we enabled the <literal>dlig</literal> feature by
setting its <parameter>value</parameter> to
<literal>1</literal>, you would disable a feature by setting its
<parameter>value</parameter> to <literal>0</literal>. Some
features can take other <parameter>value</parameter> settings;
be sure you read the full specification of each feature tag to
understand what it does and how to control it.
</para>
</section>
<section id="shaping-shaper-selection">
<title>Shaper selection</title>
<para>
The basic version of <function>hb_shape()</function> determines
its shaping strategy based on examining the capabilities of the
font file. OpenType font tables cause HarfBuzz to try the
<literal>ot</literal> shaper, while AAT font tables cause HarfBuzz to try the
<literal>aat</literal> shaper.
</para>
<para>
In the real world, however, a font might include some unusual
mix of tables, or one of the tables might simply be broken for
the script you need to shape. So, sometimes, you might not
want to rely on HarfBuzz's process for deciding what to do, and
just tell <function>hb_shape()</function> what you want it to try.
</para>
<para>
<function>hb_shape_full()</function> is an alternate shaping
function that lets you supply a list of shapers for HarfBuzz to
try, in order, when shaping your buffer. For example, if you
have determined that HarfBuzz's attempts to work around broken
tables give you better results than the AAT shaper itself does,
you might move the AAT shaper to the end of your list of
preferences and call <function>hb_shape_full()</function>
</para>
<programlisting language="C">
const char *shaperprefs[] = {"ot", "default", "aat", NULL};
...
hb_shape_full(font, buf, userfeatures, num_features, shaperprefs);
</programlisting>
<para>
to get results you are happier with.
</para>
<para>
You may also want to call
<function>hb_shape_list_shapers()</function> to get a list of
the shapers that were built at compile time in your copy of HarfBuzz.
</para>
</section>
<section id="shaping-plans-and-caching">
<title>Plans and caching</title>
<para>
Internally, HarfBuzz uses a structure called a shape plan to
track its decisions about how to shape the contents of a
buffer. The <function>hb_shape()</function> function builds up the shape plan by
examining segment properties and by inspecting the contents of
the font.
</para>
<para>
This process can involve some decision-making and
trade-offs — for example, HarfBuzz inspects the GSUB and GPOS
lookups for the script and language tags set on the segment
properties, but it falls back on the lookups under the
<literal>DFLT</literal> tag (and sometimes other common tags)
if there are actually no lookups for the tag requested.
</para>
<para>
HarfBuzz also includes some work-arounds for
handling well-known older font conventions that do not follow
OpenType or Unicode specifications, for buggy system fonts, and for
peculiarities of Microsoft Uniscribe. All of that means that a
shape plan, while not something that you should edit directly in
client code, still might be an object that you want to
inspect. Furthermore, if resources are tight, you might want to
cache the shape plan that HarfBuzz builds for your buffer and
font, so that you do not have to rebuild it for every shaping call.
</para>
<para>
You can create a cacheable shape plan with
<function>hb_shape_plan_create_cached(face, props,
user_features, num_user_features, shaper_list)</function>, where
<parameter>face</parameter> is a face object (not a font object,
notably), <parameter>props</parameter> is an
<type>hb_segment_properties_t</type>,
<parameter>user_features</parameter> is an array of
<type>hb_feature_t</type>s (with length
<parameter>num_user_features</parameter>), and
<parameter>shaper_list</parameter> is a list of shapers to try.
</para>
<para>
Shape plans are objects in HarfBuzz, so there are
reference-counting functions and user-data attachment functions
you can
use. <function>hb_shape_plan_reference(shape_plan)</function>
increases the reference count on a shape plan, while
<function>hb_shape_plan_destroy(shape_plan)</function> decreases
the reference count, destroying the shape plan when the last
reference is dropped.
</para>
<para>
You can attach user data to a shape plan (with a key) using the
<function>hb_shape_plan_set_user_data(shape_plan, key, data, destroy, replace)</function>
function, optionally supplying a <function>destroy</function>
callback to use. You can then fetch the user data attached to a
shape plan with
<function>hb_shape_plan_get_user_data(shape_plan, key)</function>.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,368 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="shaping-concepts">
<title>Shaping concepts</title>
<section id="text-shaping-concepts">
<title>Text shaping</title>
<para>
Text shaping is the process of transforming a sequence of Unicode
codepoints that represent individual characters (letters,
diacritics, tone marks, numbers, symbols, etc.) into the
orthographically and linguistically correct two-dimensional layout
of glyph shapes taken from a specified font.
</para>
<para>
For some writing systems (or <emphasis>scripts</emphasis>) and
languages, the process is simple, requiring the shaper to do
little more than advance the horizontal position forward by the
correct amount for each successive glyph.
</para>
<para>
But, for other scripts (often unceremoniously called <emphasis>complex scripts</emphasis>), any combination of
several shaping operations may be required, and the rules for how
and when they are applied vary from script to script. HarfBuzz and
other shaping engines implement these rules.
</para>
<para>
The exact rules and necessary operations for a particular script
constitute a shaping <emphasis>model</emphasis>. OpenType
specifies a set of shaping models that covers all of
Unicode. Other shaping models are available, however, including
Graphite and Apple Advanced Typography (AAT).
</para>
</section>
<section id="script-specific-shaping">
<title>Script-specific shaping</title>
<para>
In many scripts, transforming the input
sequence into the final layout often requires some combination of
operations&mdash;such as context-dependent substitutions,
context-dependent mark positioning, glyph-to-glyph joining,
glyph reordering, or glyph stacking.
</para>
<para>
In some scripts, the shaping rules require that a text
run be divided into syllables before the operations can be
applied. Other scripts may apply shaping operations over
entire words or over the entire text run, with no subdivision
required.
</para>
<para>
Other scripts do not require these
operations. However, correctly shaping a text run in
any script may still involve Unicode normalization,
ligature substitutions, mark positioning, kerning, and applying
other font features.
</para>
</section>
<section id="shaping-operations">
<title>Shaping operations</title>
<para>
Shaping a text run involves transforming the
input sequence of Unicode codepoints with some combination of
operations that is specified in the shaping model for the
script.
</para>
<para>
The specific conditions that trigger a given operation for a
text run vary from script to script, as do the order that the
operations are performed in and which codepoints are
affected. However, the same general set of shaping operations is
common to all of the script shaping models.
</para>
<itemizedlist>
<listitem>
<para>
A <emphasis>reordering</emphasis> operation moves a glyph
from its original ("logical") position in the sequence to
some other ("visual") position.
</para>
<para>
The shaping model for a given script might involve
more than one reordering step.
</para>
</listitem>
<listitem>
<para>
A <emphasis>joining</emphasis> operation replaces a glyph
with an alternate form that is designed to connect with one
or more of the adjacent glyphs in the sequence.
</para>
</listitem>
<listitem>
<para>
A contextual <emphasis>substitution</emphasis> operation
replaces either a single glyph or a subsequence of several
glyphs with an alternate glyph. This substitution is
performed when the original glyph or subsequence of glyphs
occurs in a specified position with respect to the
surrounding sequence. For example, one substitution might be
performed only when the target glyph is the first glyph in
the sequence, while another substitution is performed only
when a different target glyph occurs immediately after a
particular string pattern.
</para>
<para>
The shaping model for a given script might involve
multiple contextual-substitution operations, each applying
to different target glyphs and patterns, and which are
performed in separate steps.
</para>
</listitem>
<listitem>
<para>
A contextual <emphasis>positioning</emphasis> operation
moves the horizontal and/or vertical position of a
glyph. This positioning move is performed when the glyph
occurs in a specified position with respect to the
surrounding sequence.
</para>
<para>
Many contextual positioning operations are used to place
<emphasis>mark</emphasis> glyphs (such as diacritics, vowel
signs, and tone markers) with respect to
<emphasis>base</emphasis> glyphs. However, some
scripts may use contextual positioning operations to
correctly place base glyphs as well, such as
when the script uses <emphasis>stacking</emphasis> characters.
</para>
</listitem>
</itemizedlist>
</section>
<section id="unicode-character-categories">
<title>Unicode character categories</title>
<para>
Shaping models are typically specified with respect to how
scripts are defined in the Unicode standard.
</para>
<para>
Every codepoint in the Unicode Character Database (UCD) is
assigned a <emphasis>Unicode General Category</emphasis> (UGC),
which provides the most fundamental information about the
codepoint: whether the codepoint represents a
<emphasis>Letter</emphasis>, a <emphasis>Mark</emphasis>, a
<emphasis>Number</emphasis>, <emphasis>Punctuation</emphasis>, a
<emphasis>Symbol</emphasis>, a <emphasis>Separator</emphasis>,
or something else (<emphasis>Other</emphasis>).
</para>
<para>
These UGC properties are "Major" categories. Each codepoint is
further assigned to a "minor" category within its Major
category, such as "Letter, uppercase" (<literal>Lu</literal>) or
"Letter, modifier" (<literal>Lm</literal>).
</para>
<para>
Shaping models are concerned primarily with Letter and Mark
codepoints. The minor categories of Mark codepoints are
particularly important for shaping. Marks can be nonspacing
(<literal>Mn</literal>), spacing combining
(<literal>Mc</literal>), or enclosing (<literal>Me</literal>).
</para>
<para>
In addition to the UGC property, codepoints in the Indic and
Southeast Asian scripts are also assigned
<emphasis>Unicode Indic Syllabic Category</emphasis> (UISC) and
<emphasis>Unicode Indic Positional Category</emphasis> (UIPC)
properties that provide more detailed information needed for
shaping.
</para>
<para>
The UISC property sub-categorizes Letters and Marks according to
common script-shaping behaviors. For example, UISC distinguishes
between consonant letters, vowel letters, and vowel marks. The
UIPC property sub-categorizes Mark codepoints by the relative visual
position that they occupy (above, below, right, left, or in
multiple positions).
</para>
<para>
Some scripts require that the text run be split into
syllables. What constitutes a valid syllable in these
scripts is specified in regular expressions, formed from the
Letter and Mark codepoints, that take the UISC and UIPC
properties into account.
</para>
</section>
<section id="text-runs">
<title>Text runs</title>
<para>
Real-world text usually contains codepoints from a mixture of
different Unicode scripts (including punctuation, numbers, symbols,
white-space characters, and other codepoints that do not belong
to any script). Real-world text may also be marked up with
formatting that changes font properties (including the font,
font style, and font size).
</para>
<para>
For shaping purposes, all real-world text streams must be first
segmented into runs that have a uniform set of properties.
</para>
<para>
In particular, shaping models always assume that every codepoint
in a text run has the same <emphasis>direction</emphasis>,
<emphasis>script</emphasis> tag, and
<emphasis>language</emphasis> tag.
</para>
</section>
<section id="opentype-shaping-models">
<title>OpenType shaping models</title>
<para>
OpenType provides shaping models for the following scripts:
</para>
<itemizedlist>
<listitem>
<para>
The <emphasis>default</emphasis> shaping model handles all
scripts with no script-specific shaping model, and may also be used as a fallback for
handling unrecognized scripts.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Indic</emphasis> shaping model handles the Indic
scripts Bengali, Devanagari, Gujarati, Gurmukhi, Kannada,
Malayalam, Oriya, Tamil, and Telugu.
</para>
<para>
The Indic shaping model was revised significantly in
2005. To denote the change, a new set of <emphasis>script
tags</emphasis> was assigned for Bengali, Devanagari,
Gujarati, Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and
Telugu. For the sake of clarity, the term "Indic2" is
sometimes used to refer to the current, revised shaping
model.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Arabic</emphasis> shaping model supports
Arabic, Mongolian, N'Ko, Syriac, and several other connected
or cursive scripts.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Thai/Lao</emphasis> shaping model supports
the Thai and Lao scripts.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Khmer</emphasis> shaping model supports the
Khmer script.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Myanmar</emphasis> shaping model supports the
Myanmar (or Burmese) script.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Tibetan</emphasis> shaping model supports the
Tibetan script.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Hangul</emphasis> shaping model supports the
Hangul script.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Hebrew</emphasis> shaping model supports the
Hebrew script.
</para>
</listitem>
<listitem>
<para>
The <emphasis>Universal Shaping Engine</emphasis> (USE)
shaping model supports scripts not covered by one of
the script-specific shaping models above, including
Javanese, Balinese, Buginese, Batak, Chakma, Lepcha, Modi,
Phags-pa, Tagalog, Siddham, Sundanese, Tai Le, Tai Tham, Tai
Viet, and many others.
</para>
</listitem>
<listitem>
<para>
Text runs that do not fall under one of the above shaping
models may still require processing by a shaping engine. Of
particular note is <emphasis>Emoji</emphasis> shaping, which
may involve variation-selector sequences and glyph
substitution. Emoji shaping is handled by the default
shaping model.
</para>
</listitem>
</itemizedlist>
</section>
<section id="graphite-shaping">
<title>Graphite shaping</title>
<para>
In contrast to OpenType shaping, Graphite shaping does not
specify a predefined set of shaping models or a set of supported
scripts.
</para>
<para>
Instead, each Graphite font contains a complete set of rules that
implement the required shaping model for the intended
script. These rules include finite-state machines to match
sequences of codepoints to the shaping operations to perform.
</para>
<para>
Graphite shaping can perform the same shaping operations used in
OpenType shaping, as well as other functions that have not been
defined for OpenType shaping.
</para>
</section>
<section id="aat-shaping">
<title>AAT shaping</title>
<para>
In contrast to OpenType shaping, AAT shaping does not specify a
predefined set of shaping models or a set of supported scripts.
</para>
<para>
Instead, each AAT font includes a complete set of rules that
implement the desired shaping model for the intended
script. These rules include finite-state machines to match glyph
sequences and the shaping operations to perform.
</para>
<para>
Notably, AAT shaping rules are expressed for glyphs in the font,
not for Unicode codepoints. AAT shaping can perform the same
shaping operations used in OpenType shaping, as well as other
functions that have not been defined for OpenType shaping.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,218 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="utilities">
<title>Utilities</title>
<para>
HarfBuzz includes several auxiliary components in addition to the
main APIs. These include a set of command-line tools and a set of
lower-level APIs for common data types that may be of interest to
client programs.
</para>
<section id="utilities-command-line-tools">
<title>Command-line tools</title>
<para>
HarfBuzz includes three command-line tools:
<command>hb-shape</command>, <command>hb-view</command>, and
<command>hb-subset</command>. They can be used to examine
HarfBuzz's functionality, debug font binaries, or explore the
various shaping models and features from a terminal.
</para>
<section id="utilities-command-line-hbshape">
<title>hb-shape</title>
<para>
<emphasis><command>hb-shape</command></emphasis> allows you to run HarfBuzz's
<function>hb_shape()</function> function on an input string and
to examine the outcome, in human-readable form, as terminal
output. <command>hb-shape</command> does
<emphasis>not</emphasis> render the results of the shaping call
into rendered text (you can use <command>hb-view</command>, below, for
that). Instead, it prints out the final glyph indices and
positions, taking all shaping operations into account, as if the
input string were a HarfBuzz input buffer.
</para>
<para>
You can specify the font to be used for shaping and, with
command-line options, you can add various aspects of the
internal state to the output that is sent to the terminal. The
general format is
</para>
<programlisting>
<command>hb-shape</command> <optional>[OPTIONS]</optional>
<parameter>path/to/font/file.ttf</parameter>
<parameter>yourinputtext</parameter>
</programlisting>
<para>
The default output format is plain text (although JSON output
can be selected instead by specifying the option
<optional>--output-format=json</optional>). The default output
syntax reports each glyph name (or glyph index if there is no
name) followed by its cluster value, its horizontal and vertical
position displacement, and its horizontal and vertical advances.
</para>
<para>
Output options exist to skip any of these elements in the
output, and to include additional data, such as Unicode
code-point values, glyph extents, glyph flags, or interim
shaping results.
</para>
<para>
Output can also be redirected to a file, or input read from a
file. Additional options allow you to enable or disable
specific font features, to set variation-font axis values, to
alter the language, script, direction, and clustering settings
used, to enable sanity checks, or to change which shaping engine is used.
</para>
<para>
For a complete explanation of the options available, run
</para>
<programlisting>
<command>hb-shape</command> <parameter>--help</parameter>
</programlisting>
</section>
<section id="utilities-command-line-hbview">
<title>hb-view</title>
<para>
<emphasis><command>hb-view</command></emphasis> allows you to
see the shaped output of an input string in rendered
form. Like <command>hb-shape</command>,
<command>hb-view</command> takes a font file and a text string
as its arguments:
</para>
<programlisting>
<command>hb-view</command> <optional>[OPTIONS]</optional>
<parameter>path/to/font/file.ttf</parameter>
<parameter>yourinputtext</parameter>
</programlisting>
<para>
By default, <command>hb-view</command> renders the shaped
text in ASCII block-character images as terminal output. By
appending the
<command>--output-file=<optional>filename</optional></command>
switch, you can write the output to a PNG, SVG, or PDF file
(among other formats).
</para>
<para>
As with <command>hb-shape</command>, a lengthy set of options
is available, with which you can enable or disable
specific font features, set variation-font axis values,
alter the language, script, direction, and clustering settings
used, enable sanity checks, or change which shaping engine is
used.
</para>
<para>
You can also set the foreground and background colors used for
the output, independently control the width of all four
margins, alter the line spacing, and annotate the output image.
</para>
<para>
In general, <command>hb-view</command> is a quick way to
verify that the output of HarfBuzz's shaping operation looks
correct for a given text-and-font combination, but you may
want to use <command>hb-shape</command> to figure out exactly
why something does not appear as expected.
</para>
</section>
<section id="utilities-command-line-hbsubset">
<title>hb-subset</title>
<para>
<emphasis><command>hb-subset</command></emphasis> allows you
to generate a subset of a given font, with a limited set of
supported characters, features, and variation settings.
</para>
<para>
By default, you provide an input font and an input text string
as the arguments to <command>hb-subset</command>, and it will
generate a font that covers the input text exactly like the
input font does, but includes no other characters or features.
</para>
<programlisting>
<command>hb-subset</command> <optional>[OPTIONS]</optional>
<parameter>path/to/font/file.ttf</parameter>
<parameter>yourinputtext</parameter>
</programlisting>
<para>
For example, to create a subset of Noto Serif that just includes the
numerals and the lowercase Latin alphabet, you could run
</para>
<programlisting>
<command>hb-subset</command> <optional>[OPTIONS]</optional>
<parameter>NotoSerif-Regular.ttf</parameter>
<parameter>0123456789abcdefghijklmnopqrstuvwxyz</parameter>
</programlisting>
<para>
There are options available to remove hinting from the
subsetted font and to specify a list of variation-axis settings.
</para>
</section>
</section>
<section id="utilities-common-types-apis">
<title>Common data types and APIs</title>
<para>
HarfBuzz includes several APIs for working with general-purpose
data that you may find convenient to leverage in your own
software. They include set operations and integer-to-integer
mapping operations.
</para>
<para>
HarfBuzz uses set operations for internal bookkeeping, such as
when it collects all of the glyph IDs covered by a particular
font feature. You can also use the set API to build sets, add
and remove elements, test whether or not sets contain particular
elements, or compute the unions, intersections, or differences
between sets.
</para>
<para>
All set elements are integers (specifically,
<type>hb_codepoint_t</type> 32-bit unsigned ints), and there are
functions for fetching the minimum and maximum element from a
set. The set API also includes some functions that might not
be part of a generic set facility, such as the ability to add a
contiguous range of integer elements to a set in bulk, and the
ability to fetch the next-smallest or next-largest element.
</para>
<para>
The HarfBuzz set API includes some conveniences as well. All
sets are lifecycle-managed, just like other HarfBuzz
objects. You increase the reference count on a set with
<function>hb_set_reference()</function> and decrease it with
<function>hb_set_destroy()</function>. You can also attach
user data to a set, just like you can to blobs, buffers, faces,
fonts, and other objects, and set destroy callbacks.
</para>
<para>
HarfBuzz also provides an API for keeping track of
integer-to-integer mappings. As with the set API, each integer is
stored as an unsigned 32-bit <type>hb_codepoint_t</type>
element. Maps, like other objects, are reference counted with
reference and destroy functions, and you can attach user data to
them. The mapping operations include adding and deleting
integer-to-integer key:value pairs to the map, testing for the
presence of a key, fetching the population of the map, and so on.
</para>
<para>
There are several other internal HarfBuzz facilities that are
exposed publicly and which you may want to take advantage of
while processing text. HarfBuzz uses a common
<type>hb_tag_t</type> for a variety of OpenType tag identifiers (for
scripts, languages, font features, table names, variation-axis
names, and more), and provides functions for converting strings
to tags and vice-versa.
</para>
<para>
Finally, HarfBuzz also includes data types for Booleans, bit
masks, and other simple types.
</para>
</section>
</chapter>

View File

@@ -0,0 +1,441 @@
<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY version SYSTEM "version.xml">
]>
<chapter id="what-is-harfbuzz">
<title>What is HarfBuzz?</title>
<para>
HarfBuzz is a <emphasis>text-shaping engine</emphasis>. If you
give HarfBuzz a font and a string containing a sequence of Unicode
codepoints, HarfBuzz selects and positions the corresponding
glyphs from the font, applying all of the necessary layout rules
and font features. HarfBuzz then returns the string to you in the
form that is correctly arranged for the language and writing
system.
</para>
<para>
HarfBuzz can properly shape all of the world's major writing
systems. It runs on all major operating systems and software
platforms and it supports the major font formats in use
today.
</para>
<section id="what-is-text-shaping">
<title>What is text shaping?</title>
<para>
Text shaping is the process of translating a string of character
codes (such as Unicode codepoints) into a properly arranged
sequence of glyphs that can be rendered onto a screen or into
final output form for inclusion in a document.
</para>
<para>
The shaping process is dependent on the input string, the active
font, the script (or writing system) that the string is in, and
the language that the string is in.
</para>
<para>
Modern software systems generally only deal with strings in the
Unicode encoding scheme (although legacy systems and documents may
involve other encodings).
</para>
<para>
There are several font formats that a program might
encounter, each of which has a set of standard text-shaping
rules.
</para>
<para>The dominant format is <ulink
url="http://www.microsoft.com/typography/otspec/">OpenType</ulink>. The
OpenType specification defines a series of <ulink url="https://github.com/n8willis/opentype-shaping-documents">shaping models</ulink> for
various scripts from around the world. These shaping models depend on
the font incorporating certain features as
<emphasis>lookups</emphasis> in its <literal>GSUB</literal>
and <literal>GPOS</literal> tables.
</para>
<para>
Alternatively, OpenType fonts can include shaping features for
the <ulink url="https://graphite.sil.org/">Graphite</ulink> shaping model.
</para>
<para>
TrueType fonts can also include OpenType shaping
features. Alternatively, TrueType fonts can also include <ulink url="https://developer.apple.com/fonts/TrueType-Reference-Manual/RM09/AppendixF.html">Apple
Advanced Typography</ulink> (AAT) tables to implement shaping
support. AAT fonts are generally only found on macOS and iOS systems.
</para>
<para>
Text strings will usually be tagged with a script and language
tag that provide the context needed to perform text shaping
correctly. The necessary <ulink
url="https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags">script</ulink>
and <ulink
url="https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags">language</ulink>
tags are defined by OpenType.
</para>
</section>
<section id="why-do-i-need-a-shaping-engine">
<title>Why do I need a shaping engine?</title>
<para>
Text shaping is an integral part of preparing text for
display. Before a Unicode sequence can be rendered, the
codepoints in the sequence must be mapped to the corresponding
glyphs provided in the font, and those glyphs must be positioned
correctly relative to each other. For many of the scripts
supported in Unicode, these steps involve script-specific layout
rules, including complex joining, reordering, and positioning
behavior. Implementing these rules is the job of the shaping engine.
</para>
<para>
Text shaping is a fairly low-level operation. HarfBuzz is
used directly by text-handling libraries like <ulink
url="https://www.pango.org/">Pango</ulink>, as well as by the layout
engines in Firefox, LibreOffice, and Chromium. Unless you are
<emphasis>writing</emphasis> one of these layout engines
yourself, you will probably not need to use HarfBuzz: normally,
a layout engine, toolkit, or other library will turn text into
glyphs for you.
</para>
<para>
However, if you <emphasis>are</emphasis> writing a layout engine
or graphics library yourself, then you will need to perform text
shaping, and this is where HarfBuzz can help you.
</para>
<para>
Here are some specific scenarios where a text-shaping engine
like HarfBuzz helps you:
</para>
<itemizedlist>
<listitem>
<para>
OpenType fonts contain a set of glyphs (that is, shapes
to represent the letters, numbers, punctuation marks, and
all other symbols), which are indexed by a <literal>glyph ID</literal>.
</para>
<para>
A particular glyph ID within the font does not necessarily
correlate to a predictable Unicode codepoint. For instance,
some fonts have the letter &quot;a&quot; as glyph ID 1, but
many others do not. In order to retrieve the right glyph
from the font to display &quot;a&quot;, you need to consult
the table inside the font (the <literal>cmap</literal>
table) that maps Unicode codepoints to glyph IDs. In other
words, <emphasis>text shaping turns codepoints into glyph
IDs</emphasis>.
</para>
</listitem>
<listitem>
<para>
Many OpenType fonts contain ligatures: combinations of
characters that are rendered as a single unit. For instance,
it is common for the &quot;f, i&quot; letter
sequence to appear in print as the single ligature glyph
&quot;ﬁ&quot;.
</para>
<para>
Whether you should render an &quot;f, i&quot; sequence
as <literal>fi</literal> or as &quot;ﬁ&quot; does not
depend on the input text. Instead, it depends on whether
or not the font includes an &quot;ﬁ&quot; glyph and on the
level of ligature application you wish to perform. The font
and the amount of ligature application used are under your
control. In other words, <emphasis>text shaping involves
querying the font's ligature tables and determining what
substitutions should be made</emphasis>.
</para>
</listitem>
<listitem>
<para>
While ligatures like &quot;ﬁ&quot; are optional typographic
refinements, some languages <emphasis>require</emphasis> certain
substitutions to be made in order to display text correctly.
</para>
<para>
For example, in Tamil, when the letter &quot;TTA&quot; (ட)
is followed by the vowel sign &quot;U&quot; (ு), the pair
must be replaced by the single glyph &quot;டு&quot;. The
sequence of Unicode characters &quot;ட,ு&quot; needs to be
substituted with a single &quot;டு&quot; glyph from the
font.
</para>
<para>
But &quot;டு&quot; does not have a Unicode codepoint. To
find this glyph, you need to consult the table inside
the font (the <literal>GSUB</literal> table) that contains
substitution information. In other words, <emphasis>text shaping
chooses the correct glyph for a sequence of characters
provided</emphasis>.
</para>
</listitem>
<listitem>
<para>
Similarly, each Arabic character has four different variants
corresponding to the different positions it might appear in
within a sequence. Inside a font, there will be separate
glyphs for the initial, medial, final, and isolated forms of
each letter, each at a different glyph ID.
</para>
<para>
Unicode only assigns one codepoint per character, so a
Unicode string will not tell you which glyph variant to use
for each character. To decide, you need to analyze the whole
string and determine the appropriate glyph for each character
based on its position. In other words, <emphasis>text
shaping chooses the correct form of the letter by its
position and returns the correct glyph from the font</emphasis>.
</para>
</listitem>
<listitem>
<para>
Other languages involve marks and accents that need to be
rendered in specific positions relative to a base character. For
instance, the Moldovan language includes the Cyrillic letter
&quot;zhe&quot; (ж) with a breve accent, like so: &quot;ӂ&quot;.
</para>
<para>
Some fonts will provide this character as a single
zhe-with-breve glyph, but other fonts will not and, instead,
will expect the rendering engine to form the character by
superimposing the separate &quot;ж&quot; and &quot;˘&quot;
glyphs.
</para>
<para>
But exactly where you should draw the breve depends on the
height and width of the preceding zhe glyph. To find the
right position, you need to consult the table inside
the font (the <literal>GPOS</literal> table) that contains
positioning information.
In other words, <emphasis>text shaping tells you whether you
have a precomposed glyph within your font or if you need to
compose a glyph yourself out of combining marks&mdash;and,
if so, where to position those marks.</emphasis>
</para>
</listitem>
</itemizedlist>
<para>
If tasks like these are something that you need to do, then you
need a text shaping engine. You could use Uniscribe if you are
writing Windows software; you could use CoreText on macOS; or
you could use HarfBuzz.
</para>
<note>
<para>
In the rest of this manual, the text will assume that the reader
is that implementor of a text-layout engine.
</para>
</note>
</section>
<section id="what-does-harfbuzz-do">
<title>What does HarfBuzz do?</title>
<para>
HarfBuzz provides text shaping through a cross-platform
C API that accepts sequences of Unicode codepoints as input. Currently,
the following OpenType shaping models are supported:
</para>
<itemizedlist>
<listitem>
<para>
Indic (covering Devanagari, Bengali, Gujarati,
Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and Telugu)
</para>
</listitem>
<listitem>
<para>
Arabic (covering Arabic, N'Ko, Syriac, and Mongolian)
</para>
</listitem>
<listitem>
<para>
Thai and Lao
</para>
</listitem>
<listitem>
<para>
Khmer
</para>
</listitem>
<listitem>
<para>
Myanmar
</para>
</listitem>
<listitem>
<para>
Tibetan
</para>
</listitem>
<listitem>
<para>
Hangul
</para>
</listitem>
<listitem>
<para>
Hebrew
</para>
</listitem>
<listitem>
<para>
The Universal Shaping Engine or <emphasis>USE</emphasis>
(covering complex scripts not covered by the above shaping
models)
</para>
</listitem>
<listitem>
<para>
A default shaping model for non-complex scripts
(covering Latin, Cyrillic, Greek, Armenian, Georgian, Tifinagh,
and many others)
</para>
</listitem>
<listitem>
<para>
Emoji (including emoji modifier sequences, flag sequences,
and ZWJ sequences)
</para>
</listitem>
</itemizedlist>
<para>
In addition to OpenType shaping, HarfBuzz supports the latest
version of Graphite shaping (the "Graphite 2" model) and AAT
shaping.
</para>
<para>
HarfBuzz can read and understand TrueType fonts (.ttf), TrueType
collections (.ttc), and OpenType fonts (.otf, including those
fonts that contain TrueType-style outlines and those that
contain PostScript CFF or CFF2 outlines).
</para>
<para>
HarfBuzz is designed and tested to run on top of the FreeType
font renderer. It can run on Linux, Android, Windows, macOS, and
iOS systems.
</para>
<para>
In addition to its core shaping functionality, HarfBuzz provides
functions for accessing other font features, including optional
GSUB and GPOS OpenType features, as well as
all color-font formats (<literal>CBDT</literal>,
<literal>sbix</literal>, <literal>COLR/CPAL</literal>, and
<literal>SVG-OT</literal>) and OpenType variable fonts. HarfBuzz
also includes a font-subsetting feature. HarfBuzz can perform
some low-level math-shaping operations, although it does not
currently perform full shaping for mathematical typesetting.
</para>
<para>
A suite of command-line utilities is also provided in the
source-code tree, designed to help users test and debug
HarfBuzz's features on real-world fonts and input.
</para>
</section>
<section id="what-harfbuzz-doesnt-do">
<title>What HarfBuzz doesn't do</title>
<para>
HarfBuzz will take a Unicode string, shape it, and give you the
information required to lay it out correctly on a single
horizontal (or vertical) line using the font provided. That is the
extent of HarfBuzz's responsibility.
</para>
<para>
It is important to note that if you are implementing a complete
text-layout engine you may have other responsibilities that
HarfBuzz will <emphasis>not</emphasis> help you with. For example:
</para>
<itemizedlist>
<listitem>
<para>
HarfBuzz won't help you with bidirectionality. If you want to
lay out text that includes a mix of Hebrew and English, you
will need to ensure that each buffer provided to HarfBuzz
has all of its characters in the same order and that the
directionality of the buffer is set correctly. This may mean
segmenting the text before it is placed into HarfBuzz buffers. In
other words, the user will hit the keys in the following
sequence:
</para>
<programlisting>
A B C [space] ג ב א [space] D E F
</programlisting>
<para>
but will expect to see in the output:
</para>
<programlisting>
ABC אבג DEF
</programlisting>
<para>
This reordering is called <emphasis>bidi processing</emphasis>
(&quot;bidi&quot; is short for bidirectional), and there's an
algorithm as an annex to the Unicode Standard which tells you how
to process a string of mixed directionality.
Before sending your string to HarfBuzz, you may need to apply the
bidi algorithm to it. Libraries such as <ulink
url="http://icu-project.org/">ICU</ulink> and <ulink
url="http://fribidi.org/">fribidi</ulink> can do this for you.
</para>
</listitem>
<listitem>
<para>
HarfBuzz won't help you with text that contains different font
properties. For instance, if you have the string &quot;a
<emphasis>huge</emphasis> breakfast&quot;, and you expect
&quot;huge&quot; to be italic, then you will need to send three
strings to HarfBuzz: <literal>a</literal>, in your Roman font;
<literal>huge</literal> using your italic font; and
<literal>breakfast</literal> using your Roman font again.
</para>
<para>
Similarly, if you change the font, font size, script,
language, or direction within your string, then you will
need to shape each run independently and output them
independently. HarfBuzz expects to shape a run of characters
that all share the same properties.
</para>
</listitem>
<listitem>
<para>
HarfBuzz won't help you with line breaking, hyphenation, or
justification. As mentioned above, HarfBuzz lays out the string
along a <emphasis>single line</emphasis> of, notionally,
infinite length. If you want to find out where the potential
word, sentence and line break points are in your text, you
could use the ICU library's break iterator functions.
</para>
<para>
HarfBuzz can tell you how wide a shaped piece of text is, which is
useful input to a justification algorithm, but it knows nothing
about paragraphs, lines or line lengths. Nor will it adjust the
space between words to fit them proportionally into a line.
</para>
</listitem>
</itemizedlist>
<para>
If you are a layout-engine implementor, HarfBuzz will help you with the
interface between your text and your font, and that's something
that you'll need&mdash;what you then do with the glyphs that your font
returns is up to you.
</para>
</section>
<section id="why-is-it-called-harfbuzz">
<title>Why is it called HarfBuzz?</title>
<para>
HarfBuzz began its life as text-shaping code within the FreeType
project (and you will see references to the FreeType authors
within the source code copyright declarations), but was then
extracted out to its own project. This project is maintained by
Behdad Esfahbod, who named it HarfBuzz. Originally, it was a
shaping engine for OpenType fonts&mdash;&quot;HarfBuzz&quot; is
the Persian for &quot;open type&quot;.
</para>
</section>
</chapter>

View File

@@ -0,0 +1 @@
@HB_VERSION@

View File

@@ -0,0 +1,544 @@
# The web assembly shaper
If the standard OpenType shaping engine doesn't give you enough flexibility, Harfbuzz allows you to write your own shaping engine in WebAssembly and embed it into your font! Any font which contains a `Wasm` table will be passed to the WebAssembly shaper.
## What you can and can't do: the WASM shaper's role in shaping
The Harfbuzz shaping engine, unlike its counterparts CoreText and DirectWrite, is only responsible for a small part of the text rendering process. Specifically, Harfbuzz is purely responsible for *shaping*; although Harfbuzz does have APIs for accessing glyph outlines, typically other libraries in the free software text rendering stack are responsible for text segmentation into runs, outline scaling and rasterizing, setting text on lines, and so on.
Harfbuzz is therefore restricted to turning a buffer of codepoints for a segmented run of the same script, language, font, and variation settings, into glyphs and positioning them. This is also all that you can do with the WASM shaper; you can influence the process of mapping a string of characters into an array of glyphs, you can determine how those glyphs are positioned and their advance widths, but you cannot manipulate outlines, variations, line breaks, or affect text layout between texts of different font, variation, language, script or OpenType feature selection.
## The WASM shaper interface
The WASM code inside a font is expected to export a function called `shape` which takes five int32 arguments and returns an int32 status value. (Zero for failure, any other value for success.) Three of the five arguments are tokens which can be passed to the API functions exported to your WASM code by the host shaping engine:
* A *shape plan* token, which can largely be ignored.
* A *font* token.
* A *buffer* token.
* A *feature* array.
* The number of features.
The general goal of WASM shaping involves receiving and manipulating a *buffer contents* structure, which is an array of *infos* and *positions* (as defined below). Initially this buffer will represent an input string in Unicode codepoints. By the end of your `shape` function, it should represent a set of glyph IDs and their positions. (User-supplied WASM code will manipulate the buffer through *buffer tokens*; the `buffer_copy_contents` and `buffer_set_contents` API functions, defined below, use these tokens to exchange buffer information with the host shaping engine.)
* The `buffer_contents_t` structure
| type | field | description|
| - | - | - |
| uint32 | length | Number of items (characters or glyphs) in the buffer |
| glyph_info_t | infos | An array of `length` glyph infos |
| glyph_position_t | positions | An array of `length` glyph positions |
* The `glyph_info_t` structure
| type | field | description|
| - | - | - |
| uint32 | codepoint | (On input) A Unicode codepoint. (On output) A glyph ID. |
| uint32 | mask | Unused in WASM; can be user-defined |
| uint32 | cluster | Index of start of this graphical cluster in input string |
| uint32 | var1 | Reserved |
| uint32 | var2 | Reserved |
The `cluster` field is used to map glyphs in the output glyph stream back to characters in the input Unicode sequence for hit testing, cursor positioning, etc. It must be set to a monotonically increasing value across the buffer.
* The `glyph_position_t` structure
| type | field | description|
| - | - | - |
| int32 | x_advance | X advance of the glyph |
| int32 | y_advance | Y advance of the glyph |
| int32 | x_offset | X offset of the glyph |
| int32 | y_offset | Y offset of the glyph |
| uint32 | var | Reserved |
* The `feature_t` array
To communicate user-selected OpenType features to the user-defined WASM shaper, the host shaping engine passes an array of feature structures:
| type | field | description|
| - | - | - |
| uint32 | tag | Byte-encoded feature tag |
| uint32 | value | Value: 0=off, 1=on, other values used for alternate selection |
| uint32 | start | Index into the input string representing start of the active region for this feature selection (0=start of string) |
| uint32 | end | Index into the input string representing end of the active region for this feature selection (-1=end of string) |
## API functions available
To assist the shaping code in mapping codepoints to glyphs, the WASM shaper exports the following functions. Note that these are the low level API functions; WASM authors may prefer to use higher-level abstractions around these functions, such as the `harfbuzz-wasm` Rust crate provided by Harfbuzz.
### Sub-shaping
* `shape_with`
```C
bool shape_with(
uint32 font_token,
uint32 buffer_token,
feature_t* features,
uint32 num_features,
char* shaper
)
```
Run another shaping engine's shaping process on the given font and buffer. The only shaping engine guaranteed to be available is `ot`, the OpenType shaper, but others may also be available. This allows the WASM author to process a buffer "normally", before further manipulating it.
### Buffer access
* `buffer_copy_contents`
```C
bool buffer_copy_contents(
uint32 buffer_token,
buffer_contents_t* buffer_contents
)
```
Retrieves the contents of the host shaping engine's buffer into the `buffer_contents` structure. This should typically be called at the beginning of shaping.
* `buffer_set_contents`
```C
bool buffer_set_contents(
uint32 buffer_token,
buffer_contents_t* buffer_contents
)
```
Copy the `buffer_contents` structure back into the host shaping engine's buffer. This should typically be called at the end of shaping.
* `buffer_contents_free`
```C
bool buffer_contents_free(buffer_contents_t* buffer_contents)
```
Releases the memory taken up by the buffer contents structure.
* `buffer_contents_realloc`
```C
bool buffer_contents_realloc(
buffer_contents_t* buffer_contents,
uint32 size
)
```
Requests that the buffer contents structure be resized to the given size.
* `buffer_get_direction`
```C
uint32 buffer_get_direction(uint32 buffer_token)
```
Returns the buffer's direction:
* 0 = invalid
* 4 = left to right
* 5 = right to left
* 6 = top to bottom
* 7 = bottom to top
* `buffer_get_script`
```C
uint32 buffer_get_script(uint32 buffer_token)
```
Returns the byte-encoded OpenType script tag of the buffer.
* `buffer_reverse`
```C
void buffer_reverse(uint32 buffer_token)
```
Reverses the order of items in the buffer.
* `buffer_reverse_clusters`
```C
void buffer_reverse_clusters(uint32 buffer_token)
```
Reverses the order of items in the buffer while keeping items of the same cluster together.
## Font handling functions
(In the following functions, a *font* is a specific instantiation of a *face* at a particular scale factor and variation position.)
* `font_create`
```C
uint32 font_create(uint32 face_token)
```
Returns a new *font token* from the given *face token*.
* `font_get_face`
```C
uint32 font_get_face(uint32 font_token)
```
Creates a new *face token* from the given *font token*.
* `font_get_scale`
```C
void font_get_scale(
uint32 font_token,
int32* x_scale,
int32* y_scale
)
```
Returns the scale of the current font.
* `font_get_glyph`
```C
uint32 font_get_glyph(
uint32 font_token,
uint32 codepoint,
uint32 variation_selector
)
```
Returns the nominal glyph ID for the given codepoint, using the `cmap` table of the font to map Unicode codepoint (and variation selector) to glyph ID.
* `font_get_glyph_h_advance`/`font_get_glyph_v_advance`
```C
uint32 font_get_glyph_h_advance(uint32 font_token, uint32 glyph_id)
uint32 font_get_glyph_v_advance(uint32 font_token, uint32 glyph_id)
```
Returns the default horizontal and vertical advance respectively for the given glyph ID at the current scale and variations settings.
* `font_get_glyph_extents`
```C
typedef struct
{
uint32 x_bearing;
uint32 y_bearing;
uint32 width;
uint32 height;
} glyph_extents_t;
bool font_get_glyph_extents(
uint32 font_token,
uint32 glyph_id,
glyph_extents_t* extents
)
```
Returns the glyph's extents for the given glyph ID at current scale and variation settings.
* `font_glyph_to_string`
```C
void font_glyph_to_string(
uint32 font_token,
uint32 glyph_id,
char* string,
uint32 size
)
```
Copies the name of the given glyph, or, if no name is available, a string of the form `gXXXX` into the given string.
* `font_copy_glyph_outline`
```C
typedef struct
{
float x;
float y;
uint32_t type;
} glyph_outline_point_t;
typedef struct
{
uint32_t n_points;
glyph_outline_point_t* points;
uint32_t n_contours;
uint32_t* contours;
} glyph_outline_t;
bool font_copy_glyph_outline(
uint32 font_token,
uint32 glyph_id,
glyph_outline_t* outline
);
```
Copies the outline of the given glyph ID, at current scale and variation settings, into the outline structure provided. The outline structure returns an array of points (specifying coordinates and whether the point is oncurve or offcurve) and an array of indexes into the points array representing the end of each contour, similar to the `glyf` table structure.
* `font_copy_coords`/`font_set_coords`
```C
typedef struct
{
uint32 length;
int32* coords;
} coords_t;
bool font_copy_coords(uint32 font_token, &coords_t coords);
bool font_set_coords(uint32 font_token, &coords_t coords);
```
`font_copy_coords` copies the font's variation coordinates into the given structure; the resulting structure has `length` equal to the number of variation axes, with each member of the `coords` array being a F2DOT14 encoding of the normalized variation value.
`font_set_coords` sets the font's variation coordinates. Because the WASM shaper is only responsible for shaping and positioning, not outline drawing, the user should *not* expect this to affect the rendered outlines; the function is only useful in very limited circumstances, such as when instantiating a second variable font and sub-shaping a buffer using this new font.
## Face handling functions
* `face_create`
```C
typedef struct
{
uint32_t length;
char* data;
} blob_t;
uint32 face_create(blob_t* blob)
```
Creates a new *face token* from the given binary data.
* `face_copy_table`
```C
void face_copy_table(uint32 face_token, uint32 tag, blob_t* blob)
```
Copies the binary data in the OpenType table referenced by `tag` into the supplied `blob` structure.
* `face_get_upem`
```C
uint32 face_get_upem(uint32 face_token)
```
Returns the units-per-em of the font face.
### Other functions
* `blob_free`
```C
void blob_free(blob_t* blob)
```
Frees the memory allocated to a blob structure.
* `glyph_outline_free`
```C
void glyph_outline_free(glyph_outline_t* glyph_outline)
```
Frees the memory allocated to a glyph outline structure.
* `script_get_horizontal_direction`
```C
uint32 script_get_horizontal_direction(uint32 tag)
```
Returns the horizontal direction for the given ISO 15924 script tag. For return values, see `buffer_get_direction` above.
* `debugprint` / `debugprint1` ... `debugprint4`
```C
void debugprint(char* str)
void debugprint1(char* str, int32 arg1)
void debugprint2(char* str, int32 arg1, int32 arg2)
void debugprint3(char* str, int32 arg1, int32 arg2, int32 arg3)
void debugprint4(
char* str,
int32 arg1,
int32 arg2,
int32 arg3,
int32 arg4
)
```
Produces a debugging message in the host shaper's log output; the variants `debugprint1` ... `debugprint4` suffix the message with a comma-separated list of the integer arguments.
## Enabling the WASM shaper when building Harfbuzz
First, you will need the `wasm-micro-runtime` library installed on your computer. Download `wasm-micro-runtime` from [its GitHub repository](https://github.com/bytecodealliance/wasm-micro-runtime/tree/main); then follow [the instructions for building](https://github.com/bytecodealliance/wasm-micro-runtime/blob/main/product-mini/README.md), except run the cmake command from the repository root directory and add the `-DWAMR_BUILD_REF_TYPES=1` flag to the `cmake` line. (You may want to enable "fast JIT".) Then, install it.
So, for example:
```
$ cmake -B build -DWAMR_BUILD_REF_TYPES=1 -DWAMR_BUILD_FAST_JIT=1
$ cmake --build build --parallel
$ sudo cmake --build build --target install
```
(If you don't want to install `wasm-micro-runtime` globally, you can copy `libiwasm.*` and `libvmlib.a` into a directory that your compiler can see when building Harfbuzz.)
Once `wasm-micro-runtime` is installed, to enable the WASM shaper, you need to add the string `-Dwasm=enabled` to your meson build line. For example:
```
$ meson setup build -Dwasm=enabled
...
Additional shapers
Graphite2 : NO
WebAssembly (experimental): YES
...
$ meson compile -C build
```
## How to write a shaping engine in Rust
You may write shaping engines in any language supported by WASM, by conforming to the API described above, but Rust is particularly easy, and we have one of those high-level interface wrappers which makes the process easier. Here are the steps to create an example shaping engine in Rust: (These examples can also be found in `src/wasm/sample/rust`)
* First, install wasm-pack, which helps us to generate optimized WASM files. It writes some Javascript bridge code that we don't need, but it makes the build and deployment process much easier:
```
$ cargo install wasm-pack
```
* Now let's create a new library:
```
$ cargo new --lib hello-wasm
```
* We need the target to be a dynamic library, and we're going to use `wasm-bindgen` to export our Rust function to WASM, so let's put these lines in the `Cargo.toml`. The Harfbuzz sources contain a Rust crate which makes it easy to create the shaper, so we'll specify that as a dependency as well:
```toml
[lib]
crate-type = ["cdylib"]
[dependencies]
wasm-bindgen = "0.2"
harfbuzz-wasm = { path = "your-harfbuzz-source/src/wasm/rust/harfbuzz-wasm"}
```
* And now we'll create our shaper code. In `src/lib.rs`:
```rust
use wasm_bindgen::prelude::*;
#[wasm_bindgen]
pub fn shape(_shape_plan:u32, font_ref: u32, buf_ref: u32, _features: u32, _num_features: u32) -> i32 {
1 // success!
}
```
This exports a shaping function which takes five arguments (tokens representing the shaping plan, the font and the buffer, followed by the feature array and the number of features) and returns a status value. We can pass these tokens back to Harfbuzz in order to use its native functions on the font and buffer objects. More on native functions later - let's get this shaper compiled and added into a font:
* To compile the shaper, run `wasm-pack build --target nodejs`:
```
[INFO]: 🎯 Checking for the Wasm target...
[INFO]: 🌀 Compiling to Wasm...
Compiling hello-wasm v0.1.0 (...)
Finished release [optimized] target(s) in 0.20s
[WARN]: ⚠️ origin crate has no README
[INFO]: ⬇️ Installing wasm-bindgen...
[INFO]: Optimizing wasm binaries with `wasm-opt`...
[INFO]: Optional fields missing from Cargo.toml: 'description', 'repository', and 'license'. These are not necessary, but recommended
[INFO]: ✨ Done in 0.40s
```
You'll find the output WASM file in `pkg/hello_wasm_bg.wasm`
* Now we need to get it into a font.
We provide a utility to do this called `addTable.py` in the `src/` directory:
```
% python3 ~/harfbuzz/src/addTable.py test.ttf test-wasm.ttf pkg/hello_wasm_bg.wasm
```
And now we can run it!
```
% hb-shape test-wasm.ttf abc --shapers=wasm
[cent=0|sterling=1|fraction=2]
```
(The `--shapers=wasm` isn't necessary, as any font with a `Wasm` table will be sent to the WASM shaper if it's enabled, but it proves the point.)
Congratulations! Our shaper did nothing, but in Rust! Now let's do something - it's time for the Hello World of WASM shaping.
* To say hello world, we're going to have to use a native function.
In debugging builds of Harfbuzz, we can print some output from the web assembly module to the host's standard output using the `debug` function. To make this easier, we've got the `harfbuzz-wasm` crate:
```rust
use wasm_bindgen::prelude::*;
use harfbuzz_wasm::debug;
#[wasm_bindgen]
pub fn shape(_shape_plan:u32, _font_ref: u32, _buf_ref: u32, _features: u32, _num_features: u32) -> i32 {
debug("Hello from Rust!\n");
1
}
```
With this compiled into a WASM module, and installed into our font again, finally our fonts can talk to us!
```
$ hb-shape test-wasm.ttf abc
Hello from Rust!
[cent=0|sterling=1|fraction=2]
```
Now let's start to do some actual, you know, *shaping*. The first thing a shaping engine normally does is (a) map the items in the buffer from Unicode codepoints into glyphs in the font, and (b) set the advance width of the buffer items to the default advance width for those glyphs. We're going to need to interrogate the font for this information, and write back to the buffer. Harfbuzz provides us with opaque pointers to the memory for the font and buffer, but we can turn those into useful Rust structures using the `harfbuzz-wasm` crate again:
```rust
use wasm_bindgen::prelude::*;
use harfbuzz_wasm::{Font, GlyphBuffer};
#[wasm_bindgen]
pub fn shape(_shape_plan:u32, font_ref: u32, buf_ref: u32, _features: u32, _num_features: u32) -> i32 {
let font = Font::from_ref(font_ref);
let mut buffer = GlyphBuffer::from_ref(buf_ref);
for mut item in buffer.glyphs.iter_mut() {
// Map character to glyph
item.codepoint = font.get_glyph(item.codepoint, 0);
// Set advance width
item.x_advance = font.get_glyph_h_advance(item.codepoint);
}
1
}
```
The `GlyphBuffer`, unlike in Harfbuzz, combines positioning and information in a single structure, to save you having to zip and unzip all the time. It also takes care of marshalling the buffer back to Harfbuzz-land; when a GlyphBuffer is dropped, it writes its contents back through the reference into Harfbuzz's address space. (If you want a different representation of buffer items, you can have one: `GlyphBuffer` is implemented as a `Buffer<Glyph>`, and if you make your own struct which implements the `BufferItem` trait, you can make a buffer out of that instead.)
One easy way to write your own shapers is to make use of OpenType shaping for the majority of your shaping work, and then make changes to the pre-shaped buffer afterwards. You can do this using the `Font.shape_with` method. Run this on a buffer reference, and then construct your `GlyphBuffer` object afterwards:
```rust
use harfbuzz_wasm::{Font, GlyphBuffer};
use tiny_rng::{Rand, Rng};
use wasm_bindgen::prelude::*;
#[wasm_bindgen]
pub fn shape(_shape_plan:u32, font_ref: u32, buf_ref: u32, _features: u32, _num_features: u32) -> i32 {
let mut rng = Rng::from_seed(123456);
// Use the default OpenType shaper
let font = Font::from_ref(font_ref);
font.shape_with(buf_ref, "ot");
// Now we have a buffer with glyph ids, advance widths etc.
// already filled in.
let mut buffer = GlyphBuffer::from_ref(buf_ref);
for mut item in buffer.glyphs.iter_mut() {
// Randomize it!
item.x_offset = ((rng.rand_u32() as i32) >> 24) - 120;
item.y_offset = ((rng.rand_u32() as i32) >> 24) - 120;
}
1
}
```
See the documentation for the `harfbuzz-wasm` crate for all the other