On Tue, Feb 16, 2021 at 3:27 PM Marcel Fabian Krüger <tex@2krueger.de> wrote:
Hi,

as documented in the manual, LuaTeX supports a restricted form of nested
discretionaries to handle cases like "of\-f\-ice" with ffi/ff/fi
ligatures. Sadly this does not seem to work correctly. Take for example
the document:

  \pretolerance-1
  \parfillskip0pt

  \setbox0\hbox{off-}
  \setbox2\hbox{ice}

  \noindent
  \parshape 2 0pt \wd0 0pt \wd2
  off\-ice

  \noindent
  \parshape 2 0pt \wd0 0pt \wd2
  of\-f\-ice

  \setbox0\hbox{of-}
  \setbox1\hbox{fice}

  \noindent
  \parshape 2 0pt \wd0 0pt \wd1
  of\-fice

  \noindent
  \parshape 2 0pt \wd0 0pt \wd1
  of\-f\-ice

  \setbox1\hbox{f-}

  \noindent
  \parshape 3 0pt \wd0 0pt \wd1 0pt \wd2
  of\-f\-ice
  \bye

Here the first two paragraphs are formed to prefectly fit off-ice and
this break works correctly when the input is off\-ice. But for
of\-f\-ice, no break occurs and the line becomes overfull.

The next two paragraphs fit to of-fice, which again works for of\-fice.
But for of\-f\-ice, it is broken as off\-ice instead, leading to a
overfull followed by a underfull box.

The last paragraph tries to trigger both breaks and doesn't work
correctly either.

The cause are multiple issues:

  1. In ext_do_line_break, the disc_width for the second case is for the
     case that both breaks occur, which is significantly less likely
     than the single break case. (Since the middle part will normally be
     much shorter than a paragraph)
     This can be fixed by switching to the disabled code which according
     to a comment "doesn't work". It can be fixed by adding a

       do_one_seven_eight(add_disc_width_to_active_width);

  2. In compute_break_width, the break_width for a select_disc is not
     handled specially. Therefore instead of subtracting the no_break
     width of the previous discretionary which is the width which is
     contributed for the construct to active_width.

  3. After both changes, the only remaining issue occurs when both
     breaks trigger at the same time. Since the calculations assume
     the other case, the length of the middle line is calculated
     incorrectly. Since no glue can appear in discretionary nodes,
     we know that this error can not affect the stretchable and
     shrikable component of the line, so we can just overwrite the
     natural width with the statically known length of this line to fix
     this.

IMO the thirs issue is much less important than the first two since it
only affects the corner-case of taking both breaks.

The diff below fixes all three issues and additionally allows the
disc_penalty of the select_disc to determine the penallty of the second
break. (That was useful during debugging and it seems like the expected
behavior.)

I also attach three diffs with more or less independent components:
First only the penalty change, than a change for 1&2 which fixes most
common issue and then the slightly more hacky fix for 3.

Best regards,
Marcel


diff --git a/source/texk/web2c/luatexdir/tex/linebreak.c b/source/texk/web2c/luatexdir/tex/linebreak.c
index 646a5a771..8c66dbf32 100644
--- a/source/texk/web2c/luatexdir/tex/linebreak.c
+++ b/source/texk/web2c/luatexdir/tex/linebreak.c
@@ -879,7 +879,8 @@ static int line_diff;
                 double_hyphen_demerits, \
                 final_hyphen_demerits, \
                 first_p, \
-                cur_p \
+                cur_p, \
+                null \
             ); \
     if (type(cur_p)!=math_node) \
         active_width[1] += width(cur_p); \
@@ -1094,7 +1095,10 @@ static void compute_break_width(int break_type, int line_break_dir, int adjust_s
             path, as we are talking about the breaking on {\it this} position.

         */
-        sub_from_widths(vlink_no_break(p), line_break_dir, adjust_spacing, break_width);
+        if (subtype(p) == select_disc)
+            sub_from_widths(vlink_no_break(alink(p)), line_break_dir, adjust_spacing, break_width);
+        else
+            sub_from_widths(vlink_no_break(p), line_break_dir, adjust_spacing, break_width);
         add_to_widths(vlink_post_break(p), line_break_dir, adjust_spacing, break_width);
         do_one_seven_eight(add_disc_width_to_break_width);
         if (vlink_post_break(p) == null) {
@@ -1273,7 +1277,8 @@ static void ext_try_break(
     int line_penalty,
     int last_line_fit,
     int double_hyphen_demerits,
-    int final_hyphen_demerits, halfword first_p, halfword cur_p
+    int final_hyphen_demerits, halfword first_p, halfword cur_p,
+    halfword init_p
 )
 {
     /*tex runs through the active list */
@@ -1311,6 +1316,8 @@ static void ext_try_break(
     scaled g = 0;
     /*tex distance from current active node */
     scaled cur_active_width[10] = { 0 };
+    /*tex if cur_p == init_p != null, the line length between init_disc and select_disc */
+    scaled init_select_width;
     /*tex Make sure that |pi| is in the proper range; */
     if (pi >= inf_penalty) {
         /*tex this breakpoint is inhibited by infinite penalty */
@@ -1371,6 +1378,11 @@ static void ext_try_break(
                     no_break_yet = false;
                     do_all_eight(set_break_width_to_background);
                     compute_break_width(break_type, line_break_dir, adjust_spacing, cur_p);
+                    if (init_p != null && init_p == cur_p) {
+                        scaled line_width[10] = { 0 };
+                        add_to_widths(vlink_pre_break(vlink(cur_p)), line_break_dir, adjust_spacing, line_width);
+                        init_select_width = background[1] + line_width[1];
+                    }
                 }
                 /*tex

@@ -1448,6 +1460,8 @@ static void ext_try_break(
                             active_short(q) = best_pl_short[fit_class];
                             active_glue(q) = best_pl_glue[fit_class];
                         }
+                        if (init_p != null && init_p == cur_p)
+                            active_select_width(q) = init_select_width;
                         vlink(q) = r;
                         vlink(prev_r) = q;
                         prev_r = q;
@@ -1513,7 +1527,10 @@ static void ext_try_break(

         */
         artificial_demerits = false;
-        shortfall = line_width - cur_active_width[1];
+        if (init_p != null && break_node(r) != null && cur_break(break_node(r)) == init_p)
+            shortfall = line_width - active_select_width(r);
+        else
+            shortfall = line_width - cur_active_width[1];
         if (break_node(r) == null)
             shortfall -= init_internal_left_box_width;
         else
@@ -2233,7 +2250,8 @@ void ext_do_line_break(
                                 double_hyphen_demerits,
                                 final_hyphen_demerits,
                                 first_p,
-                                cur_p
+                                cur_p,
+                                null
                             );
                         }
                     }
@@ -2285,7 +2303,7 @@ void ext_do_line_break(
                                           tracing_paragraphs, protrude_chars,
                                           line_penalty, last_line_fit,
                                           double_hyphen_demerits,
-                                          final_hyphen_demerits, first_p, cur_p);
+                                          final_hyphen_demerits, first_p, cur_p, null);
                         } else {
                             add_to_widths(s, line_break_dir, adjust_spacing, disc_width);
                             do_one_seven_eight(add_disc_width_to_active_width);
@@ -2295,45 +2313,35 @@ void ext_do_line_break(
                                           tracing_paragraphs, protrude_chars,
                                           line_penalty, last_line_fit,
                                           double_hyphen_demerits,
-                                          final_hyphen_demerits, first_p, cur_p);
+                                          final_hyphen_demerits, first_p, cur_p,
+                                          subtype(cur_p) == init_disc ? cur_p : null);
                             if (subtype(cur_p) == init_disc) {
                                 /*tex

-                                    We should at two break points after the one
-                                    we added above:
-
-                                    \startitemize[n]
-                                        \startitem
-                                            which does a possible break in INIT's
-                                            |post_break|
-                                        \stopitem
-                                        \startitem
-                                            which means the |no_break| actually
-                                            was broken just a character later
-                                        \stopitem
-                                    \stopitemize
-
-                                    Do the select-0 case |f-f-i|:
-
+                                    The breakpoint after the one we added above
+                                    is special since it's corresponding
+                                    disc_width depends on whether the previous
+                                    breakpoint will be taken. Since this would need
+                                    adjustments in multiple places, we avoid it by
+                                    exploiting the fact that we actually know how long
+                                    the line is going to be in that case: The length of
+                                    the background plus the SELECT's |pre_break|.
+                                    This length is stored in the active nodes for the
+                                    INIT since this allows to avoid the computation if
+                                    there is no feasible break at INIT.
+                                    To trigger this mechanism, we have to pass the
+                                    corresponding INIT to ext_try_break.
                                 */
-                                s = vlink_pre_break(vlink(cur_p));
-                                add_to_widths(s, line_break_dir, adjust_spacing, disc_width);
-                                ext_try_break(actual_penalty, hyphenated_node,
-                                              line_break_dir, adjust_spacing,
-                                              par_shape_ptr, adj_demerits,
-                                              tracing_paragraphs,
-                                              protrude_chars, line_penalty,
-                                              last_line_fit, double_hyphen_demerits,
-                                              final_hyphen_demerits, first_p,
-                                              vlink(cur_p));
-                                /*tex This does not work. */
-#if 0
+                                actual_penalty = (int) disc_penalty(vlink(cur_p));
                                 /*tex Go back to the starting situation. */
                                 do_one_seven_eight(sub_disc_width_from_active_width);
                                 do_one_seven_eight(reset_disc_width);
+                                /*tex Ensure that alink is set for |compute_break_width| */
+                                alink(vlink(cur_p)) = cur_p;
                                 /*tex Add select |no_break| to |active_width|. */
                                 s = vlink_no_break(vlink(cur_p));
                                 add_to_widths(s, line_break_dir, adjust_spacing, disc_width);
+                                do_one_seven_eight(add_disc_width_to_active_width);
                                 ext_try_break(actual_penalty, hyphenated_node,
                                               line_break_dir, adjust_spacing,
                                               par_shape_ptr, adj_demerits,
@@ -2341,8 +2349,7 @@ void ext_do_line_break(
                                               protrude_chars, line_penalty,
                                               last_line_fit, double_hyphen_demerits,
                                               final_hyphen_demerits, first_p,
-                                              vlink(cur_p));
-#endif
+                                              vlink(cur_p), cur_p);
                             }
                             do_one_seven_eight(sub_disc_width_from_active_width);
                         }
@@ -2356,7 +2363,7 @@ void ext_do_line_break(
                                   tracing_paragraphs, protrude_chars,
                                   line_penalty, last_line_fit,
                                   double_hyphen_demerits, final_hyphen_demerits,
-                                  first_p, cur_p);
+                                  first_p, cur_p, null);
                     break;
                 case boundary_node:
                 case whatsit_node:
@@ -2398,7 +2405,7 @@ void ext_do_line_break(
                           adjust_spacing, par_shape_ptr, adj_demerits,
                           tracing_paragraphs, protrude_chars, line_penalty,
                           last_line_fit, double_hyphen_demerits,
-                          final_hyphen_demerits, first_p, cur_p);
+                          final_hyphen_demerits, first_p, cur_p, null);
             if (vlink(active) != active) {
                 /*tex Find an active node with fewest demerits; */
                 r = vlink(active);
diff --git a/source/texk/web2c/luatexdir/tex/texnodes.h b/source/texk/web2c/luatexdir/tex/texnodes.h
index 9742ca496..3a1c5ff77 100644
--- a/source/texk/web2c/luatexdir/tex/texnodes.h
+++ b/source/texk/web2c/luatexdir/tex/texnodes.h
@@ -976,13 +976,14 @@ typedef enum {

 /* node sused in the parbuilder */

-#  define active_node_size 4                    /*number of words in extended active nodes */
+#  define active_node_size 5                    /*number of words in extended active nodes */
 #  define fitness subtype                       /*|very_loose_fit..tight_fit| on final line for this break */
 #  define break_node(a) vlink((a)+1)            /*pointer to the corresponding passive node */
 #  define line_number(a) vinfo((a)+1)           /*line that begins at this breakpoint */
 #  define total_demerits(a) varmem[(a)+2].cint  /* the quantity that \TeX\ minimizes */
 #  define active_short(a) vinfo(a+3)            /* |shortfall| of this line */
 #  define active_glue(a)  vlink(a+3)            /*corresponding glue stretch or shrink */
+#  define active_select_width(a) vlink(a+4)     /*if this break is a init_disc, the width of the following line when broken at select_disc */

 #  define passive_node_size 7
 #  define cur_break(a)                   vlink((a)+1)   /*in passive node, points to position of this breakpoint */
_______________________________________________
dev-luatex mailing list
dev-luatex@ntg.nl
https://mailman.ntg.nl/mailman/listinfo/dev-luatex

Confirmed.
I hope that it will be fixed for tl 2021.

--
luigi