[M3devel] TEXT & etc.

Mika Nystrom mika at async.caltech.edu
Wed Dec 31 09:22:41 CET 2008


Hello everyone,

I mentioned a week or two ago that I had run into horrific performance
issues with CM3's TEXTs.  It wasn't my intention to start a debate
about Unicode and Mahjong characters, just to point out that there's
a serious performance problem with CM3 in this area.  I run into
performance problems with CM3 from time to time because I am trying
to maintain a fairly large amount of software written in Modula-3
and it appears to me that the SRC/PM3 compilers often pay better
attention to performance issues than the newer CM3 compiler.
Generally speaking I think Modula-3 needs to avoid getting into the
Java situation: Java is squeezed between "fast enough" Python and
"much more efficient" C++... Modula-3 I see as the only hope for
getting the performance of C++ with the safety of Java.  The problems
I have seen with CM3 are in the following areas:

1. Thread mutex acquisition involving kernel calls (pthreads
   implementation)

2. ISTYPE and TYPECASE (appear much faster on PM3, don't know why,
   could just be a profiling issue).  In any case TYPECASE is very slow
   because of having to search the type tree.  This could be improved
   but I can't work on it because I always run into some kind of fatal
   problem trying to bootstrap the compiler.

3. The TEXT issue.  I mentioned performance problems before.  It turns 
   out that it was showing up in another area.  I upgraded my PowerBook
   a while back (to OS X 10.4) and of course everything stopped working,
   including at least half my Modula-3 programs.  I thought it was a 
   versioning problem with C libraries I was importing, but it turned out
   that in at least one case I was running out of stack space because of
   the calls to get_char on concatenated TEXTs.  (Not very long ones,
   at that.)

To get to the point.  What follows is my new version of TextCat.m3,
which I believe works, but I can't really say because I'm perennially
incapable of bootstrapping CM3.  The basic idea is that you can 
have *one* TextCat.T, at the "root" of your TEXT.  It will flatten
anything else.  

I don't see a simple way of modifying the CM3 TEXT implementation to 
do much better than this.  Of course throwing out TextCat.T completely
is an option.

An advantage of what I've done is that I haven't changed any interfaces
at all.  So I don't get problems with version stamps, etc.

     Mika


(* Copyright 1996-2000, Critical Mass, Inc.  All rights reserved. *)
(* See file COPYRIGHT-CMASS for details. *)

UNSAFE MODULE TextCat EXPORTS RTHooks, TextCat;

IMPORT TextClass, Text8, Text16;

(* changed 12/31/2008:
   only return a T when neither of the arguments to Concat is already
   a T; otherwise flatten to a Text8 or Text16.  Change affects
   only Concat and MultiCat 
*)

REVEAL
  T = Public BRANDED "TextCat.T" OBJECT OVERRIDES
    get_info       := MyGetInfo;
    get_char       := MyGetChar;
    get_wide_char  := MyGetWideChar;
    get_chars      := MyGetChars;
    get_wide_chars := MyGetWideChars;
  END;

PROCEDURE New (t, u: TEXT): TEXT =
  BEGIN
    RETURN Concat (t, u);
  END New;

(* RTHooks.Concat -- called by the inline "&" operator *)
PROCEDURE Concat (t, u: TEXT): TEXT =
  VAR ti, ui: TextClass.Info;
  BEGIN
    t.get_info (ti);  IF (ti.length <= 0) THEN RETURN u; END;
    u.get_info (ui);  IF (ui.length <= 0) THEN RETURN t; END;
    IF NOT ISTYPE(t,T) AND NOT ISTYPE(u,T) THEN
      RETURN NEW (T, a := t, b := u,
                  a_len := ti.length, b_len := ui.length,
                  a_or_b_wide := ti.wide OR ui.wide)
    ELSIF ti.wide OR ui.wide THEN
      WITH res = Text16.Create(ti.length+ui.length) DO
        FOR i := 0 TO ti.length-1 DO
          res.contents[i] := t.get_wide_char(i)
        END;
        FOR i := 0 TO ui.length-1 DO
          res.contents[ti.length+i] := t.get_wide_char(i)
        END;
        RETURN res
      END
    ELSE
      WITH res = Text8.Create(ti.length+ui.length) DO
        FOR i := 0 TO ti.length-1 DO
          res.contents[i] := t.get_char(i)
        END;
        FOR i := 0 TO ui.length-1 DO
          res.contents[ti.length+i] := t.get_char(i)
        END;
        RETURN res
      END
    END
  END Concat;

PROCEDURE NewMulti (READONLY x: ARRAY OF TEXT): TEXT =
  BEGIN
    RETURN MultiCat (x);
  END NewMulti;

(* RTHooks.MultiCat *)
PROCEDURE MultiCat (READONLY x: ARRAY OF TEXT): TEXT =
  VAR wide := FALSE;
      length, start := 0;
      xi : TextClass.Info;
  BEGIN
    IF NUMBER (x) <= 0 THEN RETURN "";   END;
    IF NUMBER (x) = 1  THEN RETURN x[0]; END;
    IF NUMBER (x) = 2  THEN RETURN Concat(x[0],x[1]) END;

    FOR i := FIRST(x) TO LAST(x) DO
      x[i].get_info (xi);
      wide := wide OR xi.wide;
      length := length + xi.length
    END;

    IF wide THEN
      WITH res = Text16.Create(length) DO
        FOR i := FIRST(x) TO LAST(x) DO
          x[i].get_info (xi);
          FOR j := 0 TO xi.length-1 DO
            res.contents[start+j] := x[i].get_wide_char(j)
          END;
          start := start + xi.length
        END;
        RETURN res
      END
    ELSE
      WITH res = Text8.Create(length) DO
        FOR i := FIRST(x) TO LAST(x) DO
          x[i].get_info (xi);
          FOR j := 0 TO xi.length-1 DO
            res.contents[start+j] := x[i].get_char(j)
          END;
          start := start + xi.length
        END;
        RETURN res
      END
    END
  END MultiCat;

PROCEDURE MyGetInfo (t: T;  VAR info: TextClass.Info) =
  BEGIN
    info.start  := NIL;
    info.length := t.a_len + t.b_len;
    info.wide   := t.a_or_b_wide;
  END MyGetInfo;

PROCEDURE MyGetChar (t: T;  index: CARDINAL): CHAR =
  BEGIN
    IF (index < t.a_len) THEN RETURN t.a.get_char (index); END;
    DEC (index, t.a_len);

    IF (index < t.b_len) THEN RETURN t.b.get_char (index); END;
    DEC (index, t.b_len);

    index := -1;  (* force a range fault *) <*NOWARN*>
  END MyGetChar;

PROCEDURE MyGetWideChar (t: T;  index: CARDINAL): WIDECHAR =
  BEGIN
    IF (index < t.a_len) THEN RETURN t.a.get_wide_char (index); END;
    DEC (index, t.a_len);

    IF (index < t.b_len) THEN RETURN t.b.get_wide_char (index); END;
    DEC (index, t.b_len);

    index := -1;  (* force a range fault *) <*NOWARN*>
  END MyGetWideChar;

PROCEDURE MyGetChars (t: T;  VAR a: ARRAY OF CHAR;  start: CARDINAL) =
  VAR u: TEXT;  a_offset, t_offset, u_offset: CARDINAL := 0;
  BEGIN
    u := t.a;
    IF (t_offset + t.a_len > start) THEN
      u_offset := MAX (start - t_offset, 0);
      u.get_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
      INC (a_offset, t.a_len - u_offset);
      IF (a_offset >= NUMBER (a)) THEN RETURN; END;
    END;
    INC (t_offset, t.a_len);

    u := t.b;
    IF (t_offset + t.b_len > start) THEN
      u_offset := MAX (start - t_offset, 0);
      u.get_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
      INC (a_offset, t.b_len - u_offset);
      IF (a_offset >= NUMBER (a)) THEN RETURN; END;
    END;
    INC (t_offset, t.b_len);
  END MyGetChars;

PROCEDURE MyGetWideChars (t: T;  VAR a: ARRAY OF WIDECHAR;  start: CARDINAL) =
  VAR u: TEXT;  a_offset, t_offset, u_offset: CARDINAL := 0;
  BEGIN
    u := t.a;
    IF (t_offset + t.a_len > start) THEN
      u_offset := MAX (start - t_offset, 0);
      u.get_wide_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
      INC (a_offset, t.a_len - u_offset);
      IF (a_offset >= NUMBER (a)) THEN RETURN; END;
    END;
    INC (t_offset, t.a_len);

    u := t.b;
    IF (t_offset + t.b_len > start) THEN
      u_offset := MAX (start - t_offset, 0);
      u.get_wide_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
      INC (a_offset, t.b_len - u_offset);
      IF (a_offset >= NUMBER (a)) THEN RETURN; END;
    END;
    INC (t_offset, t.b_len);
  END MyGetWideChars;

BEGIN
END TextCat.






More information about the M3devel mailing list