[M3devel] TEXT & etc.
Mika Nystrom
mika at async.caltech.edu
Wed Dec 31 09:22:41 CET 2008
Hello everyone,
I mentioned a week or two ago that I had run into horrific performance
issues with CM3's TEXTs. It wasn't my intention to start a debate
about Unicode and Mahjong characters, just to point out that there's
a serious performance problem with CM3 in this area. I run into
performance problems with CM3 from time to time because I am trying
to maintain a fairly large amount of software written in Modula-3
and it appears to me that the SRC/PM3 compilers often pay better
attention to performance issues than the newer CM3 compiler.
Generally speaking I think Modula-3 needs to avoid getting into the
Java situation: Java is squeezed between "fast enough" Python and
"much more efficient" C++... Modula-3 I see as the only hope for
getting the performance of C++ with the safety of Java. The problems
I have seen with CM3 are in the following areas:
1. Thread mutex acquisition involving kernel calls (pthreads
implementation)
2. ISTYPE and TYPECASE (appear much faster on PM3, don't know why,
could just be a profiling issue). In any case TYPECASE is very slow
because of having to search the type tree. This could be improved
but I can't work on it because I always run into some kind of fatal
problem trying to bootstrap the compiler.
3. The TEXT issue. I mentioned performance problems before. It turns
out that it was showing up in another area. I upgraded my PowerBook
a while back (to OS X 10.4) and of course everything stopped working,
including at least half my Modula-3 programs. I thought it was a
versioning problem with C libraries I was importing, but it turned out
that in at least one case I was running out of stack space because of
the calls to get_char on concatenated TEXTs. (Not very long ones,
at that.)
To get to the point. What follows is my new version of TextCat.m3,
which I believe works, but I can't really say because I'm perennially
incapable of bootstrapping CM3. The basic idea is that you can
have *one* TextCat.T, at the "root" of your TEXT. It will flatten
anything else.
I don't see a simple way of modifying the CM3 TEXT implementation to
do much better than this. Of course throwing out TextCat.T completely
is an option.
An advantage of what I've done is that I haven't changed any interfaces
at all. So I don't get problems with version stamps, etc.
Mika
(* Copyright 1996-2000, Critical Mass, Inc. All rights reserved. *)
(* See file COPYRIGHT-CMASS for details. *)
UNSAFE MODULE TextCat EXPORTS RTHooks, TextCat;
IMPORT TextClass, Text8, Text16;
(* changed 12/31/2008:
only return a T when neither of the arguments to Concat is already
a T; otherwise flatten to a Text8 or Text16. Change affects
only Concat and MultiCat
*)
REVEAL
T = Public BRANDED "TextCat.T" OBJECT OVERRIDES
get_info := MyGetInfo;
get_char := MyGetChar;
get_wide_char := MyGetWideChar;
get_chars := MyGetChars;
get_wide_chars := MyGetWideChars;
END;
PROCEDURE New (t, u: TEXT): TEXT =
BEGIN
RETURN Concat (t, u);
END New;
(* RTHooks.Concat -- called by the inline "&" operator *)
PROCEDURE Concat (t, u: TEXT): TEXT =
VAR ti, ui: TextClass.Info;
BEGIN
t.get_info (ti); IF (ti.length <= 0) THEN RETURN u; END;
u.get_info (ui); IF (ui.length <= 0) THEN RETURN t; END;
IF NOT ISTYPE(t,T) AND NOT ISTYPE(u,T) THEN
RETURN NEW (T, a := t, b := u,
a_len := ti.length, b_len := ui.length,
a_or_b_wide := ti.wide OR ui.wide)
ELSIF ti.wide OR ui.wide THEN
WITH res = Text16.Create(ti.length+ui.length) DO
FOR i := 0 TO ti.length-1 DO
res.contents[i] := t.get_wide_char(i)
END;
FOR i := 0 TO ui.length-1 DO
res.contents[ti.length+i] := t.get_wide_char(i)
END;
RETURN res
END
ELSE
WITH res = Text8.Create(ti.length+ui.length) DO
FOR i := 0 TO ti.length-1 DO
res.contents[i] := t.get_char(i)
END;
FOR i := 0 TO ui.length-1 DO
res.contents[ti.length+i] := t.get_char(i)
END;
RETURN res
END
END
END Concat;
PROCEDURE NewMulti (READONLY x: ARRAY OF TEXT): TEXT =
BEGIN
RETURN MultiCat (x);
END NewMulti;
(* RTHooks.MultiCat *)
PROCEDURE MultiCat (READONLY x: ARRAY OF TEXT): TEXT =
VAR wide := FALSE;
length, start := 0;
xi : TextClass.Info;
BEGIN
IF NUMBER (x) <= 0 THEN RETURN ""; END;
IF NUMBER (x) = 1 THEN RETURN x[0]; END;
IF NUMBER (x) = 2 THEN RETURN Concat(x[0],x[1]) END;
FOR i := FIRST(x) TO LAST(x) DO
x[i].get_info (xi);
wide := wide OR xi.wide;
length := length + xi.length
END;
IF wide THEN
WITH res = Text16.Create(length) DO
FOR i := FIRST(x) TO LAST(x) DO
x[i].get_info (xi);
FOR j := 0 TO xi.length-1 DO
res.contents[start+j] := x[i].get_wide_char(j)
END;
start := start + xi.length
END;
RETURN res
END
ELSE
WITH res = Text8.Create(length) DO
FOR i := FIRST(x) TO LAST(x) DO
x[i].get_info (xi);
FOR j := 0 TO xi.length-1 DO
res.contents[start+j] := x[i].get_char(j)
END;
start := start + xi.length
END;
RETURN res
END
END
END MultiCat;
PROCEDURE MyGetInfo (t: T; VAR info: TextClass.Info) =
BEGIN
info.start := NIL;
info.length := t.a_len + t.b_len;
info.wide := t.a_or_b_wide;
END MyGetInfo;
PROCEDURE MyGetChar (t: T; index: CARDINAL): CHAR =
BEGIN
IF (index < t.a_len) THEN RETURN t.a.get_char (index); END;
DEC (index, t.a_len);
IF (index < t.b_len) THEN RETURN t.b.get_char (index); END;
DEC (index, t.b_len);
index := -1; (* force a range fault *) <*NOWARN*>
END MyGetChar;
PROCEDURE MyGetWideChar (t: T; index: CARDINAL): WIDECHAR =
BEGIN
IF (index < t.a_len) THEN RETURN t.a.get_wide_char (index); END;
DEC (index, t.a_len);
IF (index < t.b_len) THEN RETURN t.b.get_wide_char (index); END;
DEC (index, t.b_len);
index := -1; (* force a range fault *) <*NOWARN*>
END MyGetWideChar;
PROCEDURE MyGetChars (t: T; VAR a: ARRAY OF CHAR; start: CARDINAL) =
VAR u: TEXT; a_offset, t_offset, u_offset: CARDINAL := 0;
BEGIN
u := t.a;
IF (t_offset + t.a_len > start) THEN
u_offset := MAX (start - t_offset, 0);
u.get_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
INC (a_offset, t.a_len - u_offset);
IF (a_offset >= NUMBER (a)) THEN RETURN; END;
END;
INC (t_offset, t.a_len);
u := t.b;
IF (t_offset + t.b_len > start) THEN
u_offset := MAX (start - t_offset, 0);
u.get_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
INC (a_offset, t.b_len - u_offset);
IF (a_offset >= NUMBER (a)) THEN RETURN; END;
END;
INC (t_offset, t.b_len);
END MyGetChars;
PROCEDURE MyGetWideChars (t: T; VAR a: ARRAY OF WIDECHAR; start: CARDINAL) =
VAR u: TEXT; a_offset, t_offset, u_offset: CARDINAL := 0;
BEGIN
u := t.a;
IF (t_offset + t.a_len > start) THEN
u_offset := MAX (start - t_offset, 0);
u.get_wide_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
INC (a_offset, t.a_len - u_offset);
IF (a_offset >= NUMBER (a)) THEN RETURN; END;
END;
INC (t_offset, t.a_len);
u := t.b;
IF (t_offset + t.b_len > start) THEN
u_offset := MAX (start - t_offset, 0);
u.get_wide_chars (SUBARRAY (a, a_offset, NUMBER (a) - a_offset), u_offset);
INC (a_offset, t.b_len - u_offset);
IF (a_offset >= NUMBER (a)) THEN RETURN; END;
END;
INC (t_offset, t.b_len);
END MyGetWideChars;
BEGIN
END TextCat.
More information about the M3devel
mailing list