# gen-all-unicode.py
import sys

# From glib/gutf8.c:
#
#define UNICODE_VALID(Char) \
#    ((Char) < 0x110000 && (((Char) & 0xFFFFF800) != 0xD800))

def is_valid_unicode(ch):
    """Return True if ``ch`` is a valid Unicode code point for encoding.

    Python counterpart of glib's ``UNICODE_VALID`` macro: the value must be
    below 0x110000 and must not fall in the surrogate range 0xD800-0xDFFF.

    Python integers may be negative, and the mask test alone would accept
    them (e.g. ``-1 & 0xFFFFF800`` is ``0xFFFFF800``, which is not
    ``0xD800``) even though ``chr()`` rejects such values. Negative inputs
    are therefore rejected explicitly.

    Args:
        ch: Integer code point to test.

    Returns:
        bool: True when ``0 <= ch <= 0x10FFFF`` and ``ch`` is not a
        surrogate; False otherwise.
    """
    # Masking off the low 11 bits maps every value in 0xD800-0xDFFF to 0xD800.
    return 0 <= ch < 0x110000 and (ch & 0xFFFFF800) != 0xD800

if __name__ == '__main__':
    # Script entry point: dump every valid code point in 0..0xFFFF to
    # OUTFILE as UTF-8, twice over -- first packed, then space-separated.
    if len(sys.argv) < 2:
        sys.exit('Usage: gen-all-unicode.py OUTFILE')

    out_path = sys.argv[1]

    # One (separator, wrap width) pair per pass over the code points:
    #   pass 1: no separator, line break after every 40th code point
    #   pass 2: a space after every slot, line break after every 20th
    passes = (
        (b'', 40),
        (b' ', 20),
    )

    with open(out_path, 'wb') as out:
        for separator, wrap_at in passes:
            for code_point in range(65536):
                if is_valid_unicode(code_point):
                    encoded = chr(code_point).encode('utf-8', 'surrogatepass')
                    out.write(encoded)

                # The separator is emitted for every slot, including code
                # points that were skipped as invalid.
                if separator:
                    out.write(separator)

                # Wrap on non-zero multiples of the width; code point 0
                # never triggers a line break.
                if code_point != 0 and code_point % wrap_at == 0:
                    out.write(b'\n')

        # Single trailing newline after both passes.
        out.write(b'\n')