sourceafBson::BsonReader.fan


** Wraps an 'InStream' to read BSON objects.
** 
** Note that `Binary` objects with a subtype of 'BIN_GENERIC' will be read and returned as a [Buf]`sys::Buf`.
class BsonReader {
    private static const Log    log         := BsonReader#.pod.log
    ** The regex flag characters that may be embedded into a pattern, e.g. '(?i)'.
    private static const Int[]  regexFlags  := "dimsuxU".chars

    ** The 'TimeZone' in which all 'DateTimes' are returned in.
    ** 
    ** This does not change the *instant* in the date time continuum, just time zone it is reported in.
    ** This lets a stored date time of '12 Dec 2012 18:00 UTC' be returned as '12 Dec 2012 13:00 New_York'.
    TimeZone tz := TimeZone.cur
    
    ** The underlying 'InStream'.
    InStream in {
        private set
    }

    ** Creates a 'BsonReader', wrapping the given 'InStream'.
    ** As per the BSON spec, the stream's endian is set to 'little'.
    new make(InStream in) {
        this.in = in
        this.in.endian = Endian.little
    }

    ** Reads a BSON Document from the underlying 'InStream'.
    ** 
    ** Element order is preserved in the returned map. Deprecated types
    ** ('UNDEFINED', 'DB_POINTER', 'SYMBOL') are consumed from the stream, 
    ** logged as a warning, and stored as 'null'.
    Str:Obj? readDocument() {
        _readDocument(BsonBasicTypeReader(in))
    }

    ** Reads a (null terminated) BSON String from the underlying 'InStream'.
    Str readCString() {
        _readCString(BsonBasicTypeReader(in))
    }

    ** Reads a BSON Integer32 from the underlying 'InStream'.
    Int readInteger32() {
        _readInteger32(BsonBasicTypeReader(in))
    }

    ** Reads a BSON Integer64 from the underlying 'InStream'.
    Int readInteger64() {
        _readInteger64(BsonBasicTypeReader(in))
    }

    ** Reads one document (size prefix, elements, terminator) using the given byte-counting reader.
    ** Nested documents and arrays share the same reader so byte counts stay cumulative.
    private Str:Obj? _readDocument(BsonBasicTypeReader reader) {
        bson    := Str:Obj?[:] { ordered = true }
        posMark := reader.bytesRead
        objSize := reader.readInteger32
        
        while ((reader.bytesRead - posMark) < objSize) {
            type := BsonType.fromValue(reader.readByte, true)
            // the End-Of-Object marker is the only element without a name
            name := (type == BsonType.EOO) ? null : reader.readCString
            val  := null

            switch (type) {
                case BsonType.EOO:
                    bytesRead := reader.bytesRead - posMark
                    if (bytesRead < objSize)
                        log.warn(bsonReader_sizeMismatch("Document", objSize - bytesRead))
                    // 'break' leaves the enclosing while loop - the document is finished
                    break

                case BsonType.DOUBLE:
                    val = reader.readDouble

                case BsonType.STRING:
                    val = reader.readString

                case BsonType.DOCUMENT:
                    val = _readDocument(reader)

                case BsonType.ARRAY:
                    // arrays are encoded as documents; the map is ordered so 'vals' keeps element order
                    doc := _readDocument(reader)
                    val = doc.vals
                    
                case BsonType.BINARY:
                    size := reader.readInteger32
                    subtype := reader.readByte
                    if (subtype == 2) {
                        // subtype 2 carries a second, inner length prefix
                        newSize := reader.readInteger32
                        if ((newSize + 4) != size)
                            log.warn(bsonReader_sizeMismatch("Binary", size - (newSize + 4)))
                        size = newSize
                    }
                    buf := reader.readBinary(size)
                    val = (subtype == Binary.BIN_GENERIC) ? buf : Binary(buf, subtype) 

                case BsonType.UNDEFINED:
                    log.warn(bsonReader_deprecatedType("UNDEFINED", name))

                case BsonType.OBJECT_ID:
                    val = reader.readObjectId

                case BsonType.BOOLEAN:
                    val = (reader.readByte == 0x01)

                case BsonType.DATE:
                    val = DateTime.fromJava(reader.readInteger64, tz, false) 

                case BsonType.NULL:
                    val = null

                case BsonType.REGEX:
                    // Regex flags are not supported by Fantom but flag characters can be embedded into 
                    // the pattern itself --> /(?i)case-insensitive/
                    // see Java's Pattern class for a list of supported flags --> dimsuxU
                    // see http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#special
                    pattern := reader.readCString
                    flags   := reader.readCString
                    
                    // convert what flags we can into embedded flag characters
                    if (!flags.isEmpty) {
                        notSupported := Str.fromChars(flags.chars.findAll { !regexFlags.contains(it) })
                        if (!notSupported.isEmpty)
                            log.warn(bsonReader_regexFlagsNotSupported(pattern, notSupported, flags))
                        
                        supported := Str.fromChars(flags.chars.intersection(regexFlags))
                        if (!supported.isEmpty) {
                            newRegex := "(?${supported})${pattern}"
                            log.info(bsonReader_convertedRegexFlags(pattern, supported, newRegex))
                            pattern = newRegex
                        }
                    }
                    val = Regex.fromStr(pattern)

                case BsonType.DB_POINTER:
                    // deprecated: consume the namespace string and the 12 byte id, 
                    // discarding both so the stream stays aligned
                    reader.readString()
                    reader.readBinary(12)
                    log.warn(bsonReader_deprecatedType("DB_POINTER", name))

                case BsonType.CODE:
                    code := reader.readString
                    val = Code(code)

                case BsonType.SYMBOL:
                    // deprecated: consume and discard the symbol string
                    reader.readString()
                    log.warn(bsonReader_deprecatedType("SYMBOL", name))

                case BsonType.CODE_W_SCOPE:
                    mark := reader.bytesRead
                    size := reader.readInteger32
                    code := reader.readString
                    scope := _readDocument(reader)
                    bytesRead := reader.bytesRead - mark
                    if (size != bytesRead)
                        log.warn(bsonReader_sizeMismatch("CODE_W_SCOPE", size - bytesRead))
                    val = Code(code, scope)

                case BsonType.INTEGER_32:
                    val = reader.readInteger32

                case BsonType.TIMESTAMP:
                    // BSON spec: the first 4 bytes are the increment, the second 4 are the seconds
                    // NOTE(review): the corresponding writer must emit the fields in this same order
                    inc := reader.readInteger32
                    sec := reader.readInteger32
                    val = Timestamp(sec, inc)

                case BsonType.INTEGER_64:
                    val = reader.readInteger64

                case BsonType.MIN_KEY:
                    val = MinKey.val

                case BsonType.MAX_KEY:
                    val = MaxKey.val
            }
            
            if (name != null)
                bson[name] = val
        }
        return bson
    }
    
    ** Delegates to the given reader so the bytes read are counted.
    private Str _readCString(BsonBasicTypeReader reader) {
        reader.readCString
    }
    
    ** Delegates to the given reader so the bytes read are counted.
    private Int _readInteger32(BsonBasicTypeReader reader) {
        reader.readInteger32
    }

    ** Delegates to the given reader so the bytes read are counted.
    private Int _readInteger64(BsonBasicTypeReader reader) {
        reader.readInteger64
    }
    
    ** Log message for when a declared size differs from the number of bytes actually read.
    private static Str bsonReader_sizeMismatch(Str what, Int remaining) {
        "BSON size mismatch - read ${what} with ${remaining} bytes remaining"
    }

    ** Log message for when a deprecated BSON type is read and dropped.
    private static Str bsonReader_deprecatedType(Str type, Str name) {
        "Read deprecated BSON type '${type}' for property '${name}' - returning null"
    }

    ** Log message for regex flags that cannot be embedded into the pattern.
    private static Str bsonReader_regexFlagsNotSupported(Str regex, Str notSupported, Str flags) {
        "BSON Regex flag(s) '${notSupported}' are not supported by Fantom: /${regex}/${flags}"
    }

    ** Log message for regex flags that were converted to embedded flag characters.
    private static Str bsonReader_convertedRegexFlags(Str oldRegex, Str flags, Str newRegex) {
        "Converted BSON Regex flag(s) '${flags}' to embedded chars: /${oldRegex}/${flags}  --->  /${newRegex}/"
    }
}

** Reads basic BSON types and keeps count of the number of bytes read.
** 
** Every 'readXXX()' method adds the number of bytes it consumed to 'bytesRead',
** which lets callers compare declared BSON sizes against what was actually read.
internal class BsonBasicTypeReader {
    private static const Log log    := BsonBasicTypeReader#.pod.log

    ** Running total of bytes consumed through this reader.
    Int bytesRead
    
    private InStream                in
    private BsonBasicTypeReader?    reader
    
    ** Creates a reader over the given stream.
    new make(InStream in) {
        this.in = in
    }

    ** Creates a reader that shares the underlying stream of the given reader.
    new makeReader(BsonBasicTypeReader reader) {
        this.reader = reader
        this.in = reader.in
    }

    ** Reads a null terminated string.
    ** 
    ** The stream hands back decoded chars, so the byte count is recomputed 
    ** from the string's UTF-8 size, plus one for the terminator.
    Str readCString() {
        str := in.readNullTerminatedStr(null)
        bytesRead += utf8Size(str) + 1
        return str
    }
    
    ** Reads a length prefixed string, followed by its null terminator.
    Str readString() {
        // the length prefix includes the trailing null
        size := readInteger32 - 1
        // readBufFully() 'cos size is the no. of *bytes*, not chars
        str  := in.readBufFully(null, size).readAllStr(false)
        bytesRead += size
        readNull(str)
        return str
    }
    
    ** Reads a single unsigned byte.
    Int readByte() {
        val := in.readU1
        bytesRead += 1
        return val
    }

    ** Reads exactly 'size' bytes into a new 'Buf'.
    Buf readBinary(Int size) {
        val := in.readBufFully(null, size)
        bytesRead += size
        return val
    }

    ** Reads an 8 byte floating point number.
    Float readDouble() {
        val := in.readF8
        bytesRead += 8
        return val
    }

    ** Reads a signed 4 byte integer.
    Int readInteger32() {
        val := in.readS4
        bytesRead += 4
        return val
    }

    ** Reads a signed 8 byte integer.
    Int readInteger64() {
        val := in.readS8
        bytesRead += 8
        return val
    }

    ** Reads an 'ObjectId', counted as 12 bytes.
    ObjectId readObjectId() {
        val := ObjectId(in)
        bytesRead += 12
        return val
    }

    ** Eat the null terminator
    private Void readNull(Str str) {
        nul := readByte
        if (nul != 0)
            log.warn(bsonReader_nullTerminatorNotZero(nul, str))
    }

    ** Returns the number of bytes the given string occupies when encoded as UTF-8.
    ** 
    ** Every char up to and including 0xFFFF is encoded in at most 3 bytes. 
    ** Anything larger is rejected with an 'Err'.
    private static Int utf8Size(Str str) {
        size := 0
        str.each |ch| {
            if      (ch < 0x0080)   size += 1
            else if (ch < 0x0800)   size += 2
            else if (ch < 0x10000)  size += 3   // 0x0800..0xFFFF are all 3 byte sequences
            else throw Err("Unsupported UTF-8 char: 0x${ch.toHex(4).upper}")
        }
        return size
    }
    
    ** Log message for when a string is not terminated by a zero byte.
    private static Str bsonReader_nullTerminatorNotZero(Int terminator, Str str) {
        "BSON string terminator was not zero, but '0x${terminator.toHex}' for string : ${str}"
    }
}